Merge branch 'main' into lorenze/feat/grep-tool

2026-05-05 09:12:39 +00:00 · 2026-03-10 10:31:04 -07:00
parent 2d0e81c10d f070ce8abd
commit 8e336a476f
187 changed files with 22972 additions and 6467 deletions
--- a/lib/crewai-files/pyproject.toml
+++ b/lib/crewai-files/pyproject.toml
@@ -8,8 +8,8 @@ authors = [
 ]
 requires-python = ">=3.10, <3.14"
 dependencies = [
-    "Pillow~=10.4.0",
-    "pypdf~=4.0.0",
+    "Pillow~=12.1.1",
+    "pypdf~=6.7.5",
    "python-magic>=0.4.27",
    "aiocache~=0.12.3",
    "aiofiles~=24.1.0",
--- a/lib/crewai-files/src/crewai_files/init.py
+++ b/lib/crewai-files/src/crewai_files/init.py
@@ -152,4 +152,4 @@ __all__ = [
    "wrap_file_source",
 ]

-__version__ = "1.9.3"
+__version__ = "1.10.1"
--- a/lib/crewai-tools/pyproject.toml
+++ b/lib/crewai-tools/pyproject.toml
@@ -8,12 +8,10 @@ authors = [
 ]
 requires-python = ">=3.10, <3.14"
 dependencies = [
-    "lancedb~=0.5.4",
    "pytube~=15.0.0",
    "requests~=2.32.5",
    "docker~=7.1.0",
-    "crewai==1.9.3",
-    "lancedb~=0.5.4",
+    "crewai==1.10.1",
    "tiktoken~=0.8.0",
    "beautifulsoup4~=4.13.4",
    "python-docx~=1.2.0",
@@ -110,7 +108,7 @@ stagehand = [
    "stagehand>=0.4.1",
 ]
 github = [
-    "gitpython==3.1.38",
+    "gitpython>=3.1.41,<4",
    "PyGithub==1.59.1",
 ]
 rag = [
--- a/lib/crewai-tools/src/crewai_tools/init.py
+++ b/lib/crewai-tools/src/crewai_tools/init.py
@@ -10,7 +10,18 @@ from crewai_tools.aws.s3.writer_tool import S3WriterTool
 from crewai_tools.tools.ai_mind_tool.ai_mind_tool import AIMindTool
 from crewai_tools.tools.apify_actors_tool.apify_actors_tool import ApifyActorsTool
 from crewai_tools.tools.arxiv_paper_tool.arxiv_paper_tool import ArxivPaperTool
+from crewai_tools.tools.brave_search_tool.brave_image_tool import BraveImageSearchTool
+from crewai_tools.tools.brave_search_tool.brave_llm_context_tool import (
+    BraveLLMContextTool,
+)
+from crewai_tools.tools.brave_search_tool.brave_local_pois_tool import (
+    BraveLocalPOIsDescriptionTool,
+    BraveLocalPOIsTool,
+)
+from crewai_tools.tools.brave_search_tool.brave_news_tool import BraveNewsSearchTool
 from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool
+from crewai_tools.tools.brave_search_tool.brave_video_tool import BraveVideoSearchTool
+from crewai_tools.tools.brave_search_tool.brave_web_tool import BraveWebSearchTool
 from crewai_tools.tools.brightdata_tool.brightdata_dataset import (
    BrightDataDatasetTool,
 )
@@ -201,7 +212,14 @@ __all__ = [
    "ArxivPaperTool",
    "BedrockInvokeAgentTool",
    "BedrockKBRetrieverTool",
+    "BraveImageSearchTool",
+    "BraveLLMContextTool",
+    "BraveLocalPOIsDescriptionTool",
+    "BraveLocalPOIsTool",
+    "BraveNewsSearchTool",
    "BraveSearchTool",
+    "BraveVideoSearchTool",
+    "BraveWebSearchTool",
    "BrightDataDatasetTool",
    "BrightDataSearchTool",
    "BrightDataWebUnlockerTool",
@@ -293,4 +311,4 @@ __all__ = [
    "ZapierActionTools",
 ]

-__version__ = "1.9.3"
+__version__ = "1.10.1"
--- a/lib/crewai-tools/src/crewai_tools/tools/init.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/init.py
@@ -1,7 +1,18 @@
 from crewai_tools.tools.ai_mind_tool.ai_mind_tool import AIMindTool
 from crewai_tools.tools.apify_actors_tool.apify_actors_tool import ApifyActorsTool
 from crewai_tools.tools.arxiv_paper_tool.arxiv_paper_tool import ArxivPaperTool
+from crewai_tools.tools.brave_search_tool.brave_image_tool import BraveImageSearchTool
+from crewai_tools.tools.brave_search_tool.brave_llm_context_tool import (
+    BraveLLMContextTool,
+)
+from crewai_tools.tools.brave_search_tool.brave_local_pois_tool import (
+    BraveLocalPOIsDescriptionTool,
+    BraveLocalPOIsTool,
+)
+from crewai_tools.tools.brave_search_tool.brave_news_tool import BraveNewsSearchTool
 from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool
+from crewai_tools.tools.brave_search_tool.brave_video_tool import BraveVideoSearchTool
+from crewai_tools.tools.brave_search_tool.brave_web_tool import BraveWebSearchTool
 from crewai_tools.tools.brightdata_tool import (
    BrightDataDatasetTool,
    BrightDataSearchTool,
@@ -186,7 +197,14 @@ __all__ = [
    "AIMindTool",
    "ApifyActorsTool",
    "ArxivPaperTool",
+    "BraveImageSearchTool",
+    "BraveLLMContextTool",
+    "BraveLocalPOIsDescriptionTool",
+    "BraveLocalPOIsTool",
+    "BraveNewsSearchTool",
    "BraveSearchTool",
+    "BraveVideoSearchTool",
+    "BraveWebSearchTool",
    "BrightDataDatasetTool",
    "BrightDataSearchTool",
    "BrightDataWebUnlockerTool",
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/base.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/base.py
@@ -0,0 +1,322 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from datetime import datetime
+import json
+import logging
+import os
+import threading
+import time
+from typing import Any, ClassVar
+
+from crewai.tools import BaseTool, EnvVar
+from pydantic import BaseModel, Field
+import requests
+
+
+logger = logging.getLogger(__name__)
+
+# Brave API error codes that indicate non-retryable quota/usage exhaustion.
+_QUOTA_CODES = frozenset({"QUOTA_LIMITED", "USAGE_LIMIT_EXCEEDED"})
+
+
+def _save_results_to_file(content: str) -> None:
+    """Saves the search results to a file."""
+    filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
+    with open(filename, "w") as file:
+        file.write(content)
+
+
+def _parse_error_body(resp: requests.Response) -> dict[str, Any] | None:
+    """Extract the structured "error" object from a Brave API error response."""
+    try:
+        body = resp.json()
+        error = body.get("error")
+        return error if isinstance(error, dict) else None
+    except (ValueError, KeyError):
+        return None
+
+
+def _raise_for_error(resp: requests.Response) -> None:
+    """Brave Search API error responses contain helpful JSON payloads"""
+    status = resp.status_code
+    try:
+        body = json.dumps(resp.json())
+    except (ValueError, KeyError):
+        body = resp.text[:500]
+
+    raise RuntimeError(f"Brave Search API error (HTTP {status}): {body}")
+
+
+def _is_retryable(resp: requests.Response) -> bool:
+    """Return True for transient failures that are worth retrying.
+
+    * 429 + RATE_LIMITED — the per-second sliding window is full.
+    * 5xx — transient server-side errors.
+
+    Quota exhaustion (QUOTA_LIMITED, USAGE_LIMIT_EXCEEDED) is
+    explicitly excluded: retrying will never succeed until the billing
+    period resets.
+    """
+    if resp.status_code == 429:
+        error = _parse_error_body(resp) or {}
+        return error.get("code") not in _QUOTA_CODES
+    return 500 <= resp.status_code < 600
+
+
+def _retry_delay(resp: requests.Response, attempt: int) -> float:
+    """Compute wait time before the next retry attempt.
+
+    Prefers the server-supplied Retry-After header when available;
+    falls back to exponential backoff (1s, 2s, 4s, ...).
+    """
+    retry_after = resp.headers.get("Retry-After")
+    if retry_after is not None:
+        try:
+            return max(0.0, float(retry_after))
+        except (ValueError, TypeError):
+            pass
+    return float(2**attempt)
+
+
+class BraveSearchToolBase(BaseTool, ABC):
+    """
+    Base class for Brave Search API interactions.
+
+    Individual tool subclasses must provide the following:
+      - search_url
+      - header_schema (pydantic model)
+      - args_schema (pydantic model)
+      - _refine_payload() -> dict[str, Any]
+    """
+
+    search_url: str
+    raw: bool = False
+    args_schema: type[BaseModel]
+    header_schema: type[BaseModel]
+
+    # Tool options (legacy parameters)
+    country: str | None = None
+    save_file: bool = False
+    n_results: int = 10
+
+    env_vars: list[EnvVar] = Field(
+        default_factory=lambda: [
+            EnvVar(
+                name="BRAVE_API_KEY",
+                description="API key for Brave Search",
+                required=True,
+            ),
+        ]
+    )
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None = None,
+        headers: dict[str, Any] | None = None,
+        requests_per_second: float = 1.0,
+        save_file: bool = False,
+        raw: bool = False,
+        timeout: int = 30,
+        **kwargs: Any,
+    ):
+        super().__init__(**kwargs)
+
+        self._api_key = api_key or os.environ.get("BRAVE_API_KEY")
+        if not self._api_key:
+            raise ValueError("BRAVE_API_KEY environment variable is required")
+
+        self.raw = bool(raw)
+        self._timeout = int(timeout)
+        self.save_file = bool(save_file)
+        self._requests_per_second = float(requests_per_second)
+        self._headers = self._build_and_validate_headers(headers or {})
+        # Per-instance rate limiting: each instance has its own clock and lock.
+        # Total process rate is the sum of limits of instances you create.
+        self._last_request_time: float = 0
+        self._rate_limit_lock = threading.Lock()
+
+    @property
+    def api_key(self) -> str:
+        return self._api_key
+
+    @property
+    def headers(self) -> dict[str, Any]:
+        return self._headers
+
+    def set_headers(self, headers: dict[str, Any]) -> BraveSearchToolBase:
+        merged = {**self._headers, **{k.lower(): v for k, v in headers.items()}}
+        self._headers = self._build_and_validate_headers(merged)
+        return self
+
+    def _build_and_validate_headers(self, headers: dict[str, Any]) -> dict[str, Any]:
+        normalized = {k.lower(): v for k, v in headers.items()}
+        normalized.setdefault("x-subscription-token", self._api_key)
+        normalized.setdefault("accept", "application/json")
+
+        try:
+            self.header_schema(**normalized)
+        except Exception as e:
+            raise ValueError(f"Invalid headers: {e}") from e
+
+        return normalized
+
+    def _rate_limit(self) -> None:
+        """Enforce minimum interval between requests for this instance. Thread-safe."""
+        if self._requests_per_second <= 0:
+            return
+
+        min_interval = 1.0 / self._requests_per_second
+        with self._rate_limit_lock:
+            now = time.time()
+            next_allowed = self._last_request_time + min_interval
+            if now < next_allowed:
+                time.sleep(next_allowed - now)
+                now = time.time()
+            self._last_request_time = now
+
+    def _make_request(
+        self, params: dict[str, Any], *, _max_retries: int = 3
+    ) -> dict[str, Any]:
+        """Execute an HTTP GET against the Brave Search API with retry logic."""
+        last_resp: requests.Response | None = None
+
+        # Retry the request up to _max_retries times
+        for attempt in range(_max_retries):
+            self._rate_limit()
+
+            # Make the request
+            try:
+                resp = requests.get(
+                    self.search_url,
+                    headers=self._headers,
+                    params=params,
+                    timeout=self._timeout,
+                )
+            except requests.ConnectionError as exc:
+                raise RuntimeError(
+                    f"Brave Search API connection failed: {exc}"
+                ) from exc
+            except requests.Timeout as exc:
+                raise RuntimeError(
+                    f"Brave Search API request timed out after {self._timeout}s: {exc}"
+                ) from exc
+
+            # Log the rate limit headers and request details
+            logger.debug(
+                "Brave Search API request: %s %s -> %d",
+                "GET",
+                resp.url,
+                resp.status_code,
+            )
+
+            # Response was OK, return the JSON body
+            if resp.ok:
+                try:
+                    return resp.json()
+                except ValueError as exc:
+                    raise RuntimeError(
+                        f"Brave Search API returned invalid JSON (HTTP {resp.status_code}): {exc}"
+                    ) from exc
+
+            # Response was not OK, but is retryable
+            # (e.g., 429 Too Many Requests, 500 Internal Server Error)
+            if _is_retryable(resp) and attempt < _max_retries - 1:
+                delay = _retry_delay(resp, attempt)
+                logger.warning(
+                    "Brave Search API returned %d. Retrying in %.1fs (attempt %d/%d)",
+                    resp.status_code,
+                    delay,
+                    attempt + 1,
+                    _max_retries,
+                )
+                time.sleep(delay)
+                last_resp = resp
+                continue
+
+            # Response was not OK, nor was it retryable
+            # (e.g., 422 Unprocessable Entity, 400 Bad Request (OPTION_NOT_IN_PLAN))
+            _raise_for_error(resp)
+
+        # All retries exhausted
+        _raise_for_error(last_resp or resp)  # type: ignore[possibly-undefined]
+        return {}  # unreachable (here to satisfy the type checker and linter)
+
+    def _run(self, q: str | None = None, **params: Any) -> Any:
+        # Allow positional usage: tool.run("latest Brave browser features")
+        if q is not None:
+            params["q"] = q
+
+        params = self._common_payload_refinement(params)
+
+        # Validate only schema fields
+        schema_keys = self.args_schema.model_fields
+        payload_in = {k: v for k, v in params.items() if k in schema_keys}
+
+        try:
+            validated = self.args_schema(**payload_in)
+        except Exception as e:
+            raise ValueError(f"Invalid parameters: {e}") from e
+
+        # The subclass may have additional refinements to apply to the payload, such as goggles or other parameters
+        payload = self._refine_request_payload(validated.model_dump(exclude_none=True))
+        response = self._make_request(payload)
+
+        if not self.raw:
+            response = self._refine_response(response)
+
+        if self.save_file:
+            _save_results_to_file(json.dumps(response, indent=2))
+
+        return response
+
+    @abstractmethod
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Subclass must implement: transform validated params dict into API request params."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def _refine_response(self, response: dict[str, Any]) -> Any:
+        """Subclass must implement: transform response dict into a more useful format."""
+        raise NotImplementedError
+
+    _EMPTY_VALUES: ClassVar[tuple[None, str, str, list[Any]]] = (None, "", "null", [])
+
+    def _common_payload_refinement(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Common payload refinement for all tools."""
+        # crewAI's schema pipeline (ensure_all_properties_required in
+        # pydantic_schema_utils.py) marks every property as required so
+        # that OpenAI strict-mode structured outputs work correctly.
+        # The side-effect is that the LLM fills in *every* parameter —
+        # even truly optional ones — using placeholder values such as
+        # None, "", "null", or [].  Only optional fields are affected,
+        # so we limit the check to those.
+        fields = self.args_schema.model_fields
+        params = {
+            k: v
+            for k, v in params.items()
+            # Permit custom and required fields, and fields with non-empty values
+            if k not in fields or fields[k].is_required() or v not in self._EMPTY_VALUES
+        }
+
+        # Make sure params has "q" for query instead of "query" or "search_query"
+        query = params.get("query") or params.get("search_query")
+        if query is not None and "q" not in params:
+            params["q"] = query
+        params.pop("query", None)
+        params.pop("search_query", None)
+
+        # If "count" was not explicitly provided, use n_results
+        # (only when the schema actually supports a "count" field)
+        if "count" in self.args_schema.model_fields:
+            if "count" not in params and self.n_results is not None:
+                params["count"] = self.n_results
+
+        # If "country" was not explicitly provided, but self.country is set, use it
+        # (only when the schema actually supports a "country" field)
+        if "country" in self.args_schema.model_fields:
+            if "country" not in params and self.country is not None:
+                params["country"] = self.country
+
+        return params
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_image_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_image_tool.py
@@ -0,0 +1,42 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.schemas import (
+    ImageSearchHeaders,
+    ImageSearchParams,
+)
+
+
+class BraveImageSearchTool(BraveSearchToolBase):
+    """A tool that performs image searches using the Brave Search API."""
+
+    name: str = "Brave Image Search"
+    args_schema: type[BaseModel] = ImageSearchParams
+    header_schema: type[BaseModel] = ImageSearchHeaders
+
+    description: str = (
+        "A tool that performs image searches using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+
+    search_url: str = "https://api.search.brave.com/res/v1/images/search"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: dict[str, Any]) -> list[dict[str, Any]]:
+        # Make the response more concise, and easier to consume
+        results = response.get("results", [])
+        return [
+            {
+                "title": result.get("title"),
+                "url": result.get("properties", {}).get("url"),
+                "dimensions": f"{w}x{h}"
+                if (w := result.get("properties", {}).get("width"))
+                and (h := result.get("properties", {}).get("height"))
+                else None,
+            }
+            for result in results
+        ]
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_llm_context_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_llm_context_tool.py
@@ -0,0 +1,32 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.response_types import LLMContext
+from crewai_tools.tools.brave_search_tool.schemas import (
+    LLMContextHeaders,
+    LLMContextParams,
+)
+
+
+class BraveLLMContextTool(BraveSearchToolBase):
+    """A tool that retrieves context for LLM usage from the Brave Search API."""
+
+    name: str = "Brave LLM Context"
+    args_schema: type[BaseModel] = LLMContextParams
+    header_schema: type[BaseModel] = LLMContextHeaders
+
+    description: str = (
+        "A tool that retrieves context for LLM usage from the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+
+    search_url: str = "https://api.search.brave.com/res/v1/llm/context"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: LLMContext.Response) -> LLMContext.Response:
+        """The LLM Context response schema is fairly simple. Return as is."""
+        return response
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_local_pois_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_local_pois_tool.py
@@ -0,0 +1,109 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.response_types import LocalPOIs
+from crewai_tools.tools.brave_search_tool.schemas import (
+    LocalPOIsDescriptionHeaders,
+    LocalPOIsDescriptionParams,
+    LocalPOIsHeaders,
+    LocalPOIsParams,
+)
+
+
+DayOpeningHours = LocalPOIs.DayOpeningHours
+OpeningHours = LocalPOIs.OpeningHours
+LocationResult = LocalPOIs.LocationResult
+LocalPOIsResponse = LocalPOIs.Response
+
+
+def _flatten_slots(slots: list[DayOpeningHours]) -> list[dict[str, str]]:
+    """Convert a list of DayOpeningHours dicts into simplified entries."""
+    return [
+        {
+            "day": slot["full_name"].lower(),
+            "opens": slot["opens"],
+            "closes": slot["closes"],
+        }
+        for slot in slots
+    ]
+
+
+def _simplify_opening_hours(result: LocationResult) -> list[dict[str, str]] | None:
+    """Collapse opening_hours into a flat list of {day, opens, closes} dicts."""
+    hours = result.get("opening_hours")
+    if not hours:
+        return None
+
+    entries: list[dict[str, str]] = []
+
+    current = hours.get("current_day")
+    if current:
+        entries.extend(_flatten_slots(current))
+
+    days = hours.get("days")
+    if days:
+        for day_slots in days:
+            entries.extend(_flatten_slots(day_slots))
+
+    return entries or None
+
+
+class BraveLocalPOIsTool(BraveSearchToolBase):
+    """A tool that retrieves local POIs using the Brave Search API."""
+
+    name: str = "Brave Local POIs"
+    args_schema: type[BaseModel] = LocalPOIsParams
+    header_schema: type[BaseModel] = LocalPOIsHeaders
+    description: str = (
+        "A tool that retrieves local POIs using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+    search_url: str = "https://api.search.brave.com/res/v1/local/pois"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: LocalPOIsResponse) -> list[dict[str, Any]]:
+        results = response.get("results", [])
+        return [
+            {
+                "title": result.get("title"),
+                "url": result.get("url"),
+                "description": result.get("description"),
+                "address": result.get("postal_address", {}).get("displayAddress"),
+                "contact": result.get("contact", {}).get("telephone")
+                or result.get("contact", {}).get("email")
+                or None,
+                "opening_hours": _simplify_opening_hours(result),
+            }
+            for result in results
+        ]
+
+
+class BraveLocalPOIsDescriptionTool(BraveSearchToolBase):
+    """A tool that retrieves AI-generated descriptions for local POIs using the Brave Search API."""
+
+    name: str = "Brave Local POI Descriptions"
+    args_schema: type[BaseModel] = LocalPOIsDescriptionParams
+    header_schema: type[BaseModel] = LocalPOIsDescriptionHeaders
+    description: str = (
+        "A tool that retrieves AI-generated descriptions for local POIs using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+    search_url: str = "https://api.search.brave.com/res/v1/local/descriptions"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: LocalPOIsResponse) -> list[dict[str, Any]]:
+        # Make the response more concise, and easier to consume
+        results = response.get("results", [])
+        return [
+            {
+                "id": result.get("id"),
+                "description": result.get("description"),
+            }
+            for result in results
+        ]
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_news_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_news_tool.py
@@ -0,0 +1,39 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.schemas import (
+    NewsSearchHeaders,
+    NewsSearchParams,
+)
+
+
+class BraveNewsSearchTool(BraveSearchToolBase):
+    """A tool that performs news searches using the Brave Search API."""
+
+    name: str = "Brave News Search"
+    args_schema: type[BaseModel] = NewsSearchParams
+    header_schema: type[BaseModel] = NewsSearchHeaders
+
+    description: str = (
+        "A tool that performs news searches using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+
+    search_url: str = "https://api.search.brave.com/res/v1/news/search"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: dict[str, Any]) -> list[dict[str, Any]]:
+        # Make the response more concise, and easier to consume
+        results = response.get("results", [])
+        return [
+            {
+                "url": result.get("url"),
+                "title": result.get("title"),
+                "description": result.get("description"),
+            }
+            for result in results
+        ]
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py
@@ -10,16 +10,13 @@ from pydantic import BaseModel, Field
 from pydantic.types import StringConstraints
 import requests

+from crewai_tools.tools.brave_search_tool.schemas import WebSearchParams
+from crewai_tools.tools.brave_search_tool.base import _save_results_to_file
+
+
 load_dotenv()


-def _save_results_to_file(content: str) -> None:
-    """Saves the search results to a file."""
-    filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
-    with open(filename, "w") as file:
-        file.write(content)
-
-
 FreshnessPreset = Literal["pd", "pw", "pm", "py"]
 FreshnessRange = Annotated[
    str, StringConstraints(pattern=r"^\d{4}-\d{2}-\d{2}to\d{4}-\d{2}-\d{2}$")
@@ -28,51 +25,6 @@ Freshness = FreshnessPreset | FreshnessRange
 SafeSearch = Literal["off", "moderate", "strict"]


-class BraveSearchToolSchema(BaseModel):
-    """Input for BraveSearchTool"""
-
-    query: str = Field(..., description="Search query to perform")
-    country: str | None = Field(
-        default=None,
-        description="Country code for geo-targeting (e.g., 'US', 'BR').",
-    )
-    search_language: str | None = Field(
-        default=None,
-        description="Language code for the search results (e.g., 'en', 'es').",
-    )
-    count: int | None = Field(
-        default=None,
-        description="The maximum number of results to return. Actual number may be less.",
-    )
-    offset: int | None = Field(
-        default=None, description="Skip the first N result sets/pages. Max is 9."
-    )
-    safesearch: SafeSearch | None = Field(
-        default=None,
-        description="Filter out explicit content. Options: off/moderate/strict",
-    )
-    spellcheck: bool | None = Field(
-        default=None,
-        description="Attempt to correct spelling errors in the search query.",
-    )
-    freshness: Freshness | None = Field(
-        default=None,
-        description="Enforce freshness of results. Options: pd/pw/pm/py, or YYYY-MM-DDtoYYYY-MM-DD",
-    )
-    text_decorations: bool | None = Field(
-        default=None,
-        description="Include markup to highlight search terms in the results.",
-    )
-    extra_snippets: bool | None = Field(
-        default=None,
-        description="Include up to 5 text snippets for each page if possible.",
-    )
-    operators: bool | None = Field(
-        default=None,
-        description="Whether to apply search operators (e.g., site:example.com).",
-    )
-
-
 # TODO: Extend support to additional endpoints (e.g., /images, /news, etc.)
 class BraveSearchTool(BaseTool):
    """A tool that performs web searches using the Brave Search API."""
@@ -82,7 +34,7 @@ class BraveSearchTool(BaseTool):
        "A tool that performs web searches using the Brave Search API. "
        "Results are returned as structured JSON data."
    )
-    args_schema: type[BaseModel] = BraveSearchToolSchema
+    args_schema: type[BaseModel] = WebSearchParams
    search_url: str = "https://api.search.brave.com/res/v1/web/search"
    n_results: int = 10
    save_file: bool = False
@@ -119,8 +71,8 @@ class BraveSearchTool(BaseTool):

        # Construct and send the request
        try:
-            # Maintain both "search_query" and "query" for backwards compatibility
-            query = kwargs.get("search_query") or kwargs.get("query")
+            # Fallback to "query" or "search_query" for backwards compatibility
+            query = kwargs.get("q") or kwargs.get("query") or kwargs.get("search_query")
            if not query:
                raise ValueError("Query is required")

@@ -129,8 +81,11 @@ class BraveSearchTool(BaseTool):
            if country := kwargs.get("country"):
                payload["country"] = country

-            if search_language := kwargs.get("search_language"):
-                payload["search_language"] = search_language
+            # Fallback to "search_language" for backwards compatibility
+            if search_lang := kwargs.get("search_lang") or kwargs.get(
+                "search_language"
+            ):
+                payload["search_lang"] = search_lang

            # Fallback to deprecated n_results parameter if no count is provided
            count = kwargs.get("count")
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_video_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_video_tool.py
@@ -0,0 +1,39 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.schemas import (
+    VideoSearchHeaders,
+    VideoSearchParams,
+)
+
+
+class BraveVideoSearchTool(BraveSearchToolBase):
+    """A tool that performs video searches using the Brave Search API."""
+
+    name: str = "Brave Video Search"
+    args_schema: type[BaseModel] = VideoSearchParams
+    header_schema: type[BaseModel] = VideoSearchHeaders
+
+    description: str = (
+        "A tool that performs video searches using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+
+    search_url: str = "https://api.search.brave.com/res/v1/videos/search"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: dict[str, Any]) -> list[dict[str, Any]]:
+        # Make the response more concise, and easier to consume
+        results = response.get("results", [])
+        return [
+            {
+                "url": result.get("url"),
+                "title": result.get("title"),
+                "description": result.get("description"),
+            }
+            for result in results
+        ]
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_web_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_web_tool.py
@@ -0,0 +1,45 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.schemas import (
+    WebSearchHeaders,
+    WebSearchParams,
+)
+
+
+class BraveWebSearchTool(BraveSearchToolBase):
+    """A tool that performs web searches using the Brave Search API."""
+
+    name: str = "Brave Web Search"
+    args_schema: type[BaseModel] = WebSearchParams
+    header_schema: type[BaseModel] = WebSearchHeaders
+
+    description: str = (
+        "A tool that performs web searches using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+
+    search_url: str = "https://api.search.brave.com/res/v1/web/search"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: dict[str, Any]) -> list[dict[str, Any]]:
+        results = response.get("web", {}).get("results", [])
+        refined = []
+        for result in results:
+            snippets = result.get("extra_snippets") or []
+            if not snippets:
+                desc = result.get("description")
+                if desc:
+                    snippets = [desc]
+            refined.append(
+                {
+                    "url": result.get("url"),
+                    "title": result.get("title"),
+                    "snippets": snippets,
+                }
+            )
+        return refined
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/response_types.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/response_types.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+from typing import Literal, TypedDict
+
+
+class LocalPOIs:
+    class PostalAddress(TypedDict, total=False):
+        type: Literal["PostalAddress"]
+        country: str
+        postalCode: str
+        streetAddress: str
+        addressRegion: str
+        addressLocality: str
+        displayAddress: str
+
+    class DayOpeningHours(TypedDict):
+        abbr_name: str
+        full_name: str
+        opens: str
+        closes: str
+
+    class OpeningHours(TypedDict, total=False):
+        current_day: list[LocalPOIs.DayOpeningHours]
+        days: list[list[LocalPOIs.DayOpeningHours]]
+
+    class LocationResult(TypedDict, total=False):
+        provider_url: str
+        title: str
+        url: str
+        id: str | None
+        opening_hours: LocalPOIs.OpeningHours | None
+        postal_address: LocalPOIs.PostalAddress | None
+
+    class Response(TypedDict, total=False):
+        type: Literal["local_pois"]
+        results: list[LocalPOIs.LocationResult]
+
+
+class LLMContext:
+    class LLMContextItem(TypedDict, total=False):
+        snippets: list[str]
+        title: str
+        url: str
+
+    class LLMContextMapItem(TypedDict, total=False):
+        name: str
+        snippets: list[str]
+        title: str
+        url: str
+
+    class LLMContextPOIItem(TypedDict, total=False):
+        name: str
+        snippets: list[str]
+        title: str
+        url: str
+
+    class Grounding(TypedDict, total=False):
+        generic: list[LLMContext.LLMContextItem]
+        poi: LLMContext.LLMContextPOIItem
+        map: list[LLMContext.LLMContextMapItem]
+
+    class Sources(TypedDict, total=False):
+        pass
+
+    class Response(TypedDict, total=False):
+        grounding: LLMContext.Grounding
+        sources: LLMContext.Sources
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/schemas.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/schemas.py
@@ -0,0 +1,525 @@
+from typing import Annotated, Literal
+
+from pydantic import BaseModel, Field
+from pydantic.types import StringConstraints
+
+
+# Common types
+Units = Literal["metric", "imperial"]
+SafeSearch = Literal["off", "moderate", "strict"]
+Freshness = (
+    Literal["pd", "pw", "pm", "py"]
+    | Annotated[
+        str, StringConstraints(pattern=r"^\d{4}-\d{2}-\d{2}to\d{4}-\d{2}-\d{2}$")
+    ]
+)
+ResultFilter = list[
+    Literal[
+        "discussions",
+        "faq",
+        "infobox",
+        "news",
+        "query",
+        "summarizer",
+        "videos",
+        "web",
+        "locations",
+    ]
+]
+
+
+class LLMContextParams(BaseModel):
+    """Parameters for Brave LLM Context endpoint."""
+
+    q: str = Field(
+        description="Search query to perform",
+        min_length=1,
+        max_length=400,
+    )
+    country: str | None = Field(
+        default=None,
+        description="Country code for geo-targeting (e.g., 'US', 'BR').",
+        pattern=r"^[A-Z]{2}$",
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    count: int | None = Field(
+        default=None,
+        description="The maximum number of results to return. Actual number may be less.",
+        ge=1,
+        le=50,
+    )
+    maximum_number_of_urls: int | None = Field(
+        default=None,
+        description="The maximum number of URLs to include in the context.",
+        ge=1,
+        le=50,
+    )
+    maximum_number_of_tokens: int | None = Field(
+        default=None,
+        description="The approximate maximum number of tokens to include in the context.",
+        ge=1,
+        le=32768,
+    )
+    maximum_number_of_snippets: int | None = Field(
+        default=None,
+        description="The maximum number of different snippets to include in the context.",
+        ge=1,
+        le=100,
+    )
+    context_threshold_mode: (
+        Literal["disabled", "strict", "lenient", "balanced"] | None
+    ) = Field(
+        default=None,
+        description="The mode to use for the context thresholding.",
+    )
+    maximum_number_of_tokens_per_url: int | None = Field(
+        default=None,
+        description="The maximum number of tokens to include for each URL in the context.",
+        ge=1,
+        le=8192,
+    )
+    maximum_number_of_snippets_per_url: int | None = Field(
+        default=None,
+        description="The maximum number of snippets to include per URL.",
+        ge=1,
+        le=100,
+    )
+    goggles: str | list[str] | None = Field(
+        default=None,
+        description="Goggles act as a custom re-ranking mechanism. Goggle source or URLs.",
+    )
+    enable_local: bool | None = Field(
+        default=None,
+        description="Whether to enable local recall. Not setting this value means auto-detect and uses local recall if any of the localization headers are provided.",
+    )
+
+
+class WebSearchParams(BaseModel):
+    """Parameters for Brave Web Search endpoint."""
+
+    q: str = Field(
+        description="Search query to perform",
+        min_length=1,
+        max_length=400,
+    )
+    country: str | None = Field(
+        default=None,
+        description="Country code for geo-targeting (e.g., 'US', 'BR').",
+        pattern=r"^[A-Z]{2}$",
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    ui_lang: str | None = Field(
+        default=None,
+        description="Language code for the user interface (e.g., 'en-US', 'es-AR').",
+        pattern=r"^[a-z]{2}-[A-Z]{2}$",
+    )
+    count: int | None = Field(
+        default=None,
+        description="The maximum number of results to return. Actual number may be less.",
+        ge=1,
+        le=20,
+    )
+    offset: int | None = Field(
+        default=None,
+        description="Skip the first N result sets/pages. Max is 9.",
+        ge=0,
+        le=9,
+    )
+    safesearch: Literal["off", "moderate", "strict"] | None = Field(
+        default=None,
+        description="Filter out explicit content. Options: off/moderate/strict",
+    )
+    spellcheck: bool | None = Field(
+        default=None,
+        description="Attempt to correct spelling errors in the search query.",
+    )
+    freshness: Freshness | None = Field(
+        default=None,
+        description="Enforce freshness of results. Options: pd/pw/pm/py, or YYYY-MM-DDtoYYYY-MM-DD",
+    )
+    text_decorations: bool | None = Field(
+        default=None,
+        description="Include markup to highlight search terms in the results.",
+    )
+    extra_snippets: bool | None = Field(
+        default=None,
+        description="Include up to 5 text snippets for each page if possible.",
+    )
+    result_filter: ResultFilter | None = Field(
+        default=None,
+        description="Filter the results by type. Options: discussions/faq/infobox/news/query/summarizer/videos/web/locations. Note: The `count` parameter is applied only to the `web` results.",
+    )
+    units: Units | None = Field(
+        default=None,
+        description="The units to use for the results. Options: metric/imperial",
+    )
+    goggles: str | list[str] | None = Field(
+        default=None,
+        description="Goggles act as a custom re-ranking mechanism. Goggle source or URLs.",
+    )
+    summary: bool | None = Field(
+        default=None,
+        description="Whether to generate a summarizer ID for the results.",
+    )
+    enable_rich_callback: bool | None = Field(
+        default=None,
+        description="Whether to enable rich callbacks for the results. Requires Pro level subscription.",
+    )
+    include_fetch_metadata: bool | None = Field(
+        default=None,
+        description="Whether to include fetch metadata (e.g., last fetch time) in the results.",
+    )
+    operators: bool | None = Field(
+        default=None,
+        description="Whether to apply search operators (e.g., site:example.com).",
+    )
+
+
+class LocalPOIsParams(BaseModel):
+    """Parameters for Brave Local POIs endpoint."""
+
+    ids: list[str] = Field(
+        description="List of POI IDs to retrieve. Maximum of 20. IDs are valid for 8 hours.",
+        min_length=1,
+        max_length=20,
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    ui_lang: str | None = Field(
+        default=None,
+        description="Language code for the user interface (e.g., 'en-US', 'es-AR').",
+        pattern=r"^[a-z]{2}-[A-Z]{2}$",
+    )
+    units: Units | None = Field(
+        default=None,
+        description="The units to use for the results. Options: metric/imperial",
+    )
+
+
+class LocalPOIsDescriptionParams(BaseModel):
+    """Parameters for Brave Local POI Descriptions endpoint."""
+
+    ids: list[str] = Field(
+        description="List of POI IDs to retrieve. Maximum of 20. IDs are valid for 8 hours.",
+        min_length=1,
+        max_length=20,
+    )
+
+
+class ImageSearchParams(BaseModel):
+    """Parameters for Brave Image Search endpoint."""
+
+    q: str = Field(
+        description="Search query to perform",
+        min_length=1,
+        max_length=400,
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    country: str | None = Field(
+        default=None,
+        description="Country code for geo-targeting (e.g., 'US', 'BR').",
+        pattern=r"^[A-Z]{2}$",
+    )
+    safesearch: Literal["off", "strict"] | None = Field(
+        default=None,
+        description="Filter out explicit content. Default is strict.",
+    )
+    count: int | None = Field(
+        default=None,
+        description="The maximum number of results to return.",
+        ge=1,
+        le=200,
+    )
+    spellcheck: bool | None = Field(
+        default=None,
+        description="Attempt to correct spelling errors in the search query.",
+    )
+
+
+class VideoSearchParams(BaseModel):
+    """Parameters for Brave Video Search endpoint."""
+
+    q: str = Field(
+        description="Search query to perform",
+        min_length=1,
+        max_length=400,
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    ui_lang: str | None = Field(
+        default=None,
+        description="Language code for the user interface (e.g., 'en-US', 'es-AR').",
+        pattern=r"^[a-z]{2}-[A-Z]{2}$",
+    )
+    country: str | None = Field(
+        default=None,
+        description="Country code for geo-targeting (e.g., 'US', 'BR').",
+        pattern=r"^[A-Z]{2}$",
+    )
+    safesearch: SafeSearch | None = Field(
+        default=None,
+        description="Filter out explicit content. Options: off/moderate/strict",
+    )
+    count: int | None = Field(
+        default=None,
+        description="The maximum number of results to return.",
+        ge=1,
+        le=50,
+    )
+    offset: int | None = Field(
+        default=None,
+        description="Skip the first N result sets/pages. Max is 9.",
+        ge=0,
+        le=9,
+    )
+    spellcheck: bool | None = Field(
+        default=None,
+        description="Attempt to correct spelling errors in the search query.",
+    )
+    freshness: Freshness | None = Field(
+        default=None,
+        description="Enforce freshness of results. Options: pd/pw/pm/py, or YYYY-MM-DDtoYYYY-MM-DD",
+    )
+    include_fetch_metadata: bool | None = Field(
+        default=None,
+        description="Whether to include fetch metadata (e.g., last fetch time) in the results.",
+    )
+    operators: bool | None = Field(
+        default=None,
+        description="Whether to apply search operators (e.g., site:example.com).",
+    )
+
+
+class NewsSearchParams(BaseModel):
+    """Parameters for Brave News Search endpoint."""
+
+    q: str = Field(
+        description="Search query to perform",
+        min_length=1,
+        max_length=400,
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    ui_lang: str | None = Field(
+        default=None,
+        description="Language code for the user interface (e.g., 'en-US', 'es-AR').",
+        pattern=r"^[a-z]{2}-[A-Z]{2}$",
+    )
+    country: str | None = Field(
+        default=None,
+        description="Country code for geo-targeting (e.g., 'US', 'BR').",
+        pattern=r"^[A-Z]{2}$",
+    )
+    safesearch: Literal["off", "moderate", "strict"] | None = Field(
+        default=None,
+        description="Filter out explicit content. Options: off/moderate/strict",
+    )
+    count: int | None = Field(
+        default=None,
+        description="The maximum number of results to return.",
+        ge=1,
+        le=50,
+    )
+    offset: int | None = Field(
+        default=None,
+        description="Skip the first N result sets/pages. Max is 9.",
+        ge=0,
+        le=9,
+    )
+    spellcheck: bool | None = Field(
+        default=None,
+        description="Attempt to correct spelling errors in the search query.",
+    )
+    freshness: Freshness | None = Field(
+        default=None,
+        description="Enforce freshness of results. Options: pd/pw/pm/py, or YYYY-MM-DDtoYYYY-MM-DD",
+    )
+    extra_snippets: bool | None = Field(
+        default=None,
+        description="Include up to 5 text snippets for each page if possible.",
+    )
+    goggles: str | list[str] | None = Field(
+        default=None,
+        description="Goggles act as a custom re-ranking mechanism. Goggle source or URLs.",
+    )
+    include_fetch_metadata: bool | None = Field(
+        default=None,
+        description="Whether to include fetch metadata in the results.",
+    )
+    operators: bool | None = Field(
+        default=None,
+        description="Whether to apply search operators (e.g., site:example.com).",
+    )
+
+
+class BaseSearchHeaders(BaseModel):
+    """Common headers for Brave Search endpoints."""
+
+    x_subscription_token: str = Field(
+        alias="x-subscription-token",
+        description="API key for Brave Search",
+    )
+    api_version: str | None = Field(
+        alias="api-version",
+        default=None,
+        description="API version to use. Default is latest available.",
+        pattern=r"^\d{4}-\d{2}-\d{2}$",  # YYYY-MM-DD
+    )
+    accept: Literal["application/json"] | Literal["*/*"] | None = Field(
+        default=None,
+        description="Accept header for the request.",
+    )
+    cache_control: Literal["no-cache"] | None = Field(
+        alias="cache-control",
+        default=None,
+        description="Cache control header for the request.",
+    )
+    user_agent: str | None = Field(
+        alias="user-agent",
+        default=None,
+        description="User agent for the request.",
+    )
+
+
+class LLMContextHeaders(BaseSearchHeaders):
+    """Headers for Brave LLM Context endpoint."""
+
+    x_loc_lat: float | None = Field(
+        alias="x-loc-lat",
+        default=None,
+        description="Latitude of the user's location.",
+        ge=-90.0,
+        le=90.0,
+    )
+    x_loc_long: float | None = Field(
+        alias="x-loc-long",
+        default=None,
+        description="Longitude of the user's location.",
+        ge=-180.0,
+        le=180.0,
+    )
+    x_loc_city: str | None = Field(
+        alias="x-loc-city",
+        default=None,
+        description="City of the user's location.",
+    )
+    x_loc_state: str | None = Field(
+        alias="x-loc-state",
+        default=None,
+        description="State of the user's location.",
+    )
+    x_loc_state_name: str | None = Field(
+        alias="x-loc-state-name",
+        default=None,
+        description="Name of the state of the user's location.",
+    )
+    x_loc_country: str | None = Field(
+        alias="x-loc-country",
+        default=None,
+        description="The ISO 3166-1 alpha-2 country code of the user's location.",
+    )
+
+
+class LocalPOIsHeaders(BaseSearchHeaders):
+    """Headers for Brave Local POIs endpoint."""
+
+    x_loc_lat: float | None = Field(
+        alias="x-loc-lat",
+        default=None,
+        description="Latitude of the user's location.",
+        ge=-90.0,
+        le=90.0,
+    )
+    x_loc_long: float | None = Field(
+        alias="x-loc-long",
+        default=None,
+        description="Longitude of the user's location.",
+        ge=-180.0,
+        le=180.0,
+    )
+
+
+class LocalPOIsDescriptionHeaders(BaseSearchHeaders):
+    """Headers for Brave Local POI Descriptions endpoint."""
+
+
+class VideoSearchHeaders(BaseSearchHeaders):
+    """Headers for Brave Video Search endpoint."""
+
+
+class ImageSearchHeaders(BaseSearchHeaders):
+    """Headers for Brave Image Search endpoint."""
+
+
+class NewsSearchHeaders(BaseSearchHeaders):
+    """Headers for Brave News Search endpoint."""
+
+
+class WebSearchHeaders(BaseSearchHeaders):
+    """Headers for Brave Web Search endpoint."""
+
+    x_loc_lat: float | None = Field(
+        alias="x-loc-lat",
+        default=None,
+        description="Latitude of the user's location.",
+        ge=-90.0,
+        le=90.0,
+    )
+    x_loc_long: float | None = Field(
+        alias="x-loc-long",
+        default=None,
+        description="Longitude of the user's location.",
+        ge=-180.0,
+        le=180.0,
+    )
+    x_loc_timezone: str | None = Field(
+        alias="x-loc-timezone",
+        default=None,
+        description="Timezone of the user's location.",
+    )
+    x_loc_city: str | None = Field(
+        alias="x-loc-city",
+        default=None,
+        description="City of the user's location.",
+    )
+    x_loc_state: str | None = Field(
+        alias="x-loc-state",
+        default=None,
+        description="State of the user's location.",
+    )
+    x_loc_state_name: str | None = Field(
+        alias="x-loc-state-name",
+        default=None,
+        description="Name of the state of the user's location.",
+    )
+    x_loc_country: str | None = Field(
+        alias="x-loc-country",
+        default=None,
+        description="The ISO 3166-1 alpha-2 country code of the user's location.",
+    )
+    x_loc_postal_code: str | None = Field(
+        alias="x-loc-postal-code",
+        default=None,
+        description="The postal code of the user's location.",
+    )
--- a/lib/crewai-tools/src/crewai_tools/tools/multion_tool/example.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/multion_tool/example.py
@@ -1,7 +1,7 @@
 import os

 from crewai import Agent, Crew, Task
-from multion_tool import MultiOnTool # type: ignore[import-not-found]
+from multion_tool import MultiOnTool  # type: ignore[import-not-found]


 os.environ["OPENAI_API_KEY"] = "Your Key"
--- a/lib/crewai-tools/src/crewai_tools/tools/stagehand_tool/example.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/stagehand_tool/example.py
@@ -17,11 +17,11 @@ Usage:

 import os

+from crewai import Agent, Crew, Process, Task
 from crewai.utilities.printer import Printer
 from dotenv import load_dotenv
 from stagehand.schemas import AvailableModel  # type: ignore[import-untyped]

-from crewai import Agent, Crew, Process, Task
 from crewai_tools import StagehandTool


--- a/lib/crewai-tools/tests/tools/brave_search_tool_test.py
+++ b/lib/crewai-tools/tests/tools/brave_search_tool_test.py
@@ -1,80 +1,777 @@
-import json
-from unittest.mock import patch
+import os
+from unittest.mock import MagicMock, patch

 import pytest
+import requests as requests_lib

-from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.brave_web_tool import BraveWebSearchTool
+from crewai_tools.tools.brave_search_tool.brave_image_tool import BraveImageSearchTool
+from crewai_tools.tools.brave_search_tool.brave_news_tool import BraveNewsSearchTool
+from crewai_tools.tools.brave_search_tool.brave_video_tool import BraveVideoSearchTool
+from crewai_tools.tools.brave_search_tool.brave_llm_context_tool import (
+    BraveLLMContextTool,
+)
+from crewai_tools.tools.brave_search_tool.brave_local_pois_tool import (
+    BraveLocalPOIsTool,
+    BraveLocalPOIsDescriptionTool,
+)
+from crewai_tools.tools.brave_search_tool.schemas import (
+    WebSearchParams,
+    WebSearchHeaders,
+    ImageSearchParams,
+    ImageSearchHeaders,
+    NewsSearchParams,
+    NewsSearchHeaders,
+    VideoSearchParams,
+    VideoSearchHeaders,
+    LLMContextParams,
+    LLMContextHeaders,
+    LocalPOIsParams,
+    LocalPOIsHeaders,
+    LocalPOIsDescriptionParams,
+    LocalPOIsDescriptionHeaders,
+)
+
+
+def _mock_response(
+    status_code: int = 200,
+    json_data: dict | None = None,
+    headers: dict | None = None,
+    text: str = "",
+) -> MagicMock:
+    """Build a ``requests.Response``-like mock with the attributes used by ``_make_request``."""
+    resp = MagicMock(spec=requests_lib.Response)
+    resp.status_code = status_code
+    resp.ok = 200 <= status_code < 400
+    resp.url = "https://api.search.brave.com/res/v1/web/search?q=test"
+    resp.text = text or (str(json_data) if json_data else "")
+    resp.headers = headers or {}
+    resp.json.return_value = json_data if json_data is not None else {}
+    return resp
+
+
+# Fixtures
+
+
+@pytest.fixture(autouse=True)
+def _brave_env_and_rate_limit():
+    """Set BRAVE_API_KEY for every test. Rate limiting is per-instance (each tool starts with a fresh clock)."""
+    with patch.dict(os.environ, {"BRAVE_API_KEY": "test-api-key"}):
+        yield


@pytest.fixture
-def brave_tool():
-    return BraveSearchTool(n_results=2)
+def web_tool():
+    return BraveWebSearchTool()


-def test_brave_tool_initialization():
-    tool = BraveSearchTool()
-    assert tool.n_results == 10
+@pytest.fixture
+def image_tool():
+    return BraveImageSearchTool()
+
+
+@pytest.fixture
+def news_tool():
+    return BraveNewsSearchTool()
+
+
+@pytest.fixture
+def video_tool():
+    return BraveVideoSearchTool()
+
+
+# Initialization
+
+ALL_TOOL_CLASSES = [
+    BraveWebSearchTool,
+    BraveImageSearchTool,
+    BraveNewsSearchTool,
+    BraveVideoSearchTool,
+    BraveLLMContextTool,
+    BraveLocalPOIsTool,
+    BraveLocalPOIsDescriptionTool,
+]
+
+
+@pytest.mark.parametrize("tool_cls", ALL_TOOL_CLASSES)
+def test_instantiation_with_env_var(tool_cls):
+    """Each tool can be created when BRAVE_API_KEY is in the environment."""
+    tool = tool_cls()
+    assert tool.api_key == "test-api-key"
+
+
+@pytest.mark.parametrize("tool_cls", ALL_TOOL_CLASSES)
+def test_instantiation_with_explicit_key(tool_cls):
+    """An explicit api_key takes precedence over the environment."""
+    tool = tool_cls(api_key="explicit-key")
+    assert tool.api_key == "explicit-key"
+
+
+def test_missing_api_key_raises():
+    with patch.dict(os.environ, {}, clear=True):
+        with pytest.raises(ValueError, match="BRAVE_API_KEY"):
+            BraveWebSearchTool()
+
+
+def test_default_attributes():
+    tool = BraveWebSearchTool()
    assert tool.save_file is False
+    assert tool.n_results == 10
+    assert tool._timeout == 30
+    assert tool._requests_per_second == 1.0
+    assert tool.raw is False


-@patch("requests.get")
-def test_brave_tool_search(mock_get, brave_tool):
-    mock_response = {
+def test_custom_constructor_args():
+    tool = BraveWebSearchTool(
+        save_file=True,
+        timeout=60,
+        n_results=5,
+        requests_per_second=0.5,
+        raw=True,
+    )
+    assert tool.save_file is True
+    assert tool._timeout == 60
+    assert tool.n_results == 5
+    assert tool._requests_per_second == 0.5
+    assert tool.raw is True
+
+
+# Headers
+
+
+def test_default_headers():
+    tool = BraveWebSearchTool()
+    assert tool.headers["x-subscription-token"] == "test-api-key"
+    assert tool.headers["accept"] == "application/json"
+
+
+def test_set_headers_merges_and_normalizes():
+    tool = BraveWebSearchTool()
+    tool.set_headers({"Cache-Control": "no-cache"})
+    assert tool.headers["cache-control"] == "no-cache"
+    assert tool.headers["x-subscription-token"] == "test-api-key"
+
+
+def test_set_headers_returns_self_for_chaining():
+    tool = BraveWebSearchTool()
+    assert tool.set_headers({"Cache-Control": "no-cache"}) is tool
+
+
+def test_invalid_header_value_raises():
+    tool = BraveImageSearchTool()
+    with pytest.raises(ValueError, match="Invalid headers"):
+        tool.set_headers({"Accept": "text/xml"})
+
+
+# Endpoint & Schema Wiring
+
+
+@pytest.mark.parametrize(
+    "tool_cls, expected_url, expected_params, expected_headers",
+    [
+        (
+            BraveWebSearchTool,
+            "https://api.search.brave.com/res/v1/web/search",
+            WebSearchParams,
+            WebSearchHeaders,
+        ),
+        (
+            BraveImageSearchTool,
+            "https://api.search.brave.com/res/v1/images/search",
+            ImageSearchParams,
+            ImageSearchHeaders,
+        ),
+        (
+            BraveNewsSearchTool,
+            "https://api.search.brave.com/res/v1/news/search",
+            NewsSearchParams,
+            NewsSearchHeaders,
+        ),
+        (
+            BraveVideoSearchTool,
+            "https://api.search.brave.com/res/v1/videos/search",
+            VideoSearchParams,
+            VideoSearchHeaders,
+        ),
+        (
+            BraveLLMContextTool,
+            "https://api.search.brave.com/res/v1/llm/context",
+            LLMContextParams,
+            LLMContextHeaders,
+        ),
+        (
+            BraveLocalPOIsTool,
+            "https://api.search.brave.com/res/v1/local/pois",
+            LocalPOIsParams,
+            LocalPOIsHeaders,
+        ),
+        (
+            BraveLocalPOIsDescriptionTool,
+            "https://api.search.brave.com/res/v1/local/descriptions",
+            LocalPOIsDescriptionParams,
+            LocalPOIsDescriptionHeaders,
+        ),
+    ],
+)
+def test_tool_wiring(tool_cls, expected_url, expected_params, expected_headers):
+    tool = tool_cls()
+    assert tool.search_url == expected_url
+    assert tool.args_schema is expected_params
+    assert tool.header_schema is expected_headers
+
+
+# Payload Refinement  (e.g., `query` -> `q`, `count` fallback, param pass-through)
+
+
+def test_web_refine_request_payload_passes_all_params(web_tool):
+    params = web_tool._common_payload_refinement(
+        {
+            "query": "test",
+            "country": "US",
+            "search_lang": "en",
+            "count": 5,
+            "offset": 2,
+            "safesearch": "moderate",
+            "freshness": "pw",
+        }
+    )
+    refined_params = web_tool._refine_request_payload(params)
+
+    assert refined_params["q"] == "test"
+    assert "query" not in refined_params
+    assert refined_params["count"] == 5
+    assert refined_params["country"] == "US"
+    assert refined_params["search_lang"] == "en"
+    assert refined_params["offset"] == 2
+    assert refined_params["safesearch"] == "moderate"
+    assert refined_params["freshness"] == "pw"
+
+
+def test_image_refine_request_payload_passes_all_params(image_tool):
+    params = image_tool._common_payload_refinement(
+        {
+            "query": "cat photos",
+            "country": "US",
+            "search_lang": "en",
+            "safesearch": "strict",
+            "count": 50,
+            "spellcheck": True,
+        }
+    )
+    refined_params = image_tool._refine_request_payload(params)
+
+    assert refined_params["q"] == "cat photos"
+    assert "query" not in refined_params
+    assert refined_params["country"] == "US"
+    assert refined_params["safesearch"] == "strict"
+    assert refined_params["count"] == 50
+    assert refined_params["spellcheck"] is True
+
+
+def test_news_refine_request_payload_passes_all_params(news_tool):
+    params = news_tool._common_payload_refinement(
+        {
+            "query": "breaking news",
+            "country": "US",
+            "count": 10,
+            "offset": 1,
+            "freshness": "pd",
+            "extra_snippets": True,
+        }
+    )
+    refined_params = news_tool._refine_request_payload(params)
+
+    assert refined_params["q"] == "breaking news"
+    assert "query" not in refined_params
+    assert refined_params["country"] == "US"
+    assert refined_params["offset"] == 1
+    assert refined_params["freshness"] == "pd"
+    assert refined_params["extra_snippets"] is True
+
+
+def test_video_refine_request_payload_passes_all_params(video_tool):
+    params = video_tool._common_payload_refinement(
+        {
+            "query": "tutorial",
+            "country": "US",
+            "count": 25,
+            "offset": 0,
+            "safesearch": "strict",
+            "freshness": "pm",
+        }
+    )
+    refined_params = video_tool._refine_request_payload(params)
+
+    assert refined_params["q"] == "tutorial"
+    assert "query" not in refined_params
+    assert refined_params["country"] == "US"
+    assert refined_params["offset"] == 0
+    assert refined_params["freshness"] == "pm"
+
+
+def test_legacy_constructor_params_flow_into_query_params():
+    """The legacy n_results and country constructor params are applied as defaults
+    when count/country are not explicitly provided at call time."""
+    tool = BraveWebSearchTool(n_results=3, country="BR")
+    params = tool._common_payload_refinement({"query": "test"})
+
+    assert params["count"] == 3
+    assert params["country"] == "BR"
+
+
+def test_legacy_constructor_params_do_not_override_explicit_query_params():
+    """Explicit query-time count/country take precedence over constructor defaults."""
+    tool = BraveWebSearchTool(n_results=3, country="BR")
+    params = tool._common_payload_refinement(
+        {"query": "test", "count": 10, "country": "US"}
+    )
+
+    assert params["count"] == 10
+    assert params["country"] == "US"
+
+
+def test_refine_request_payload_passes_multiple_goggles_as_multiple_params(web_tool):
+    result = web_tool._refine_request_payload(
+        {
+            "query": "test",
+            "goggles": ["goggle1", "goggle2"],
+        }
+    )
+    assert result["goggles"] == ["goggle1", "goggle2"]
+
+
+# Null-like / empty value stripping
+#
+# crewAI's ensure_all_properties_required (pydantic_schema_utils.py) marks
+# every schema property as required for OpenAI strict-mode compatibility.
+# Because optional Brave API parameters look required to the LLM, it fills
+# them with placeholder junk — None, "", "null", or [].  The test below
+# verifies that _common_payload_refinement strips these from optional fields.
+
+
+def test_common_refinement_strips_null_like_values(web_tool):
+    """_common_payload_refinement drops optional keys with None / '' / 'null' / []."""
+    params = web_tool._common_payload_refinement(
+        {
+            "query": "test",
+            "country": "US",
+            "search_lang": "",
+            "freshness": "null",
+            "count": 5,
+            "goggles": [],
+        }
+    )
+    assert params["q"] == "test"
+    assert params["country"] == "US"
+    assert params["count"] == 5
+    assert "search_lang" not in params
+    assert "freshness" not in params
+    assert "goggles" not in params
+
+
+# End-to-End _run() with Mocked HTTP Response
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_web_search_end_to_end(mock_get, web_tool):
+    web_tool.raw = True
+    data = {"web": {"results": [{"title": "R", "url": "http://r.co"}]}}
+    mock_get.return_value = _mock_response(json_data=data)
+
+    result = web_tool._run(query="test")
+
+    mock_get.assert_called_once()
+    call_args = mock_get.call_args.kwargs
+    assert call_args["params"]["q"] == "test"
+    assert call_args["headers"]["x-subscription-token"] == "test-api-key"
+    assert result == data
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_image_search_end_to_end(mock_get, image_tool):
+    image_tool.raw = True
+    data = {"results": [{"url": "http://img.co/a.jpg"}]}
+    mock_get.return_value = _mock_response(json_data=data)
+
+    assert image_tool._run(query="cats") == data
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_news_search_end_to_end(mock_get, news_tool):
+    news_tool.raw = True
+    data = {"results": [{"title": "News", "url": "http://n.co"}]}
+    mock_get.return_value = _mock_response(json_data=data)
+
+    assert news_tool._run(query="headlines") == data
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_video_search_end_to_end(mock_get, video_tool):
+    video_tool.raw = True
+    data = {"results": [{"title": "Vid", "url": "http://v.co"}]}
+    mock_get.return_value = _mock_response(json_data=data)
+
+    assert video_tool._run(query="python tutorial") == data
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_raw_false_calls_refine_response(mock_get, web_tool):
+    """With raw=False (the default), _refine_response transforms the API response."""
+    api_response = {
        "web": {
            "results": [
                {
-                    "title": "Test Title",
-                    "url": "http://test.com",
-                    "description": "Test Description",
+                    "title": "CrewAI",
+                    "url": "https://crewai.com",
+                    "description": "AI agent framework",
                }
            ]
        }
    }
-    mock_get.return_value.json.return_value = mock_response
+    mock_get.return_value = _mock_response(json_data=api_response)

-    result = brave_tool.run(query="test")
-    data = json.loads(result)
-    assert isinstance(data, list)
-    assert len(data) >= 1
-    assert data[0]["title"] == "Test Title"
-    assert data[0]["url"] == "http://test.com"
+    assert web_tool.raw is False
+    result = web_tool._run(query="crewai")
+
+    # The web tool's _refine_response extracts and reshapes results.
+    # The key assertion: we should NOT get back the raw API envelope.
+    assert result != api_response


-@patch("requests.get")
-def test_brave_tool(mock_get):
-    mock_response = {
-        "web": {
-            "results": [
-                {
-                    "title": "Brave Browser",
-                    "url": "https://brave.com",
-                    "description": "Brave Browser description",
-                }
-            ]
-        }
-    }
-    mock_get.return_value.json.return_value = mock_response
-
-    tool = BraveSearchTool(n_results=2)
-    result = tool.run(query="Brave Browser")
-    assert result is not None
-
-    # Parse JSON so we can examine the structure
-    data = json.loads(result)
-    assert isinstance(data, list)
-    assert len(data) >= 1
-
-    # First item should have expected fields: title, url, and description
-    first = data[0]
-    assert "title" in first
-    assert first["title"] == "Brave Browser"
-    assert "url" in first
-    assert first["url"] == "https://brave.com"
-    assert "description" in first
-    assert first["description"] == "Brave Browser description"
+# Backward Compatibility & Legacy Parameter Support


-if __name__ == "__main__":
-    test_brave_tool()
-    test_brave_tool_initialization()
-    # test_brave_tool_search(brave_tool)
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_positional_query_argument(mock_get, web_tool):
+    """tool.run('my query') works as a positional argument."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    web_tool._run("positional test")
+
+    assert mock_get.call_args.kwargs["params"]["q"] == "positional test"
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_search_query_backward_compat(mock_get, web_tool):
+    """The legacy 'search_query' param is mapped to 'query'."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    web_tool._run(search_query="legacy test")
+
+    assert mock_get.call_args.kwargs["params"]["q"] == "legacy test"
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base._save_results_to_file")
+def test_save_file_called_when_enabled(mock_save, mock_get):
+    mock_get.return_value = _mock_response(json_data={"results": []})
+
+    tool = BraveWebSearchTool(save_file=True)
+    tool._run(query="test")
+
+    mock_save.assert_called_once()
+
+
+# Error Handling
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_connection_error_raises_runtime_error(mock_get, web_tool):
+    mock_get.side_effect = requests_lib.exceptions.ConnectionError("refused")
+    with pytest.raises(RuntimeError, match="Brave Search API connection failed"):
+        web_tool._run(query="test")
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_timeout_raises_runtime_error(mock_get, web_tool):
+    mock_get.side_effect = requests_lib.exceptions.Timeout("timed out")
+    with pytest.raises(RuntimeError, match="timed out"):
+        web_tool._run(query="test")
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_invalid_params_raises_value_error(mock_get, web_tool):
+    """count=999 exceeds WebSearchParams.count le=20."""
+    with pytest.raises(ValueError, match="Invalid parameters"):
+        web_tool._run(query="test", count=999)
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_4xx_error_raises_with_api_detail(mock_get, web_tool):
+    """A 422 with a structured error body includes code and detail in the message."""
+    mock_get.return_value = _mock_response(
+        status_code=422,
+        json_data={
+            "error": {
+                "id": "abc-123",
+                "status": 422,
+                "code": "OPTION_NOT_IN_PLAN",
+                "detail": "extra_snippets requires a Pro plan",
+            }
+        },
+    )
+    with pytest.raises(RuntimeError, match="OPTION_NOT_IN_PLAN") as exc_info:
+        web_tool._run(query="test")
+    assert "extra_snippets requires a Pro plan" in str(exc_info.value)
+    assert "HTTP 422" in str(exc_info.value)
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_auth_error_raises_immediately(mock_get, web_tool):
+    """A 401 with SUBSCRIPTION_TOKEN_INVALID is not retried."""
+    mock_get.return_value = _mock_response(
+        status_code=401,
+        json_data={
+            "error": {
+                "id": "xyz",
+                "status": 401,
+                "code": "SUBSCRIPTION_TOKEN_INVALID",
+                "detail": "The subscription token is invalid",
+            }
+        },
+    )
+    with pytest.raises(RuntimeError, match="SUBSCRIPTION_TOKEN_INVALID"):
+        web_tool._run(query="test")
+    # Should NOT have retried — only one call.
+    assert mock_get.call_count == 1
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_quota_limited_429_raises_immediately(mock_get, web_tool):
+    """A 429 with QUOTA_LIMITED is NOT retried — quota exhaustion is terminal."""
+    mock_get.return_value = _mock_response(
+        status_code=429,
+        json_data={
+            "error": {
+                "id": "ql-1",
+                "status": 429,
+                "code": "QUOTA_LIMITED",
+                "detail": "Monthly quota exceeded",
+            }
+        },
+    )
+    with pytest.raises(RuntimeError, match="QUOTA_LIMITED") as exc_info:
+        web_tool._run(query="test")
+    assert "Monthly quota exceeded" in str(exc_info.value)
+    # Terminal — only one HTTP call, no retries.
+    assert mock_get.call_count == 1
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_usage_limit_exceeded_429_raises_immediately(mock_get, web_tool):
+    """USAGE_LIMIT_EXCEEDED is also non-retryable, just like QUOTA_LIMITED."""
+    mock_get.return_value = _mock_response(
+        status_code=429,
+        json_data={
+            "error": {
+                "id": "ule-1",
+                "status": 429,
+                "code": "USAGE_LIMIT_EXCEEDED",
+            }
+        },
+        text="usage limit exceeded",
+    )
+    with pytest.raises(RuntimeError, match="USAGE_LIMIT_EXCEEDED"):
+        web_tool._run(query="test")
+    assert mock_get.call_count == 1
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_error_body_is_fully_included_in_message(mock_get, web_tool):
+    """The full JSON error body is included in the RuntimeError message."""
+    mock_get.return_value = _mock_response(
+        status_code=429,
+        json_data={
+            "error": {
+                "id": "x",
+                "status": 429,
+                "code": "QUOTA_LIMITED",
+                "detail": "Exceeded",
+                "meta": {"plan": "free", "limit": 1000},
+            }
+        },
+    )
+    with pytest.raises(RuntimeError) as exc_info:
+        web_tool._run(query="test")
+    msg = str(exc_info.value)
+    assert "HTTP 429" in msg
+    assert "QUOTA_LIMITED" in msg
+    assert "free" in msg
+    assert "1000" in msg
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_error_without_json_body_falls_back_to_text(mock_get, web_tool):
+    """When the error response isn't valid JSON, resp.text is used as the detail."""
+    resp = _mock_response(status_code=500, text="Internal Server Error")
+    resp.json.side_effect = ValueError("No JSON")
+    mock_get.return_value = resp
+
+    with pytest.raises(RuntimeError, match="Internal Server Error"):
+        web_tool._run(query="test")
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_invalid_json_on_success_raises_runtime_error(mock_get, web_tool):
+    """A 200 OK with a non-JSON body raises RuntimeError."""
+    resp = _mock_response(status_code=200)
+    resp.json.side_effect = ValueError("Expecting value")
+    mock_get.return_value = resp
+
+    with pytest.raises(RuntimeError, match="invalid JSON"):
+        web_tool._run(query="test")
+
+
+# Rate Limiting
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_rate_limit_sleeps_when_too_fast(mock_time, mock_get, web_tool):
+    """Back-to-back calls within the interval trigger a sleep."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    # Simulate: last request was at t=100, "now" is t=100.2 (only 0.2s elapsed).
+    # With default 1 req/s the min interval is 1.0s, so it should sleep ~0.8s.
+    mock_time.time.return_value = 100.2
+    web_tool._last_request_time = 100.0
+
+    web_tool._run(query="test")
+
+    mock_time.sleep.assert_called_once()
+    sleep_duration = mock_time.sleep.call_args[0][0]
+    assert 0.7 < sleep_duration < 0.9  # approximately 0.8s
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_rate_limit_skips_sleep_when_enough_time_passed(mock_time, mock_get, web_tool):
+    """No sleep when the elapsed time already exceeds the interval."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    # Last request was at t=100, "now" is t=102 (2s elapsed > 1s interval).
+    mock_time.time.return_value = 102.0
+    web_tool._last_request_time = 100.0
+
+    web_tool._run(query="test")
+
+    mock_time.sleep.assert_not_called()
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_rate_limit_disabled_when_zero(mock_time, mock_get, web_tool):
+    """requests_per_second=0 disables rate limiting entirely."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    web_tool._last_request_time = 100.0
+    mock_time.time.return_value = 100.0  # same instant
+
+    web_tool._run(query="test")
+
+    mock_time.sleep.assert_not_called()
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_rate_limit_per_instance_independent(mock_time, mock_get, web_tool, image_tool):
+    """Each instance has its own rate-limit clock; a request on one does not delay the other."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    # Web tool fires at t=100 (its clock goes 0 -> 100).
+    mock_time.time.return_value = 100.0
+    web_tool._run(query="test")
+
+    # Image tool fires at t=100.3. Its clock is still 0 (separate instance), so
+    # next_allowed = 1.0 and 100.3 > 1.0 — no sleep. Total process rate can be sum of instance limits.
+    mock_time.time.return_value = 100.3
+    image_tool._run(query="cats")
+
+    mock_time.sleep.assert_not_called()
+
+
+# Retry Behavior
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_429_rate_limited_retries_then_succeeds(mock_time, mock_get, web_tool):
+    """A transient RATE_LIMITED 429 is retried; success on the second attempt."""
+    mock_time.time.return_value = 200.0
+
+    resp_429 = _mock_response(
+        status_code=429,
+        json_data={"error": {"id": "r", "status": 429, "code": "RATE_LIMITED"}},
+        headers={"Retry-After": "2"},
+    )
+    resp_200 = _mock_response(status_code=200, json_data={"web": {"results": []}})
+    mock_get.side_effect = [resp_429, resp_200]
+
+    web_tool.raw = True
+    result = web_tool._run(query="test")
+
+    assert result == {"web": {"results": []}}
+    assert mock_get.call_count == 2
+    # Slept for the Retry-After value.
+    retry_sleeps = [c for c in mock_time.sleep.call_args_list if c[0][0] == 2.0]
+    assert len(retry_sleeps) == 1
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_5xx_is_retried(mock_time, mock_get, web_tool):
+    """A 502 server error is retried; success on the second attempt."""
+    mock_time.time.return_value = 200.0
+
+    resp_502 = _mock_response(status_code=502, text="Bad Gateway")
+    resp_502.json.side_effect = ValueError("no json")
+    resp_200 = _mock_response(status_code=200, json_data={"web": {"results": []}})
+    mock_get.side_effect = [resp_502, resp_200]
+
+    web_tool.raw = True
+    result = web_tool._run(query="test")
+
+    assert result == {"web": {"results": []}}
+    assert mock_get.call_count == 2
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_429_rate_limited_exhausts_retries(mock_time, mock_get, web_tool):
+    """Persistent RATE_LIMITED 429s exhaust retries and raise RuntimeError."""
+    mock_time.time.return_value = 200.0
+
+    resp_429 = _mock_response(
+        status_code=429,
+        json_data={"error": {"id": "r", "status": 429, "code": "RATE_LIMITED"}},
+    )
+    mock_get.return_value = resp_429
+
+    with pytest.raises(RuntimeError, match="RATE_LIMITED"):
+        web_tool._run(query="test")
+    # 3 attempts (default _max_retries).
+    assert mock_get.call_count == 3
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_retry_uses_exponential_backoff_when_no_retry_after(
+    mock_time, mock_get, web_tool
+):
+    """Without Retry-After, backoff is 2^attempt (1s, 2s, ...)."""
+    mock_time.time.return_value = 200.0
+
+    resp_503 = _mock_response(status_code=503, text="Service Unavailable")
+    resp_503.json.side_effect = ValueError("no json")
+    resp_200 = _mock_response(status_code=200, json_data={"ok": True})
+    mock_get.side_effect = [resp_503, resp_503, resp_200]
+
+    web_tool.raw = True
+    web_tool._run(query="test")
+
+    # Two retries: attempt 0 → sleep(1.0), attempt 1 → sleep(2.0).
+    retry_sleeps = [c[0][0] for c in mock_time.sleep.call_args_list]
+    assert 1.0 in retry_sleeps
+    assert 2.0 in retry_sleeps
--- a/lib/crewai-tools/tool.specs.json
+++ b/lib/crewai-tools/tool.specs.json
--- a/lib/crewai/pyproject.toml
+++ b/lib/crewai/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
    "opentelemetry-exporter-otlp-proto-http~=1.34.0",
    # Data Handling
    "chromadb~=1.1.0",
-    "tokenizers~=0.20.3",
+    "tokenizers>=0.21,<1",
    "openpyxl~=3.1.5",
    # Authentication and Security
    "python-dotenv~=1.1.1",
@@ -38,10 +38,11 @@ dependencies = [
    "json5~=0.10.0",
    "portalocker~=2.7.0",
    "pydantic-settings~=2.10.1",
+    "httpx~=0.28.1",
    "mcp~=1.26.0",
    "uv~=0.9.13",
    "aiosqlite~=0.21.0",
-    "lancedb>=0.4.0",
+    "lancedb>=0.29.2",
 ]

 [project.urls]
@@ -52,7 +53,7 @@ Repository = "https://github.com/crewAIInc/crewAI"

 [project.optional-dependencies]
 tools = [
-    "crewai-tools==1.9.3",
+    "crewai-tools==1.10.1",
 ]
 embeddings = [
    "tiktoken~=0.8.0"
@@ -65,7 +66,7 @@ openpyxl = [
 ]
 mem0 = ["mem0ai~=0.1.94"]
 docling = [
-    "docling~=2.63.0",
+    "docling~=2.75.0",
 ]
 qdrant = [
    "qdrant-client[fastembed]~=1.14.3",
@@ -87,7 +88,7 @@ bedrock = [
    "boto3~=1.40.45",
 ]
 google-genai = [
-    "google-genai~=1.49.0",
+    "google-genai~=1.65.0",
 ]
 azure-ai-inference = [
    "azure-ai-inference~=1.0.0b9",
--- a/lib/crewai/src/crewai/init.py
+++ b/lib/crewai/src/crewai/init.py
@@ -10,7 +10,6 @@ from crewai.flow.flow import Flow
 from crewai.knowledge.knowledge import Knowledge
 from crewai.llm import LLM
 from crewai.llms.base_llm import BaseLLM
-from crewai.memory.unified_memory import Memory
 from crewai.process import Process
 from crewai.task import Task
 from crewai.tasks.llm_guardrail import LLMGuardrail
@@ -41,7 +40,7 @@ def _suppress_pydantic_deprecation_warnings() -> None:

 _suppress_pydantic_deprecation_warnings()

-__version__ = "1.9.3"
+__version__ = "1.10.1"
 _telemetry_submitted = False


@@ -72,6 +71,25 @@ def _track_install_async() -> None:


 _track_install_async()
+
+_LAZY_IMPORTS: dict[str, tuple[str, str]] = {
+    "Memory": ("crewai.memory.unified_memory", "Memory"),
+}
+
+
+def __getattr__(name: str) -> Any:
+    """Lazily import heavy modules (e.g. Memory → lancedb) on first access."""
+    if name in _LAZY_IMPORTS:
+        module_path, attr = _LAZY_IMPORTS[name]
+        import importlib
+
+        mod = importlib.import_module(module_path)
+        val = getattr(mod, attr)
+        globals()[name] = val
+        return val
+    raise AttributeError(f"module 'crewai' has no attribute {name!r}")
+
+
 __all__ = [
    "LLM",
    "Agent",
--- a/lib/crewai/src/crewai/a2a/utils/agent_card.py
+++ b/lib/crewai/src/crewai/a2a/utils/agent_card.py
@@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 from collections.abc import MutableMapping
+import concurrent.futures
 from functools import lru_cache
 import ssl
 import time
@@ -138,14 +139,17 @@ def fetch_agent_card(
        ttl_hash = int(time.time() // cache_ttl)
        return _fetch_agent_card_cached(endpoint, auth_hash, timeout, ttl_hash)

-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
+    coro = afetch_agent_card(endpoint=endpoint, auth=auth, timeout=timeout)
    try:
-        return loop.run_until_complete(
-            afetch_agent_card(endpoint=endpoint, auth=auth, timeout=timeout)
-        )
-    finally:
-        loop.close()
+        asyncio.get_running_loop()
+        has_running_loop = True
+    except RuntimeError:
+        has_running_loop = False
+
+    if has_running_loop:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            return pool.submit(asyncio.run, coro).result()
+    return asyncio.run(coro)


 async def afetch_agent_card(
@@ -203,14 +207,17 @@ def _fetch_agent_card_cached(
    """Cached sync version of fetch_agent_card."""
    auth = _auth_store.get(auth_hash)

-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
+    coro = _afetch_agent_card_impl(endpoint=endpoint, auth=auth, timeout=timeout)
    try:
-        return loop.run_until_complete(
-            _afetch_agent_card_impl(endpoint=endpoint, auth=auth, timeout=timeout)
-        )
-    finally:
-        loop.close()
+        asyncio.get_running_loop()
+        has_running_loop = True
+    except RuntimeError:
+        has_running_loop = False
+
+    if has_running_loop:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            return pool.submit(asyncio.run, coro).result()
+    return asyncio.run(coro)


@cached(ttl=300, serializer=PickleSerializer())  # type: ignore[untyped-decorator]
--- a/lib/crewai/src/crewai/a2a/utils/delegation.py
+++ b/lib/crewai/src/crewai/a2a/utils/delegation.py
@@ -5,6 +5,7 @@ from __future__ import annotations
 import asyncio
 import base64
 from collections.abc import AsyncIterator, Callable, MutableMapping
+import concurrent.futures
 from contextlib import asynccontextmanager
 import logging
 from typing import TYPE_CHECKING, Any, Final, Literal
@@ -194,56 +195,43 @@ def execute_a2a_delegation(

    Returns:
        TaskStateResult with status, result/error, history, and agent_card.
-
-    Raises:
-        RuntimeError: If called from an async context with a running event loop.
    """
+    coro = aexecute_a2a_delegation(
+        endpoint=endpoint,
+        auth=auth,
+        timeout=timeout,
+        task_description=task_description,
+        context=context,
+        context_id=context_id,
+        task_id=task_id,
+        reference_task_ids=reference_task_ids,
+        metadata=metadata,
+        extensions=extensions,
+        conversation_history=conversation_history,
+        agent_id=agent_id,
+        agent_role=agent_role,
+        agent_branch=agent_branch,
+        response_model=response_model,
+        turn_number=turn_number,
+        updates=updates,
+        from_task=from_task,
+        from_agent=from_agent,
+        skill_id=skill_id,
+        client_extensions=client_extensions,
+        transport=transport,
+        accepted_output_modes=accepted_output_modes,
+        input_files=input_files,
+    )
    try:
        asyncio.get_running_loop()
-        raise RuntimeError(
-            "execute_a2a_delegation() cannot be called from an async context. "
-            "Use 'await aexecute_a2a_delegation()' instead."
-        )
-    except RuntimeError as e:
-        if "no running event loop" not in str(e).lower():
-            raise
+        has_running_loop = True
+    except RuntimeError:
+        has_running_loop = False

-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    try:
-        return loop.run_until_complete(
-            aexecute_a2a_delegation(
-                endpoint=endpoint,
-                auth=auth,
-                timeout=timeout,
-                task_description=task_description,
-                context=context,
-                context_id=context_id,
-                task_id=task_id,
-                reference_task_ids=reference_task_ids,
-                metadata=metadata,
-                extensions=extensions,
-                conversation_history=conversation_history,
-                agent_id=agent_id,
-                agent_role=agent_role,
-                agent_branch=agent_branch,
-                response_model=response_model,
-                turn_number=turn_number,
-                updates=updates,
-                from_task=from_task,
-                from_agent=from_agent,
-                skill_id=skill_id,
-                client_extensions=client_extensions,
-                transport=transport,
-                accepted_output_modes=accepted_output_modes,
-                input_files=input_files,
-            )
-        )
-    finally:
-        try:
-            loop.run_until_complete(loop.shutdown_asyncgens())
-        finally:
-            loop.close()
+    if has_running_loop:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            return pool.submit(asyncio.run, coro).result()
+    return asyncio.run(coro)


 async def aexecute_a2a_delegation(
--- a/lib/crewai/src/crewai/agent/core.py
+++ b/lib/crewai/src/crewai/agent/core.py
@@ -8,11 +8,9 @@ import time
 from typing import (
    TYPE_CHECKING,
    Any,
-    Final,
    Literal,
    cast,
 )
-from urllib.parse import urlparse

 from pydantic import (
    BaseModel,
@@ -61,16 +59,8 @@ from crewai.knowledge.knowledge import Knowledge
 from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
 from crewai.lite_agent_output import LiteAgentOutput
 from crewai.llms.base_llm import BaseLLM
-from crewai.mcp import (
-    MCPClient,
-    MCPServerConfig,
-    MCPServerHTTP,
-    MCPServerSSE,
-    MCPServerStdio,
-)
-from crewai.mcp.transports.http import HTTPTransport
-from crewai.mcp.transports.sse import SSETransport
-from crewai.mcp.transports.stdio import StdioTransport
+from crewai.mcp import MCPServerConfig
+from crewai.mcp.tool_resolver import MCPToolResolver
 from crewai.rag.embeddings.types import EmbedderConfig
 from crewai.security.fingerprint import Fingerprint
 from crewai.tools.agent_tools.agent_tools import AgentTools
@@ -111,18 +101,8 @@ if TYPE_CHECKING:
    from crewai.utilities.types import LLMMessage


-# MCP Connection timeout constants (in seconds)
-MCP_CONNECTION_TIMEOUT: Final[int] = 10
-MCP_TOOL_EXECUTION_TIMEOUT: Final[int] = 30
-MCP_DISCOVERY_TIMEOUT: Final[int] = 15
-MCP_MAX_RETRIES: Final[int] = 3
-
 _passthrough_exceptions: tuple[type[Exception], ...] = ()

-# Simple in-memory cache for MCP tool schemas (duration: 5 minutes)
-_mcp_schema_cache: dict[str, Any] = {}
-_cache_ttl: Final[int] = 300  # 5 minutes
-

 class Agent(BaseAgent):
    """Represents an agent in a system.
@@ -154,7 +134,7 @@ class Agent(BaseAgent):
    model_config = ConfigDict()

    _times_executed: int = PrivateAttr(default=0)
-    _mcp_clients: list[Any] = PrivateAttr(default_factory=list)
+    _mcp_resolver: MCPToolResolver | None = PrivateAttr(default=None)
    _last_messages: list[LLMMessage] = PrivateAttr(default_factory=list)
    max_execution_time: int | None = Field(
        default=None,
@@ -384,10 +364,10 @@ class Agent(BaseAgent):
                )
                if unified_memory is not None:
                    query = task.description
-                    matches = unified_memory.recall(query, limit=10)
+                    matches = unified_memory.recall(query, limit=5)
                    if matches:
                        memory = "Relevant memories:\n" + "\n".join(
-                            f"- {m.record.content}" for m in matches
+                            m.format() for m in matches
                        )
                if memory.strip() != "":
                    task_prompt += self.i18n.slice("memory").format(memory=memory)
@@ -622,10 +602,10 @@ class Agent(BaseAgent):
                )
                if unified_memory is not None:
                    query = task.description
-                    matches = unified_memory.recall(query, limit=10)
+                    matches = unified_memory.recall(query, limit=5)
                    if matches:
                        memory = "Relevant memories:\n" + "\n".join(
-                            f"- {m.record.content}" for m in matches
+                            m.format() for m in matches
                        )
                if memory.strip() != "":
                    task_prompt += self.i18n.slice("memory").format(memory=memory)
@@ -864,7 +844,11 @@ class Agent(BaseAgent):
                respect_context_window=self.respect_context_window,
                request_within_rpm_limit=rpm_limit_fn,
                callbacks=[TokenCalcHandler(self._token_process)],
-                response_model=task.response_model if task else None,
+                response_model=(
+                    task.response_model or task.output_pydantic or task.output_json
+                )
+                if task
+                else None,
            )

    def _update_executor_parameters(
@@ -893,7 +877,11 @@ class Agent(BaseAgent):
        self.agent_executor.stop = stop_words
        self.agent_executor.tools_names = get_tool_names(tools)
        self.agent_executor.tools_description = render_text_description_and_args(tools)
-        self.agent_executor.response_model = task.response_model if task else None
+        self.agent_executor.response_model = (
+            (task.response_model or task.output_pydantic or task.output_json)
+            if task
+            else None
+        )

        self.agent_executor.tools_handler = self.tools_handler
        self.agent_executor.request_within_rpm_limit = rpm_limit_fn
@@ -926,544 +914,17 @@ class Agent(BaseAgent):
    def get_mcp_tools(self, mcps: list[str | MCPServerConfig]) -> list[BaseTool]:
        """Convert MCP server references/configs to CrewAI tools.

-        Supports both string references (backwards compatible) and structured
-        configuration objects (MCPServerStdio, MCPServerHTTP, MCPServerSSE).
-
-        Args:
-            mcps: List of MCP server references (strings) or configurations.
-
-        Returns:
-            List of BaseTool instances from MCP servers.
+        Delegates to :class:`~crewai.mcp.tool_resolver.MCPToolResolver`.
        """
-        all_tools = []
-        clients = []
-
-        for mcp_config in mcps:
-            if isinstance(mcp_config, str):
-                tools = self._get_mcp_tools_from_string(mcp_config)
-            else:
-                tools, client = self._get_native_mcp_tools(mcp_config)
-                if client:
-                    clients.append(client)
-
-            all_tools.extend(tools)
-
-        # Store clients for cleanup
-        self._mcp_clients.extend(clients)
-        return all_tools
+        self._cleanup_mcp_clients()
+        self._mcp_resolver = MCPToolResolver(agent=self, logger=self._logger)
+        return self._mcp_resolver.resolve(mcps)

    def _cleanup_mcp_clients(self) -> None:
        """Cleanup MCP client connections after task execution."""
-        if not self._mcp_clients:
-            return
-
-        async def _disconnect_all() -> None:
-            for client in self._mcp_clients:
-                if client and hasattr(client, "connected") and client.connected:
-                    await client.disconnect()
-
-        try:
-            asyncio.run(_disconnect_all())
-        except Exception as e:
-            self._logger.log("error", f"Error during MCP client cleanup: {e}")
-        finally:
-            self._mcp_clients.clear()
-
-    def _get_mcp_tools_from_string(self, mcp_ref: str) -> list[BaseTool]:
-        """Get tools from legacy string-based MCP references.
-
-        This method maintains backwards compatibility with string-based
-        MCP references (https://... and crewai-amp:...).
-
-        Args:
-            mcp_ref: String reference to MCP server.
-
-        Returns:
-            List of BaseTool instances.
-        """
-        if mcp_ref.startswith("crewai-amp:"):
-            return self._get_amp_mcp_tools(mcp_ref)
-        if mcp_ref.startswith("https://"):
-            return self._get_external_mcp_tools(mcp_ref)
-        return []
-
-    def _get_external_mcp_tools(self, mcp_ref: str) -> list[BaseTool]:
-        """Get tools from external HTTPS MCP server with graceful error handling."""
-        from crewai.tools.mcp_tool_wrapper import MCPToolWrapper
-
-        # Parse server URL and optional tool name
-        if "#" in mcp_ref:
-            server_url, specific_tool = mcp_ref.split("#", 1)
-        else:
-            server_url, specific_tool = mcp_ref, None
-
-        server_params = {"url": server_url}
-        server_name = self._extract_server_name(server_url)
-
-        try:
-            # Get tool schemas with timeout and error handling
-            tool_schemas = self._get_mcp_tool_schemas(server_params)
-
-            if not tool_schemas:
-                self._logger.log(
-                    "warning", f"No tools discovered from MCP server: {server_url}"
-                )
-                return []
-
-            tools = []
-            for tool_name, schema in tool_schemas.items():
-                # Skip if specific tool requested and this isn't it
-                if specific_tool and tool_name != specific_tool:
-                    continue
-
-                try:
-                    wrapper = MCPToolWrapper(
-                        mcp_server_params=server_params,
-                        tool_name=tool_name,
-                        tool_schema=schema,
-                        server_name=server_name,
-                    )
-                    tools.append(wrapper)
-                except Exception as e:
-                    self._logger.log(
-                        "warning",
-                        f"Failed to create MCP tool wrapper for {tool_name}: {e}",
-                    )
-                    continue
-
-            if specific_tool and not tools:
-                self._logger.log(
-                    "warning",
-                    f"Specific tool '{specific_tool}' not found on MCP server: {server_url}",
-                )
-
-            return cast(list[BaseTool], tools)
-
-        except Exception as e:
-            self._logger.log(
-                "warning", f"Failed to connect to MCP server {server_url}: {e}"
-            )
-            return []
-
-    def _get_native_mcp_tools(
-        self, mcp_config: MCPServerConfig
-    ) -> tuple[list[BaseTool], Any | None]:
-        """Get tools from MCP server using structured configuration.
-
-        This method creates an MCP client based on the configuration type,
-        connects to the server, discovers tools, applies filtering, and
-        returns wrapped tools along with the client instance for cleanup.
-
-        Args:
-            mcp_config: MCP server configuration (MCPServerStdio, MCPServerHTTP, or MCPServerSSE).
-
-        Returns:
-            Tuple of (list of BaseTool instances, MCPClient instance for cleanup).
-        """
-        from crewai.tools.base_tool import BaseTool
-        from crewai.tools.mcp_native_tool import MCPNativeTool
-
-        transport: StdioTransport | HTTPTransport | SSETransport
-        if isinstance(mcp_config, MCPServerStdio):
-            transport = StdioTransport(
-                command=mcp_config.command,
-                args=mcp_config.args,
-                env=mcp_config.env,
-            )
-            server_name = f"{mcp_config.command}_{'_'.join(mcp_config.args)}"
-        elif isinstance(mcp_config, MCPServerHTTP):
-            transport = HTTPTransport(
-                url=mcp_config.url,
-                headers=mcp_config.headers,
-                streamable=mcp_config.streamable,
-            )
-            server_name = self._extract_server_name(mcp_config.url)
-        elif isinstance(mcp_config, MCPServerSSE):
-            transport = SSETransport(
-                url=mcp_config.url,
-                headers=mcp_config.headers,
-            )
-            server_name = self._extract_server_name(mcp_config.url)
-        else:
-            raise ValueError(f"Unsupported MCP server config type: {type(mcp_config)}")
-
-        client = MCPClient(
-            transport=transport,
-            cache_tools_list=mcp_config.cache_tools_list,
-        )
-
-        async def _setup_client_and_list_tools() -> list[dict[str, Any]]:
-            """Async helper to connect and list tools in same event loop."""
-
-            try:
-                if not client.connected:
-                    await client.connect()
-
-                tools_list = await client.list_tools()
-
-                try:
-                    await client.disconnect()
-                    # Small delay to allow background tasks to finish cleanup
-                    # This helps prevent "cancel scope in different task" errors
-                    # when asyncio.run() closes the event loop
-                    await asyncio.sleep(0.1)
-                except Exception as e:
-                    self._logger.log("error", f"Error during disconnect: {e}")
-
-                return tools_list
-            except Exception as e:
-                if client.connected:
-                    await client.disconnect()
-                    await asyncio.sleep(0.1)
-                raise RuntimeError(
-                    f"Error during setup client and list tools: {e}"
-                ) from e
-
-        try:
-            try:
-                asyncio.get_running_loop()
-                import concurrent.futures
-
-                with concurrent.futures.ThreadPoolExecutor() as executor:
-                    future = executor.submit(
-                        asyncio.run, _setup_client_and_list_tools()
-                    )
-                    tools_list = future.result()
-            except RuntimeError:
-                try:
-                    tools_list = asyncio.run(_setup_client_and_list_tools())
-                except RuntimeError as e:
-                    error_msg = str(e).lower()
-                    if "cancel scope" in error_msg or "task" in error_msg:
-                        raise ConnectionError(
-                            "MCP connection failed due to event loop cleanup issues. "
-                            "This may be due to authentication errors or server unavailability."
-                        ) from e
-                except asyncio.CancelledError as e:
-                    raise ConnectionError(
-                        "MCP connection was cancelled. This may indicate an authentication "
-                        "error or server unavailability."
-                    ) from e
-
-            if mcp_config.tool_filter:
-                filtered_tools = []
-                for tool in tools_list:
-                    if callable(mcp_config.tool_filter):
-                        try:
-                            from crewai.mcp.filters import ToolFilterContext
-
-                            context = ToolFilterContext(
-                                agent=self,
-                                server_name=server_name,
-                                run_context=None,
-                            )
-                            if mcp_config.tool_filter(context, tool):  # type: ignore[call-arg, arg-type]
-                                filtered_tools.append(tool)
-                        except (TypeError, AttributeError):
-                            if mcp_config.tool_filter(tool):  # type: ignore[call-arg, arg-type]
-                                filtered_tools.append(tool)
-                    else:
-                        # Not callable - include tool
-                        filtered_tools.append(tool)
-                tools_list = filtered_tools
-
-            tools = []
-            for tool_def in tools_list:
-                tool_name = tool_def.get("name", "")
-                if not tool_name:
-                    continue
-
-                # Convert inputSchema to Pydantic model if present
-                args_schema = None
-                if tool_def.get("inputSchema"):
-                    args_schema = self._json_schema_to_pydantic(
-                        tool_name, tool_def["inputSchema"]
-                    )
-
-                tool_schema = {
-                    "description": tool_def.get("description", ""),
-                    "args_schema": args_schema,
-                }
-
-                try:
-                    native_tool = MCPNativeTool(
-                        mcp_client=client,
-                        tool_name=tool_name,
-                        tool_schema=tool_schema,
-                        server_name=server_name,
-                    )
-                    tools.append(native_tool)
-                except Exception as e:
-                    self._logger.log("error", f"Failed to create native MCP tool: {e}")
-                    continue
-
-            return cast(list[BaseTool], tools), client
-        except Exception as e:
-            if client.connected:
-                asyncio.run(client.disconnect())
-
-            raise RuntimeError(f"Failed to get native MCP tools: {e}") from e
-
-    def _get_amp_mcp_tools(self, amp_ref: str) -> list[BaseTool]:
-        """Get tools from CrewAI AMP MCP marketplace."""
-        # Parse: "crewai-amp:mcp-name" or "crewai-amp:mcp-name#tool_name"
-        amp_part = amp_ref.replace("crewai-amp:", "")
-        if "#" in amp_part:
-            mcp_name, specific_tool = amp_part.split("#", 1)
-        else:
-            mcp_name, specific_tool = amp_part, None
-
-        # Call AMP API to get MCP server URLs
-        mcp_servers = self._fetch_amp_mcp_servers(mcp_name)
-
-        tools = []
-        for server_config in mcp_servers:
-            server_ref = server_config["url"]
-            if specific_tool:
-                server_ref += f"#{specific_tool}"
-            server_tools = self._get_external_mcp_tools(server_ref)
-            tools.extend(server_tools)
-
-        return tools
-
-    @staticmethod
-    def _extract_server_name(server_url: str) -> str:
-        """Extract clean server name from URL for tool prefixing."""
-
-        parsed = urlparse(server_url)
-        domain = parsed.netloc.replace(".", "_")
-        path = parsed.path.replace("/", "_").strip("_")
-        return f"{domain}_{path}" if path else domain
-
-    def _get_mcp_tool_schemas(
-        self, server_params: dict[str, Any]
-    ) -> dict[str, dict[str, Any]]:
-        """Get tool schemas from MCP server for wrapper creation with caching."""
-        server_url = server_params["url"]
-
-        # Check cache first
-        cache_key = server_url
-        current_time = time.time()
-
-        if cache_key in _mcp_schema_cache:
-            cached_data, cache_time = _mcp_schema_cache[cache_key]
-            if current_time - cache_time < _cache_ttl:
-                self._logger.log(
-                    "debug", f"Using cached MCP tool schemas for {server_url}"
-                )
-                return cached_data  # type: ignore[no-any-return]
-
-        try:
-            schemas = asyncio.run(self._get_mcp_tool_schemas_async(server_params))
-
-            # Cache successful results
-            _mcp_schema_cache[cache_key] = (schemas, current_time)
-
-            return schemas
-        except Exception as e:
-            # Log warning but don't raise - this allows graceful degradation
-            self._logger.log(
-                "warning", f"Failed to get MCP tool schemas from {server_url}: {e}"
-            )
-            return {}
-
-    async def _get_mcp_tool_schemas_async(
-        self, server_params: dict[str, Any]
-    ) -> dict[str, dict[str, Any]]:
-        """Async implementation of MCP tool schema retrieval with timeouts and retries."""
-        server_url = server_params["url"]
-        return await self._retry_mcp_discovery(
-            self._discover_mcp_tools_with_timeout, server_url
-        )
-
-    async def _retry_mcp_discovery(
-        self, operation_func: Any, server_url: str
-    ) -> dict[str, dict[str, Any]]:
-        """Retry MCP discovery operation with exponential backoff, avoiding try-except in loop."""
-        last_error = None
-
-        for attempt in range(MCP_MAX_RETRIES):
-            # Execute single attempt outside try-except loop structure
-            result, error, should_retry = await self._attempt_mcp_discovery(
-                operation_func, server_url
-            )
-
-            # Success case - return immediately
-            if result is not None:
-                return result
-
-            # Non-retryable error - raise immediately
-            if not should_retry:
-                raise RuntimeError(error)
-
-            # Retryable error - continue with backoff
-            last_error = error
-            if attempt < MCP_MAX_RETRIES - 1:
-                wait_time = 2**attempt  # Exponential backoff
-                await asyncio.sleep(wait_time)
-
-        raise RuntimeError(
-            f"Failed to discover MCP tools after {MCP_MAX_RETRIES} attempts: {last_error}"
-        )
-
-    @staticmethod
-    async def _attempt_mcp_discovery(
-        operation_func: Any, server_url: str
-    ) -> tuple[dict[str, dict[str, Any]] | None, str, bool]:
-        """Attempt single MCP discovery operation and return (result, error_message, should_retry)."""
-        try:
-            result = await operation_func(server_url)
-            return result, "", False
-
-        except ImportError:
-            return (
-                None,
-                "MCP library not available. Please install with: pip install mcp",
-                False,
-            )
-
-        except asyncio.TimeoutError:
-            return (
-                None,
-                f"MCP discovery timed out after {MCP_DISCOVERY_TIMEOUT} seconds",
-                True,
-            )
-
-        except Exception as e:
-            error_str = str(e).lower()
-
-            # Classify errors as retryable or non-retryable
-            if "authentication" in error_str or "unauthorized" in error_str:
-                return None, f"Authentication failed for MCP server: {e!s}", False
-            if "connection" in error_str or "network" in error_str:
-                return None, f"Network connection failed: {e!s}", True
-            if "json" in error_str or "parsing" in error_str:
-                return None, f"Server response parsing error: {e!s}", True
-            return None, f"MCP discovery error: {e!s}", False
-
-    async def _discover_mcp_tools_with_timeout(
-        self, server_url: str
-    ) -> dict[str, dict[str, Any]]:
-        """Discover MCP tools with timeout wrapper."""
-        return await asyncio.wait_for(
-            self._discover_mcp_tools(server_url), timeout=MCP_DISCOVERY_TIMEOUT
-        )
-
-    async def _discover_mcp_tools(self, server_url: str) -> dict[str, dict[str, Any]]:
-        """Discover tools from MCP server with proper timeout handling."""
-        from mcp import ClientSession
-        from mcp.client.streamable_http import streamablehttp_client
-
-        async with streamablehttp_client(server_url) as (read, write, _):
-            async with ClientSession(read, write) as session:
-                # Initialize the connection with timeout
-                await asyncio.wait_for(
-                    session.initialize(), timeout=MCP_CONNECTION_TIMEOUT
-                )
-
-                # List available tools with timeout
-                tools_result = await asyncio.wait_for(
-                    session.list_tools(),
-                    timeout=MCP_DISCOVERY_TIMEOUT - MCP_CONNECTION_TIMEOUT,
-                )
-
-                schemas = {}
-                for tool in tools_result.tools:
-                    args_schema = None
-                    if hasattr(tool, "inputSchema") and tool.inputSchema:
-                        args_schema = self._json_schema_to_pydantic(
-                            sanitize_tool_name(tool.name), tool.inputSchema
-                        )
-
-                    schemas[sanitize_tool_name(tool.name)] = {
-                        "description": getattr(tool, "description", ""),
-                        "args_schema": args_schema,
-                    }
-                return schemas
-
-    def _json_schema_to_pydantic(
-        self, tool_name: str, json_schema: dict[str, Any]
-    ) -> type:
-        """Convert JSON Schema to Pydantic model for tool arguments.
-
-        Args:
-            tool_name: Name of the tool (used for model naming)
-            json_schema: JSON Schema dict with 'properties', 'required', etc.
-
-        Returns:
-            Pydantic BaseModel class
-        """
-        from pydantic import Field, create_model
-
-        properties = json_schema.get("properties", {})
-        required_fields = json_schema.get("required", [])
-
-        field_definitions: dict[str, Any] = {}
-
-        for field_name, field_schema in properties.items():
-            field_type = self._json_type_to_python(field_schema)
-            field_description = field_schema.get("description", "")
-
-            is_required = field_name in required_fields
-
-            if is_required:
-                field_definitions[field_name] = (
-                    field_type,
-                    Field(..., description=field_description),
-                )
-            else:
-                field_definitions[field_name] = (
-                    field_type | None,
-                    Field(default=None, description=field_description),
-                )
-
-        model_name = f"{tool_name.replace('-', '_').replace(' ', '_')}Schema"
-        return create_model(model_name, **field_definitions)  # type: ignore[no-any-return]
-
-    def _json_type_to_python(self, field_schema: dict[str, Any]) -> type:
-        """Convert JSON Schema type to Python type.
-
-        Args:
-            field_schema: JSON Schema field definition
-
-        Returns:
-            Python type
-        """
-
-        json_type = field_schema.get("type")
-
-        if "anyOf" in field_schema:
-            types: list[type] = []
-            for option in field_schema["anyOf"]:
-                if "const" in option:
-                    types.append(str)
-                else:
-                    types.append(self._json_type_to_python(option))
-            unique_types = list(set(types))
-            if len(unique_types) > 1:
-                result: Any = unique_types[0]
-                for t in unique_types[1:]:
-                    result = result | t
-                return result  # type: ignore[no-any-return]
-            return unique_types[0]
-
-        type_mapping: dict[str | None, type] = {
-            "string": str,
-            "number": float,
-            "integer": int,
-            "boolean": bool,
-            "array": list,
-            "object": dict,
-        }
-
-        return type_mapping.get(json_type, Any)
-
-    @staticmethod
-    def _fetch_amp_mcp_servers(mcp_name: str) -> list[dict[str, Any]]:
-        """Fetch MCP server configurations from CrewAI AMP API."""
-        # TODO: Implement AMP API call to "integrations/mcps" endpoint
-        # Should return list of server configs with URLs
-        return []
+        if self._mcp_resolver is not None:
+            self._mcp_resolver.cleanup()
+            self._mcp_resolver = None

    @staticmethod
    def get_multimodal_tools() -> Sequence[BaseTool]:
@@ -1695,11 +1156,15 @@ class Agent(BaseAgent):
        # Process platform apps and MCP tools
        if self.apps:
            platform_tools = self.get_platform_tools(self.apps)
-            if platform_tools and self.tools is not None:
+            if platform_tools:
+                if self.tools is None:
+                    self.tools = []
                self.tools.extend(platform_tools)
        if self.mcps:
            mcps = self.get_mcp_tools(self.mcps)
-            if mcps and self.tools is not None:
+            if mcps:
+                if self.tools is None:
+                    self.tools = []
                self.tools.extend(mcps)

        # Prepare tools
@@ -1712,7 +1177,8 @@ class Agent(BaseAgent):

            existing_names = {sanitize_tool_name(t.name) for t in raw_tools}
            raw_tools.extend(
-                mt for mt in create_memory_tools(agent_memory)
+                mt
+                for mt in create_memory_tools(agent_memory)
                if sanitize_tool_name(mt.name) not in existing_names
            )

@@ -1802,11 +1268,11 @@ class Agent(BaseAgent):
                    ),
                )
                start_time = time.time()
-                matches = agent_memory.recall(formatted_messages, limit=10)
+                matches = agent_memory.recall(formatted_messages, limit=20)
                memory_block = ""
                if matches:
                    memory_block = "Relevant memories:\n" + "\n".join(
-                        f"- {m.record.content}" for m in matches
+                        m.format() for m in matches
                    )
                if memory_block:
                    formatted_messages += "\n\n" + self.i18n.slice("memory").format(
@@ -1937,14 +1403,15 @@ class Agent(BaseAgent):
            if isinstance(messages, str):
                input_str = messages
            else:
-                input_str = "\n".join(
-                    str(msg.get("content", "")) for msg in messages if msg.get("content")
-                ) or "User request"
-            raw = (
-                f"Input: {input_str}\n"
-                f"Agent: {self.role}\n"
-                f"Result: {output_text}"
-            )
+                input_str = (
+                    "\n".join(
+                        str(msg.get("content", ""))
+                        for msg in messages
+                        if msg.get("content")
+                    )
+                    or "User request"
+                )
+            raw = f"Input: {input_str}\nAgent: {self.role}\nResult: {output_text}"
            extracted = agent_memory.extract_memories(raw)
            if extracted:
                agent_memory.remember_many(extracted)
--- a/lib/crewai/src/crewai/agents/agent_builder/base_agent.py
+++ b/lib/crewai/src/crewai/agents/agent_builder/base_agent.py
@@ -4,7 +4,8 @@ from abc import ABC, abstractmethod
 from collections.abc import Callable
 from copy import copy as shallow_copy
 from hashlib import md5
-from typing import Any, Literal
+import re
+from typing import Any, Final, Literal
 import uuid

 from pydantic import (
@@ -36,6 +37,11 @@ from crewai.utilities.rpm_controller import RPMController
 from crewai.utilities.string_utils import interpolate_only


+_SLUG_RE: Final[re.Pattern[str]] = re.compile(
+    r"^(?:crewai-amp:)?[a-zA-Z0-9][a-zA-Z0-9_-]*(?:#\w+)?$"
+)
+
+
 PlatformApp = Literal[
    "asana",
    "box",
@@ -197,7 +203,7 @@ class BaseAgent(BaseModel, ABC, metaclass=AgentMeta):
    )
    mcps: list[str | MCPServerConfig] | None = Field(
        default=None,
-        description="List of MCP server references. Supports 'https://server.com/path' for external servers and 'crewai-amp:mcp-name' for AMP marketplace. Use '#tool_name' suffix for specific tools.",
+        description="List of MCP server references. Supports 'https://server.com/path' for external servers and bare slugs like 'notion' for connected MCP integrations. Use '#tool_name' suffix for specific tools.",
    )
    memory: Any = Field(
        default=None,
@@ -276,14 +282,16 @@ class BaseAgent(BaseModel, ABC, metaclass=AgentMeta):
        validated_mcps: list[str | MCPServerConfig] = []
        for mcp in mcps:
            if isinstance(mcp, str):
-                if mcp.startswith(("https://", "crewai-amp:")):
+                if mcp.startswith("https://"):
+                    validated_mcps.append(mcp)
+                elif _SLUG_RE.match(mcp):
                    validated_mcps.append(mcp)
                else:
                    raise ValueError(
-                        f"Invalid MCP reference: {mcp}. "
-                        "String references must start with 'https://' or 'crewai-amp:'"
+                        f"Invalid MCP reference: {mcp!r}. "
+                        "String references must be an 'https://' URL or a valid "
+                        "slug (e.g. 'notion', 'notion#search', 'crewai-amp:notion')."
                    )
-
            elif isinstance(mcp, (MCPServerConfig)):
                validated_mcps.append(mcp)
            else:
--- a/lib/crewai/src/crewai/agents/agent_builder/base_agent_executor_mixin.py
+++ b/lib/crewai/src/crewai/agents/agent_builder/base_agent_executor_mixin.py
@@ -30,12 +30,9 @@ class CrewAgentExecutorMixin:
        memory = getattr(self.agent, "memory", None) or (
            getattr(self.crew, "_memory", None) if self.crew else None
        )
-        if memory is None or not self.task:
+        if memory is None or not self.task or memory.read_only:
            return
-        if (
-            f"Action: {sanitize_tool_name('Delegate work to coworker')}"
-            in output.text
-        ):
+        if f"Action: {sanitize_tool_name('Delegate work to coworker')}" in output.text:
            return
        try:
            raw = (
@@ -48,6 +45,4 @@ class CrewAgentExecutorMixin:
            if extracted:
                memory.remember_many(extracted, agent_role=self.agent.role)
        except Exception as e:
-            self.agent._logger.log(
-                "error", f"Failed to save to memory: {e}"
-            )
+            self.agent._logger.log("error", f"Failed to save to memory: {e}")
--- a/lib/crewai/src/crewai/agents/cache/init.py
+++ b/lib/crewai/src/crewai/agents/cache/init.py
@@ -1,5 +1,4 @@
 from crewai.agents.cache.cache_handler import CacheHandler


-
 __all__ = ["CacheHandler"]
--- a/lib/crewai/src/crewai/agents/crew_agent_executor.py
+++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py
@@ -6,7 +6,11 @@ and memory management.

 from __future__ import annotations

+import asyncio
 from collections.abc import Callable
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import contextvars
+import inspect
 import logging
 from typing import TYPE_CHECKING, Any, Literal, cast

@@ -47,6 +51,7 @@ from crewai.utilities.agent_utils import (
    handle_unknown_error,
    has_reached_max_iterations,
    is_context_length_exceeded,
+    parse_tool_call_args,
    process_llm_response,
    track_delegation_if_needed,
 )
@@ -483,8 +488,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            # No tools available, fall back to simple LLM call
            return self._invoke_loop_native_no_tools()

-        openai_tools, available_functions = convert_tools_to_openai_schema(
-            self.original_tools
+        openai_tools, available_functions, self._tool_name_mapping = (
+            convert_tools_to_openai_schema(self.original_tools)
        )

        while True:
@@ -685,30 +690,141 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        Returns:
            AgentFinish if tool has result_as_answer=True, None otherwise.
        """
-        from datetime import datetime
-        import json
-
-        from crewai.events import crewai_event_bus
-        from crewai.events.types.tool_usage_events import (
-            ToolUsageErrorEvent,
-            ToolUsageFinishedEvent,
-            ToolUsageStartedEvent,
-        )
-
        if not tool_calls:
            return None

-        # Only process the FIRST tool call for sequential execution with reflection
-        tool_call = tool_calls[0]
+        parsed_calls = [
+            parsed
+            for tool_call in tool_calls
+            if (parsed := self._parse_native_tool_call(tool_call)) is not None
+        ]
+        if not parsed_calls:
+            return None

-        # Extract tool call info - handle OpenAI-style, Anthropic-style, and Gemini-style
+        original_tools_by_name: dict[str, Any] = dict(self._tool_name_mapping)
+
+        if len(parsed_calls) > 1:
+            has_result_as_answer_in_batch = any(
+                bool(
+                    original_tools_by_name.get(func_name)
+                    and getattr(
+                        original_tools_by_name.get(func_name), "result_as_answer", False
+                    )
+                )
+                for _, func_name, _ in parsed_calls
+            )
+            has_max_usage_count_in_batch = any(
+                bool(
+                    original_tools_by_name.get(func_name)
+                    and getattr(
+                        original_tools_by_name.get(func_name),
+                        "max_usage_count",
+                        None,
+                    )
+                    is not None
+                )
+                for _, func_name, _ in parsed_calls
+            )
+
+            # Preserve historical sequential behavior for result_as_answer batches.
+            # Also avoid threading around usage counters for max_usage_count tools.
+            if has_result_as_answer_in_batch or has_max_usage_count_in_batch:
+                logger.debug(
+                    "Skipping parallel native execution because batch includes result_as_answer or max_usage_count tool"
+                )
+            else:
+                execution_plan: list[
+                    tuple[str, str, str | dict[str, Any], Any | None]
+                ] = []
+                for call_id, func_name, func_args in parsed_calls:
+                    original_tool = original_tools_by_name.get(func_name)
+                    execution_plan.append(
+                        (call_id, func_name, func_args, original_tool)
+                    )
+
+                self._append_assistant_tool_calls_message(
+                    [
+                        (call_id, func_name, func_args)
+                        for call_id, func_name, func_args, _ in execution_plan
+                    ]
+                )
+
+                max_workers = min(8, len(execution_plan))
+                ordered_results: list[dict[str, Any] | None] = [None] * len(
+                    execution_plan
+                )
+                with ThreadPoolExecutor(max_workers=max_workers) as pool:
+                    futures = {
+                        pool.submit(
+                            contextvars.copy_context().run,
+                            self._execute_single_native_tool_call,
+                            call_id=call_id,
+                            func_name=func_name,
+                            func_args=func_args,
+                            available_functions=available_functions,
+                            original_tool=original_tool,
+                            should_execute=True,
+                        ): idx
+                        for idx, (
+                            call_id,
+                            func_name,
+                            func_args,
+                            original_tool,
+                        ) in enumerate(execution_plan)
+                    }
+                    for future in as_completed(futures):
+                        idx = futures[future]
+                        ordered_results[idx] = future.result()
+
+                for execution_result in ordered_results:
+                    if not execution_result:
+                        continue
+                    tool_finish = self._append_tool_result_and_check_finality(
+                        execution_result
+                    )
+                    if tool_finish:
+                        return tool_finish
+
+                reasoning_prompt = self._i18n.slice("post_tool_reasoning")
+                reasoning_message: LLMMessage = {
+                    "role": "user",
+                    "content": reasoning_prompt,
+                }
+                self.messages.append(reasoning_message)
+                return None
+
+        # Sequential behavior: process only first tool call, then force reflection.
+        call_id, func_name, func_args = parsed_calls[0]
+        self._append_assistant_tool_calls_message([(call_id, func_name, func_args)])
+
+        execution_result = self._execute_single_native_tool_call(
+            call_id=call_id,
+            func_name=func_name,
+            func_args=func_args,
+            available_functions=available_functions,
+            original_tool=original_tools_by_name.get(func_name),
+            should_execute=True,
+        )
+        tool_finish = self._append_tool_result_and_check_finality(execution_result)
+        if tool_finish:
+            return tool_finish
+
+        reasoning_prompt = self._i18n.slice("post_tool_reasoning")
+        reasoning_message = {
+            "role": "user",
+            "content": reasoning_prompt,
+        }
+        self.messages.append(reasoning_message)
+        return None
+
+    def _parse_native_tool_call(
+        self, tool_call: Any
+    ) -> tuple[str, str, str | dict[str, Any]] | None:
        if hasattr(tool_call, "function"):
-            # OpenAI-style: has .function.name and .function.arguments
            call_id = getattr(tool_call, "id", f"call_{id(tool_call)}")
            func_name = sanitize_tool_name(tool_call.function.name)
-            func_args = tool_call.function.arguments
-        elif hasattr(tool_call, "function_call") and tool_call.function_call:
-            # Gemini-style: has .function_call.name and .function_call.args
+            return call_id, func_name, tool_call.function.arguments
+        if hasattr(tool_call, "function_call") and tool_call.function_call:
            call_id = f"call_{id(tool_call)}"
            func_name = sanitize_tool_name(tool_call.function_call.name)
            func_args = (
@@ -716,13 +832,12 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                if tool_call.function_call.args
                else {}
            )
-        elif hasattr(tool_call, "name") and hasattr(tool_call, "input"):
-            # Anthropic format: has .name and .input (ToolUseBlock)
+            return call_id, func_name, func_args
+        if hasattr(tool_call, "name") and hasattr(tool_call, "input"):
            call_id = getattr(tool_call, "id", f"call_{id(tool_call)}")
            func_name = sanitize_tool_name(tool_call.name)
-            func_args = tool_call.input  # Already a dict in Anthropic
-        elif isinstance(tool_call, dict):
-            # Support OpenAI "id", Bedrock "toolUseId", or generate one
+            return call_id, func_name, tool_call.input
+        if isinstance(tool_call, dict):
            call_id = (
                tool_call.get("id")
                or tool_call.get("toolUseId")
@@ -733,10 +848,15 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                func_info.get("name", "") or tool_call.get("name", "")
            )
            func_args = func_info.get("arguments", "{}") or tool_call.get("input", {})
-        else:
-            return None
+            return call_id, func_name, func_args
+        return None
+
+    def _append_assistant_tool_calls_message(
+        self,
+        parsed_calls: list[tuple[str, str, str | dict[str, Any]]],
+    ) -> None:
+        import json

-        # Append assistant message with single tool call
        assistant_message: LLMMessage = {
            "role": "assistant",
            "content": None,
@@ -751,42 +871,54 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                        else json.dumps(func_args),
                    },
                }
+                for call_id, func_name, func_args in parsed_calls
            ],
        }
-
        self.messages.append(assistant_message)

-        # Parse arguments for the single tool call
-        if isinstance(func_args, str):
-            try:
-                args_dict = json.loads(func_args)
-            except json.JSONDecodeError:
-                args_dict = {}
-        else:
-            args_dict = func_args
+    def _execute_single_native_tool_call(
+        self,
+        *,
+        call_id: str,
+        func_name: str,
+        func_args: str | dict[str, Any],
+        available_functions: dict[str, Callable[..., Any]],
+        original_tool: Any | None = None,
+        should_execute: bool = True,
+    ) -> dict[str, Any]:
+        from datetime import datetime
+        import json

-        agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"
+        from crewai.events.types.tool_usage_events import (
+            ToolUsageErrorEvent,
+            ToolUsageFinishedEvent,
+            ToolUsageStartedEvent,
+        )

-        # Find original tool by matching sanitized name (needed for cache_function and result_as_answer)
+        args_dict, parse_error = parse_tool_call_args(func_args, func_name, call_id, original_tool)
+        if parse_error is not None:
+            return parse_error

-        original_tool = None
-        for tool in self.original_tools or []:
-            if sanitize_tool_name(tool.name) == func_name:
-                original_tool = tool
-                break
+        if original_tool is None:
+            for tool in self.original_tools or []:
+                if sanitize_tool_name(tool.name) == func_name:
+                    original_tool = tool
+                    break

-        # Check if tool has reached max usage count
        max_usage_reached = False
-        if original_tool:
-            if (
-                hasattr(original_tool, "max_usage_count")
-                and original_tool.max_usage_count is not None
-                and original_tool.current_usage_count >= original_tool.max_usage_count
-            ):
-                max_usage_reached = True
+        if not should_execute and original_tool:
+            max_usage_reached = True
+        elif (
+            should_execute
+            and original_tool
+            and (max_count := getattr(original_tool, "max_usage_count", None))
+            is not None
+            and getattr(original_tool, "current_usage_count", 0) >= max_count
+        ):
+            max_usage_reached = True

-        # Check cache before executing
        from_cache = False
+        result: str = "Tool not found"
        input_str = json.dumps(args_dict) if args_dict else ""
        if self.tools_handler and self.tools_handler.cache:
            cached_result = self.tools_handler.cache.read(
@@ -800,7 +932,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                )
                from_cache = True

-        # Emit tool usage started event
+        agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"
        started_at = datetime.now()
        crewai_event_bus.emit(
            self,
@@ -816,14 +948,18 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):

        track_delegation_if_needed(func_name, args_dict, self.task)

-        # Find the structured tool for hook context
        structured_tool: CrewStructuredTool | None = None
-        for structured in self.tools or []:
-            if sanitize_tool_name(structured.name) == func_name:
-                structured_tool = structured
-                break
+        if original_tool is not None:
+            for structured in self.tools or []:
+                if getattr(structured, "_original_tool", None) is original_tool:
+                    structured_tool = structured
+                    break
+        if structured_tool is None:
+            for structured in self.tools or []:
+                if sanitize_tool_name(structured.name) == func_name:
+                    structured_tool = structured
+                    break

-        # Execute before_tool_call hooks
        hook_blocked = False
        before_hook_context = ToolCallHookContext(
            tool_name=func_name,
@@ -847,58 +983,48 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                    color="red",
                )

-        # If hook blocked execution, set result and skip tool execution
        if hook_blocked:
            result = f"Tool execution blocked by hook. Tool: {func_name}"
-        # Execute the tool (only if not cached, not at max usage, and not blocked by hook)
-        elif not from_cache and not max_usage_reached:
-            result = "Tool not found"
-            if func_name in available_functions:
-                try:
-                    tool_func = available_functions[func_name]
-                    raw_result = tool_func(**args_dict)
-
-                    # Add to cache after successful execution (before string conversion)
-                    if self.tools_handler and self.tools_handler.cache:
-                        should_cache = True
-                        if (
-                            original_tool
-                            and hasattr(original_tool, "cache_function")
-                            and callable(original_tool.cache_function)
-                        ):
-                            should_cache = original_tool.cache_function(
-                                args_dict, raw_result
-                            )
-                        if should_cache:
-                            self.tools_handler.cache.add(
-                                tool=func_name, input=input_str, output=raw_result
-                            )
-
-                    # Convert to string for message
-                    result = (
-                        str(raw_result)
-                        if not isinstance(raw_result, str)
-                        else raw_result
-                    )
-                except Exception as e:
-                    result = f"Error executing tool: {e}"
-                    if self.task:
-                        self.task.increment_tools_errors()
-                    crewai_event_bus.emit(
-                        self,
-                        event=ToolUsageErrorEvent(
-                            tool_name=func_name,
-                            tool_args=args_dict,
-                            from_agent=self.agent,
-                            from_task=self.task,
-                            agent_key=agent_key,
-                            error=e,
-                        ),
-                    )
-                    error_event_emitted = True
        elif max_usage_reached and original_tool:
-            # Return error message when max usage limit is reached
            result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
+        elif not from_cache and func_name in available_functions:
+            try:
+                raw_result = available_functions[func_name](**args_dict)
+
+                if self.tools_handler and self.tools_handler.cache:
+                    should_cache = True
+                    if (
+                        original_tool
+                        and hasattr(original_tool, "cache_function")
+                        and callable(original_tool.cache_function)
+                    ):
+                        should_cache = original_tool.cache_function(
+                            args_dict, raw_result
+                        )
+                    if should_cache:
+                        self.tools_handler.cache.add(
+                            tool=func_name, input=input_str, output=raw_result
+                        )
+
+                result = (
+                    str(raw_result) if not isinstance(raw_result, str) else raw_result
+                )
+            except Exception as e:
+                result = f"Error executing tool: {e}"
+                if self.task:
+                    self.task.increment_tools_errors()
+                crewai_event_bus.emit(
+                    self,
+                    event=ToolUsageErrorEvent(
+                        tool_name=func_name,
+                        tool_args=args_dict,
+                        from_agent=self.agent,
+                        from_task=self.task,
+                        agent_key=agent_key,
+                        error=e,
+                    ),
+                )
+                error_event_emitted = True

        after_hook_context = ToolCallHookContext(
            tool_name=func_name,
@@ -938,7 +1064,23 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                ),
            )

-        # Append tool result message
+        return {
+            "call_id": call_id,
+            "func_name": func_name,
+            "result": result,
+            "from_cache": from_cache,
+            "original_tool": original_tool,
+        }
+
+    def _append_tool_result_and_check_finality(
+        self, execution_result: dict[str, Any]
+    ) -> AgentFinish | None:
+        call_id = cast(str, execution_result["call_id"])
+        func_name = cast(str, execution_result["func_name"])
+        result = cast(str, execution_result["result"])
+        from_cache = cast(bool, execution_result["from_cache"])
+        original_tool = execution_result["original_tool"]
+
        tool_message: LLMMessage = {
            "role": "tool",
            "tool_call_id": call_id,
@@ -947,7 +1089,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        }
        self.messages.append(tool_message)

-        # Log the tool execution
        if self.agent and self.agent.verbose:
            cache_info = " (from cache)" if from_cache else ""
            self._printer.print(
@@ -960,20 +1101,11 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            and hasattr(original_tool, "result_as_answer")
            and original_tool.result_as_answer
        ):
-            # Return immediately with tool result as final answer
            return AgentFinish(
                thought="Tool result is the final answer",
                output=result,
                text=result,
            )
-
-        # Inject post-tool reasoning prompt to enforce analysis
-        reasoning_prompt = self._i18n.slice("post_tool_reasoning")
-        reasoning_message: LLMMessage = {
-            "role": "user",
-            "content": reasoning_prompt,
-        }
-        self.messages.append(reasoning_message)
        return None

    async def ainvoke(self, inputs: dict[str, Any]) -> dict[str, Any]:
@@ -1133,7 +1265,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                        formatted_answer, tool_result
                    )

-                self._invoke_step_callback(formatted_answer)  # type: ignore[arg-type]
+                await self._ainvoke_step_callback(formatted_answer)  # type: ignore[arg-type]
                self._append_message(formatted_answer.text)  # type: ignore[union-attr]

            except OutputParserError as e:
@@ -1186,8 +1318,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        if not self.original_tools:
            return await self._ainvoke_loop_native_no_tools()

-        openai_tools, available_functions = convert_tools_to_openai_schema(
-            self.original_tools
+        openai_tools, available_functions, self._tool_name_mapping = (
+            convert_tools_to_openai_schema(self.original_tools)
        )

        while True:
@@ -1248,7 +1380,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                        output=answer,
                        text=answer,
                    )
-                    self._invoke_step_callback(formatted_answer)
+                    await self._ainvoke_step_callback(formatted_answer)
                    self._append_message(answer)  # Save final answer to messages
                    self._show_logs(formatted_answer)
                    return formatted_answer
@@ -1260,7 +1392,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                        output=answer,
                        text=output_json,
                    )
-                    self._invoke_step_callback(formatted_answer)
+                    await self._ainvoke_step_callback(formatted_answer)
                    self._append_message(output_json)
                    self._show_logs(formatted_answer)
                    return formatted_answer
@@ -1271,7 +1403,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                    output=str(answer),
                    text=str(answer),
                )
-                self._invoke_step_callback(formatted_answer)
+                await self._ainvoke_step_callback(formatted_answer)
                self._append_message(str(answer))  # Save final answer to messages
                self._show_logs(formatted_answer)
                return formatted_answer
@@ -1365,13 +1497,28 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
    def _invoke_step_callback(
        self, formatted_answer: AgentAction | AgentFinish
    ) -> None:
-        """Invoke step callback.
+        """Invoke step callback (sync context).

        Args:
            formatted_answer: Current agent response.
        """
        if self.step_callback:
-            self.step_callback(formatted_answer)
+            cb_result = self.step_callback(formatted_answer)
+            if inspect.iscoroutine(cb_result):
+                asyncio.run(cb_result)
+
+    async def _ainvoke_step_callback(
+        self, formatted_answer: AgentAction | AgentFinish
+    ) -> None:
+        """Invoke step callback (async context).
+
+        Args:
+            formatted_answer: Current agent response.
+        """
+        if self.step_callback:
+            cb_result = self.step_callback(formatted_answer)
+            if inspect.iscoroutine(cb_result):
+                await cb_result

    def _append_message(
        self, text: str, role: Literal["user", "assistant", "system"] = "assistant"
--- a/lib/crewai/src/crewai/cli/authentication/init.py
+++ b/lib/crewai/src/crewai/cli/authentication/init.py
@@ -1,5 +1,4 @@
 from crewai.cli.authentication.main import AuthenticationCommand


-
 __all__ = ["AuthenticationCommand"]
--- a/lib/crewai/src/crewai/cli/authentication/main.py
+++ b/lib/crewai/src/crewai/cli/authentication/main.py
@@ -2,8 +2,8 @@ import time
 from typing import TYPE_CHECKING, Any, TypeVar, cast
 import webbrowser

+import httpx
 from pydantic import BaseModel, Field
-import requests
 from rich.console import Console

 from crewai.cli.authentication.utils import validate_jwt_token
@@ -98,7 +98,7 @@ class AuthenticationCommand:
            "scope": " ".join(self.oauth2_provider.get_oauth_scopes()),
            "audience": self.oauth2_provider.get_audience(),
        }
-        response = requests.post(
+        response = httpx.post(
            url=self.oauth2_provider.get_authorize_url(),
            data=device_code_payload,
            timeout=20,
@@ -130,7 +130,7 @@ class AuthenticationCommand:

        attempts = 0
        while True and attempts < 10:
-            response = requests.post(
+            response = httpx.post(
                self.oauth2_provider.get_token_url(), data=token_payload, timeout=30
            )
            token_data = response.json()
@@ -149,7 +149,7 @@ class AuthenticationCommand:
                return

            if token_data["error"] not in ("authorization_pending", "slow_down"):
-                raise requests.HTTPError(
+                raise httpx.HTTPError(
                    token_data.get("error_description") or token_data.get("error")
                )

--- a/lib/crewai/src/crewai/cli/command.py
+++ b/lib/crewai/src/crewai/cli/command.py
@@ -1,5 +1,6 @@
-import requests
-from requests.exceptions import JSONDecodeError
+import json
+
+import httpx
 from rich.console import Console

 from crewai.cli.authentication.token import get_auth_token
@@ -30,16 +31,16 @@ class PlusAPIMixin:
            console.print("Run 'crewai login' to sign up/login.", style="bold green")
            raise SystemExit from None

-    def _validate_response(self, response: requests.Response) -> None:
+    def _validate_response(self, response: httpx.Response) -> None:
        """
        Handle and display error messages from API responses.

        Args:
-            response (requests.Response): The response from the Plus API
+            response (httpx.Response): The response from the Plus API
        """
        try:
            json_response = response.json()
-        except (JSONDecodeError, ValueError):
+        except (json.JSONDecodeError, ValueError):
            console.print(
                "Failed to parse response from Enterprise API failed. Details:",
                style="bold red",
@@ -62,7 +63,7 @@ class PlusAPIMixin:
                    )
            raise SystemExit

-        if not response.ok:
+        if not response.is_success:
            console.print(
                "Request to Enterprise API failed. Details:", style="bold red"
            )
--- a/lib/crewai/src/crewai/cli/constants.py
+++ b/lib/crewai/src/crewai/cli/constants.py
@@ -69,7 +69,7 @@ ENV_VARS: dict[str, list[dict[str, Any]]] = {
        },
        {
            "prompt": "Enter your AWS Region Name (press Enter to skip)",
-            "key_name": "AWS_REGION_NAME",
+            "key_name": "AWS_DEFAULT_REGION",
        },
    ],
    "azure": [
--- a/lib/crewai/src/crewai/cli/create_crew.py
+++ b/lib/crewai/src/crewai/cli/create_crew.py
@@ -143,7 +143,7 @@ def create_folder_structure(
        (folder_path / "src" / folder_name).mkdir(parents=True)
        (folder_path / "src" / folder_name / "tools").mkdir(parents=True)
        (folder_path / "src" / folder_name / "config").mkdir(parents=True)
-        
+
        # Copy AGENTS.md to project root (top-level projects only)
        package_dir = Path(__file__).parent
        agents_md_src = package_dir / "templates" / "AGENTS.md"
--- a/lib/crewai/src/crewai/cli/create_flow.py
+++ b/lib/crewai/src/crewai/cli/create_flow.py
@@ -1,5 +1,5 @@
-import shutil
 from pathlib import Path
+import shutil

 import click

--- a/lib/crewai/src/crewai/cli/enterprise/main.py
+++ b/lib/crewai/src/crewai/cli/enterprise/main.py
@@ -1,7 +1,7 @@
+import json
 from typing import Any, cast

-import requests
-from requests.exceptions import JSONDecodeError, RequestException
+import httpx
 from rich.console import Console

 from crewai.cli.authentication.main import Oauth2Settings, ProviderFactory
@@ -47,12 +47,12 @@ class EnterpriseConfigureCommand(BaseCommand):
                "User-Agent": f"CrewAI-CLI/{get_crewai_version()}",
                "X-Crewai-Version": get_crewai_version(),
            }
-            response = requests.get(oauth_endpoint, timeout=30, headers=headers)
+            response = httpx.get(oauth_endpoint, timeout=30, headers=headers)
            response.raise_for_status()

            try:
                oauth_config = response.json()
-            except JSONDecodeError as e:
+            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON response from {oauth_endpoint}") from e

            self._validate_oauth_config(oauth_config)
@@ -62,7 +62,7 @@ class EnterpriseConfigureCommand(BaseCommand):
            )
            return cast(dict[str, Any], oauth_config)

-        except RequestException as e:
+        except httpx.HTTPError as e:
            raise ValueError(f"Failed to connect to enterprise URL: {e!s}") from e
        except Exception as e:
            raise ValueError(f"Error fetching OAuth2 configuration: {e!s}") from e
--- a/lib/crewai/src/crewai/cli/memory_tui.py
+++ b/lib/crewai/src/crewai/cli/memory_tui.py
@@ -290,13 +290,20 @@ class MemoryTUI(App[None]):
        if self._memory is None:
            panel.update(self._init_error or "No memory loaded.")
            return
+        display_limit = 1000
        info = self._memory.info(path)
        self._last_scope_info = info
-        self._entries = self._memory.list_records(scope=path, limit=200)
+        self._entries = self._memory.list_records(scope=path, limit=display_limit)
        panel.update(_format_scope_info(info))
        panel.border_title = "Detail"
        entry_list = self.query_one("#entry-list", OptionList)
-        entry_list.border_title = f"Entries ({len(self._entries)})"
+        capped = info.record_count > display_limit
+        count_label = (
+            f"Entries (showing {display_limit} of {info.record_count} — display limit)"
+            if capped
+            else f"Entries ({len(self._entries)})"
+        )
+        entry_list.border_title = count_label
        self._populate_entry_list()

    def on_option_list_option_highlighted(
@@ -376,6 +383,11 @@ class MemoryTUI(App[None]):
                return

            info_lines: list[str] = []
+            info_lines.append(
+                "[dim italic]Searched the full dataset"
+                + (f" within [bold]{scope}[/]" if scope else "")
+                + " using the recall flow (semantic + recency + importance).[/]\n"
+            )
            if not self._custom_embedder:
                info_lines.append(
                    "[dim italic]Note: Using default OpenAI embedder. "
--- a/lib/crewai/src/crewai/cli/organization/main.py
+++ b/lib/crewai/src/crewai/cli/organization/main.py
@@ -1,4 +1,4 @@
-from requests import HTTPError
+from httpx import HTTPStatusError
 from rich.console import Console
 from rich.table import Table

@@ -10,11 +10,11 @@ console = Console()


 class OrganizationCommand(BaseCommand, PlusAPIMixin):
-    def __init__(self):
+    def __init__(self) -> None:
        BaseCommand.__init__(self)
        PlusAPIMixin.__init__(self, telemetry=self._telemetry)

-    def list(self):
+    def list(self) -> None:
        try:
            response = self.plus_api_client.get_organizations()
            response.raise_for_status()
@@ -33,7 +33,7 @@ class OrganizationCommand(BaseCommand, PlusAPIMixin):
                table.add_row(org["name"], org["uuid"])

            console.print(table)
-        except HTTPError as e:
+        except HTTPStatusError as e:
            if e.response.status_code == 401:
                console.print(
                    "You are not logged in to any organization. Use 'crewai login' to login.",
@@ -50,7 +50,7 @@ class OrganizationCommand(BaseCommand, PlusAPIMixin):
            )
            raise SystemExit(1) from e

-    def switch(self, org_id):
+    def switch(self, org_id: str) -> None:
        try:
            response = self.plus_api_client.get_organizations()
            response.raise_for_status()
@@ -72,7 +72,7 @@ class OrganizationCommand(BaseCommand, PlusAPIMixin):
                f"Successfully switched to {org['name']} ({org['uuid']})",
                style="bold green",
            )
-        except HTTPError as e:
+        except HTTPStatusError as e:
            if e.response.status_code == 401:
                console.print(
                    "You are not logged in to any organization. Use 'crewai login' to login.",
@@ -87,7 +87,7 @@ class OrganizationCommand(BaseCommand, PlusAPIMixin):
            console.print(f"Failed to switch organization: {e!s}", style="bold red")
            raise SystemExit(1) from e

-    def current(self):
+    def current(self) -> None:
        settings = Settings()
        if settings.org_uuid:
            console.print(
--- a/lib/crewai/src/crewai/cli/plus_api.py
+++ b/lib/crewai/src/crewai/cli/plus_api.py
@@ -3,7 +3,6 @@ from typing import Any
 from urllib.parse import urljoin

 import httpx
-import requests

 from crewai.cli.config import Settings
 from crewai.cli.constants import DEFAULT_CREWAI_ENTERPRISE_URL
@@ -23,14 +22,15 @@ class PlusAPI:
    EPHEMERAL_TRACING_RESOURCE = "/crewai_plus/api/v1/tracing/ephemeral"
    INTEGRATIONS_RESOURCE = "/crewai_plus/api/v1/integrations"

-    def __init__(self, api_key: str) -> None:
+    def __init__(self, api_key: str | None = None) -> None:
        self.api_key = api_key
        self.headers = {
-            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "User-Agent": f"CrewAI-CLI/{get_crewai_version()}",
            "X-Crewai-Version": get_crewai_version(),
        }
+        if api_key:
+            self.headers["Authorization"] = f"Bearer {api_key}"
        settings = Settings()
        if settings.org_uuid:
            self.headers["X-Crewai-Organization-Id"] = settings.org_uuid
@@ -43,16 +43,21 @@ class PlusAPI:

    def _make_request(
        self, method: str, endpoint: str, **kwargs: Any
-    ) -> requests.Response:
+    ) -> httpx.Response:
        url = urljoin(self.base_url, endpoint)
-        session = requests.Session()
-        session.trust_env = False
-        return session.request(method, url, headers=self.headers, **kwargs)
+        verify = kwargs.pop("verify", True)
+        with httpx.Client(trust_env=False, verify=verify) as client:
+            return client.request(method, url, headers=self.headers, **kwargs)

-    def login_to_tool_repository(self) -> requests.Response:
-        return self._make_request("POST", f"{self.TOOLS_RESOURCE}/login")
+    def login_to_tool_repository(
+        self, user_identifier: str | None = None
+    ) -> httpx.Response:
+        payload = {}
+        if user_identifier:
+            payload["user_identifier"] = user_identifier
+        return self._make_request("POST", f"{self.TOOLS_RESOURCE}/login", json=payload)

-    def get_tool(self, handle: str) -> requests.Response:
+    def get_tool(self, handle: str) -> httpx.Response:
        return self._make_request("GET", f"{self.TOOLS_RESOURCE}/{handle}")

    async def get_agent(self, handle: str) -> httpx.Response:
@@ -68,7 +73,7 @@ class PlusAPI:
        description: str | None,
        encoded_file: str,
        available_exports: list[dict[str, Any]] | None = None,
-    ) -> requests.Response:
+    ) -> httpx.Response:
        params = {
            "handle": handle,
            "public": is_public,
@@ -79,54 +84,52 @@ class PlusAPI:
        }
        return self._make_request("POST", f"{self.TOOLS_RESOURCE}", json=params)

-    def deploy_by_name(self, project_name: str) -> requests.Response:
+    def deploy_by_name(self, project_name: str) -> httpx.Response:
        return self._make_request(
            "POST", f"{self.CREWS_RESOURCE}/by-name/{project_name}/deploy"
        )

-    def deploy_by_uuid(self, uuid: str) -> requests.Response:
+    def deploy_by_uuid(self, uuid: str) -> httpx.Response:
        return self._make_request("POST", f"{self.CREWS_RESOURCE}/{uuid}/deploy")

-    def crew_status_by_name(self, project_name: str) -> requests.Response:
+    def crew_status_by_name(self, project_name: str) -> httpx.Response:
        return self._make_request(
            "GET", f"{self.CREWS_RESOURCE}/by-name/{project_name}/status"
        )

-    def crew_status_by_uuid(self, uuid: str) -> requests.Response:
+    def crew_status_by_uuid(self, uuid: str) -> httpx.Response:
        return self._make_request("GET", f"{self.CREWS_RESOURCE}/{uuid}/status")

    def crew_by_name(
        self, project_name: str, log_type: str = "deployment"
-    ) -> requests.Response:
+    ) -> httpx.Response:
        return self._make_request(
            "GET", f"{self.CREWS_RESOURCE}/by-name/{project_name}/logs/{log_type}"
        )

-    def crew_by_uuid(
-        self, uuid: str, log_type: str = "deployment"
-    ) -> requests.Response:
+    def crew_by_uuid(self, uuid: str, log_type: str = "deployment") -> httpx.Response:
        return self._make_request(
            "GET", f"{self.CREWS_RESOURCE}/{uuid}/logs/{log_type}"
        )

-    def delete_crew_by_name(self, project_name: str) -> requests.Response:
+    def delete_crew_by_name(self, project_name: str) -> httpx.Response:
        return self._make_request(
            "DELETE", f"{self.CREWS_RESOURCE}/by-name/{project_name}"
        )

-    def delete_crew_by_uuid(self, uuid: str) -> requests.Response:
+    def delete_crew_by_uuid(self, uuid: str) -> httpx.Response:
        return self._make_request("DELETE", f"{self.CREWS_RESOURCE}/{uuid}")

-    def list_crews(self) -> requests.Response:
+    def list_crews(self) -> httpx.Response:
        return self._make_request("GET", self.CREWS_RESOURCE)

-    def create_crew(self, payload: dict[str, Any]) -> requests.Response:
+    def create_crew(self, payload: dict[str, Any]) -> httpx.Response:
        return self._make_request("POST", self.CREWS_RESOURCE, json=payload)

-    def get_organizations(self) -> requests.Response:
+    def get_organizations(self) -> httpx.Response:
        return self._make_request("GET", self.ORGANIZATIONS_RESOURCE)

-    def initialize_trace_batch(self, payload: dict[str, Any]) -> requests.Response:
+    def initialize_trace_batch(self, payload: dict[str, Any]) -> httpx.Response:
        return self._make_request(
            "POST",
            f"{self.TRACING_RESOURCE}/batches",
@@ -136,7 +139,7 @@ class PlusAPI:

    def initialize_ephemeral_trace_batch(
        self, payload: dict[str, Any]
-    ) -> requests.Response:
+    ) -> httpx.Response:
        return self._make_request(
            "POST",
            f"{self.EPHEMERAL_TRACING_RESOURCE}/batches",
@@ -145,7 +148,7 @@ class PlusAPI:

    def send_trace_events(
        self, trace_batch_id: str, payload: dict[str, Any]
-    ) -> requests.Response:
+    ) -> httpx.Response:
        return self._make_request(
            "POST",
            f"{self.TRACING_RESOURCE}/batches/{trace_batch_id}/events",
@@ -155,7 +158,7 @@ class PlusAPI:

    def send_ephemeral_trace_events(
        self, trace_batch_id: str, payload: dict[str, Any]
-    ) -> requests.Response:
+    ) -> httpx.Response:
        return self._make_request(
            "POST",
            f"{self.EPHEMERAL_TRACING_RESOURCE}/batches/{trace_batch_id}/events",
@@ -165,7 +168,7 @@ class PlusAPI:

    def finalize_trace_batch(
        self, trace_batch_id: str, payload: dict[str, Any]
-    ) -> requests.Response:
+    ) -> httpx.Response:
        return self._make_request(
            "PATCH",
            f"{self.TRACING_RESOURCE}/batches/{trace_batch_id}/finalize",
@@ -175,7 +178,7 @@ class PlusAPI:

    def finalize_ephemeral_trace_batch(
        self, trace_batch_id: str, payload: dict[str, Any]
-    ) -> requests.Response:
+    ) -> httpx.Response:
        return self._make_request(
            "PATCH",
            f"{self.EPHEMERAL_TRACING_RESOURCE}/batches/{trace_batch_id}/finalize",
@@ -185,7 +188,7 @@ class PlusAPI:

    def mark_trace_batch_as_failed(
        self, trace_batch_id: str, error_message: str
-    ) -> requests.Response:
+    ) -> httpx.Response:
        return self._make_request(
            "PATCH",
            f"{self.TRACING_RESOURCE}/batches/{trace_batch_id}",
@@ -193,13 +196,20 @@ class PlusAPI:
            timeout=30,
        )

-    def get_triggers(self) -> requests.Response:
+    def get_mcp_configs(self, slugs: list[str]) -> httpx.Response:
+        """Get MCP server configurations for the given slugs."""
+        return self._make_request(
+            "GET",
+            f"{self.INTEGRATIONS_RESOURCE}/mcp_configs",
+            params={"slugs": ",".join(slugs)},
+            timeout=30,
+        )
+
+    def get_triggers(self) -> httpx.Response:
        """Get all available triggers from integrations."""
        return self._make_request("GET", f"{self.INTEGRATIONS_RESOURCE}/apps")

-    def get_trigger_payload(
-        self, app_slug: str, trigger_slug: str
-    ) -> requests.Response:
+    def get_trigger_payload(self, app_slug: str, trigger_slug: str) -> httpx.Response:
        """Get sample payload for a specific trigger."""
        return self._make_request(
            "GET", f"{self.INTEGRATIONS_RESOURCE}/{app_slug}/{trigger_slug}/payload"
--- a/lib/crewai/src/crewai/cli/provider.py
+++ b/lib/crewai/src/crewai/cli/provider.py
@@ -8,7 +8,7 @@ from typing import Any

 import certifi
 import click
-import requests
+import httpx

 from crewai.cli.constants import JSON_URL, MODELS, PROVIDERS

@@ -165,20 +165,20 @@ def fetch_provider_data(cache_file: Path) -> dict[str, Any] | None:
    ssl_config = os.environ["SSL_CERT_FILE"] = certifi.where()

    try:
-        response = requests.get(JSON_URL, stream=True, timeout=60, verify=ssl_config)
-        response.raise_for_status()
-        data = download_data(response)
-        with open(cache_file, "w") as f:
-            json.dump(data, f)
-        return data
-    except requests.RequestException as e:
+        with httpx.stream("GET", JSON_URL, timeout=60, verify=ssl_config) as response:
+            response.raise_for_status()
+            data = download_data(response)
+            with open(cache_file, "w") as f:
+                json.dump(data, f)
+            return data
+    except httpx.HTTPError as e:
        click.secho(f"Error fetching provider data: {e}", fg="red")
    except json.JSONDecodeError:
        click.secho("Error parsing provider data. Invalid JSON format.", fg="red")
    return None


-def download_data(response: requests.Response) -> dict[str, Any]:
+def download_data(response: httpx.Response) -> dict[str, Any]:
    """Downloads data from a given HTTP response and returns the JSON content.

    Args:
@@ -194,7 +194,7 @@ def download_data(response: requests.Response) -> dict[str, Any]:
    with click.progressbar(
        length=total_size, label="Downloading", show_pos=True
    ) as bar:
-        for chunk in response.iter_content(block_size):
+        for chunk in response.iter_bytes(block_size):
            if chunk:
                data_chunks.append(chunk)
                bar.update(len(chunk))
--- a/lib/crewai/src/crewai/cli/templates/crew/pyproject.toml
+++ b/lib/crewai/src/crewai/cli/templates/crew/pyproject.toml
@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
 authors = [{ name = "Your Name", email = "you@example.com" }]
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]==1.9.3"
+    "crewai[tools]==1.10.1"
 ]

 [project.scripts]
--- a/lib/crewai/src/crewai/cli/templates/flow/pyproject.toml
+++ b/lib/crewai/src/crewai/cli/templates/flow/pyproject.toml
@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
 authors = [{ name = "Your Name", email = "you@example.com" }]
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]==1.9.3"
+    "crewai[tools]==1.10.1"
 ]

 [project.scripts]
--- a/lib/crewai/src/crewai/cli/templates/tool/pyproject.toml
+++ b/lib/crewai/src/crewai/cli/templates/tool/pyproject.toml
@@ -5,7 +5,7 @@ description = "Power up your crews with {{folder_name}}"
 readme = "README.md"
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]>=0.203.1"
+    "crewai[tools]==1.10.1"
 ]

 [tool.crewai]
--- a/lib/crewai/src/crewai/cli/tools/main.py
+++ b/lib/crewai/src/crewai/cli/tools/main.py
@@ -23,6 +23,7 @@ from crewai.cli.utils import (
    tree_copy,
    tree_find_and_replace,
 )
+from crewai.events.listeners.tracing.utils import get_user_id


 console = Console()
@@ -169,7 +170,9 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
        console.print(f"Successfully installed {handle}", style="bold green")

    def login(self) -> None:
-        login_response = self.plus_api_client.login_to_tool_repository()
+        login_response = self.plus_api_client.login_to_tool_repository(
+            user_identifier=get_user_id()
+        )

        if login_response.status_code != 200:
            console.print(
--- a/lib/crewai/src/crewai/crews/init.py
+++ b/lib/crewai/src/crewai/crews/init.py
@@ -1,5 +1,4 @@
 from crewai.crews.crew_output import CrewOutput


-
 __all__ = ["CrewOutput"]
--- a/lib/crewai/src/crewai/events/init.py
+++ b/lib/crewai/src/crewai/events/init.py
@@ -63,6 +63,7 @@ from crewai.events.types.logging_events import (
    AgentLogsStartedEvent,
 )
 from crewai.events.types.mcp_events import (
+    MCPConfigFetchFailedEvent,
    MCPConnectionCompletedEvent,
    MCPConnectionFailedEvent,
    MCPConnectionStartedEvent,
@@ -165,6 +166,7 @@ __all__ = [
    "LiteAgentExecutionCompletedEvent",
    "LiteAgentExecutionErrorEvent",
    "LiteAgentExecutionStartedEvent",
+    "MCPConfigFetchFailedEvent",
    "MCPConnectionCompletedEvent",
    "MCPConnectionFailedEvent",
    "MCPConnectionStartedEvent",
--- a/lib/crewai/src/crewai/events/base_event_listener.py
+++ b/lib/crewai/src/crewai/events/base_event_listener.py
@@ -23,4 +23,3 @@ class BaseEventListener(ABC):
        Args:
            crewai_event_bus: The event bus to register listeners on.
        """
-        pass
--- a/lib/crewai/src/crewai/events/event_listener.py
+++ b/lib/crewai/src/crewai/events/event_listener.py
@@ -68,6 +68,7 @@ from crewai.events.types.logging_events import (
    AgentLogsStartedEvent,
 )
 from crewai.events.types.mcp_events import (
+    MCPConfigFetchFailedEvent,
    MCPConnectionCompletedEvent,
    MCPConnectionFailedEvent,
    MCPConnectionStartedEvent,
@@ -665,6 +666,16 @@ class EventListener(BaseEventListener):
                event.error_type,
            )

+        @crewai_event_bus.on(MCPConfigFetchFailedEvent)
+        def on_mcp_config_fetch_failed(
+            _: Any, event: MCPConfigFetchFailedEvent
+        ) -> None:
+            self.formatter.handle_mcp_config_fetch_failed(
+                event.slug,
+                event.error,
+                event.error_type,
+            )
+
        @crewai_event_bus.on(MCPToolExecutionStartedEvent)
        def on_mcp_tool_execution_started(
            _: Any, event: MCPToolExecutionStartedEvent
--- a/lib/crewai/src/crewai/events/event_types.py
+++ b/lib/crewai/src/crewai/events/event_types.py
@@ -67,6 +67,7 @@ from crewai.events.types.llm_guardrail_events import (
    LLMGuardrailStartedEvent,
 )
 from crewai.events.types.mcp_events import (
+    MCPConfigFetchFailedEvent,
    MCPConnectionCompletedEvent,
    MCPConnectionFailedEvent,
    MCPConnectionStartedEvent,
@@ -181,4 +182,5 @@ EventTypes = (
    | MCPToolExecutionStartedEvent
    | MCPToolExecutionCompletedEvent
    | MCPToolExecutionFailedEvent
+    | MCPConfigFetchFailedEvent
 )
--- a/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py
+++ b/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py
@@ -15,6 +15,7 @@ from crewai.cli.plus_api import PlusAPI
 from crewai.cli.version import get_crewai_version
 from crewai.events.listeners.tracing.types import TraceEvent
 from crewai.events.listeners.tracing.utils import (
+    get_user_id,
    is_tracing_enabled_in_context,
    should_auto_collect_first_time_traces,
 )
@@ -67,7 +68,7 @@ class TraceBatchManager:
                api_key=get_auth_token(),
            )
        except AuthError:
-            self.plus_api = PlusAPI(api_key="")
+            self.plus_api = PlusAPI()
        self.ephemeral_trace_url = None

    def initialize_batch(
@@ -120,7 +121,6 @@ class TraceBatchManager:
            payload = {
                "trace_id": self.current_batch.batch_id,
                "execution_type": execution_metadata.get("execution_type", "crew"),
-                "user_identifier": execution_metadata.get("user_context", None),
                "execution_context": {
                    "crew_fingerprint": execution_metadata.get("crew_fingerprint"),
                    "crew_name": execution_metadata.get("crew_name", None),
@@ -140,6 +140,7 @@ class TraceBatchManager:
            }
            if use_ephemeral:
                payload["ephemeral_trace_id"] = self.current_batch.batch_id
+                payload["user_identifier"] = get_user_id()

            response = (
                self.plus_api.initialize_ephemeral_trace_batch(payload)
--- a/lib/crewai/src/crewai/events/types/llm_events.py
+++ b/lib/crewai/src/crewai/events/types/llm_events.py
@@ -86,3 +86,11 @@ class LLMStreamChunkEvent(LLMEventBase):
    tool_call: ToolCall | None = None
    call_type: LLMCallType | None = None
    response_id: str | None = None
+
+
+class LLMThinkingChunkEvent(LLMEventBase):
+    """Event emitted when a thinking/reasoning chunk is received from a thinking model"""
+
+    type: str = "llm_thinking_chunk"
+    chunk: str
+    response_id: str | None = None
--- a/lib/crewai/src/crewai/events/types/mcp_events.py
+++ b/lib/crewai/src/crewai/events/types/mcp_events.py
@@ -83,3 +83,16 @@ class MCPToolExecutionFailedEvent(MCPEvent):
    error_type: str | None = None  # "timeout", "validation", "server_error", etc.
    started_at: datetime | None = None
    failed_at: datetime | None = None
+
+
+class MCPConfigFetchFailedEvent(BaseEvent):
+    """Event emitted when fetching an AMP MCP server config fails.
+
+    This covers cases where the slug is not connected, the API call
+    failed, or native MCP resolution failed after config was fetched.
+    """
+
+    type: str = "mcp_config_fetch_failed"
+    slug: str
+    error: str
+    error_type: str | None = None  # "not_connected", "api_error", "connection_failed"
--- a/lib/crewai/src/crewai/events/utils/console_formatter.py
+++ b/lib/crewai/src/crewai/events/utils/console_formatter.py
@@ -1512,6 +1512,34 @@ To enable tracing, do any one of these:
        self.print(panel)
        self.print()

+    def handle_mcp_config_fetch_failed(
+        self,
+        slug: str,
+        error: str = "",
+        error_type: str | None = None,
+    ) -> None:
+        """Handle MCP config fetch failed event (AMP resolution failures)."""
+        if not self.verbose:
+            return
+
+        content = Text()
+        content.append("MCP Config Fetch Failed\n\n", style="red bold")
+        content.append("Server: ", style="white")
+        content.append(f"{slug}\n", style="red")
+
+        if error_type:
+            content.append("Error Type: ", style="white")
+            content.append(f"{error_type}\n", style="red")
+
+        if error:
+            content.append("\nError: ", style="white bold")
+            error_preview = error[:500] + "..." if len(error) > 500 else error
+            content.append(f"{error_preview}\n", style="red")
+
+        panel = self.create_panel(content, "❌ MCP Config Failed", "red")
+        self.print(panel)
+        self.print()
+
    def handle_mcp_tool_execution_started(
        self,
        server_name: str,
--- a/lib/crewai/src/crewai/experimental/agent_executor.py
+++ b/lib/crewai/src/crewai/experimental/agent_executor.py
@@ -1,7 +1,11 @@
 from __future__ import annotations

+import asyncio
 from collections.abc import Callable, Coroutine
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import contextvars
 from datetime import datetime
+import inspect
 import json
 import threading
 from typing import TYPE_CHECKING, Any, Literal, cast
@@ -49,6 +53,8 @@ from crewai.hooks.types import (
    BeforeLLMCallHookCallable,
    BeforeLLMCallHookType,
 )
+from crewai.tools.base_tool import BaseTool
+from crewai.tools.structured_tool import CrewStructuredTool
 from crewai.utilities.agent_utils import (
    convert_tools_to_openai_schema,
    enforce_rpm_limit,
@@ -63,6 +69,7 @@ from crewai.utilities.agent_utils import (
    has_reached_max_iterations,
    is_context_length_exceeded,
    is_inside_event_loop,
+    parse_tool_call_args,
    process_llm_response,
    track_delegation_if_needed,
 )
@@ -81,8 +88,6 @@ if TYPE_CHECKING:
    from crewai.crew import Crew
    from crewai.llms.base_llm import BaseLLM
    from crewai.task import Task
-    from crewai.tools.base_tool import BaseTool
-    from crewai.tools.structured_tool import CrewStructuredTool
    from crewai.tools.tool_types import ToolResult
    from crewai.utilities.prompts import StandardPromptResult, SystemPromptResult

@@ -298,6 +303,7 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
            super().__init__(
                suppress_flow_events=True,
                tracing=current_tracing if current_tracing else None,
+                max_method_calls=self.max_iter * 10,
            )
            self._flow_initialized = True

@@ -317,7 +323,7 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
    def _setup_native_tools(self) -> None:
        """Convert tools to OpenAI schema format for native function calling."""
        if self.original_tools:
-            self._openai_tools, self._available_functions = (
+            self._openai_tools, self._available_functions, self._tool_name_mapping = (
                convert_tools_to_openai_schema(self.original_tools)
            )

@@ -399,7 +405,7 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
                self._setup_native_tools()
        return "initialized"

-    @listen("force_final_answer")
+    @listen("max_iterations_exceeded")
    def force_final_answer(self) -> Literal["agent_finished"]:
        """Force agent to provide final answer when max iterations exceeded."""
        formatted_answer = handle_max_iterations_exceeded(
@@ -590,21 +596,19 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
    def execute_tool_action(self) -> Literal["tool_completed", "tool_result_is_final"]:
        """Execute the tool action and handle the result."""

+        action = cast(AgentAction, self.state.current_answer)
+
+        fingerprint_context = {}
+        if (
+            self.agent
+            and hasattr(self.agent, "security_config")
+            and hasattr(self.agent.security_config, "fingerprint")
+        ):
+            fingerprint_context = {
+                "agent_fingerprint": str(self.agent.security_config.fingerprint)
+            }
+
        try:
-            action = cast(AgentAction, self.state.current_answer)
-
-            # Extract fingerprint context for tool execution
-            fingerprint_context = {}
-            if (
-                self.agent
-                and hasattr(self.agent, "security_config")
-                and hasattr(self.agent.security_config, "fingerprint")
-            ):
-                fingerprint_context = {
-                    "agent_fingerprint": str(self.agent.security_config.fingerprint)
-                }
-
-            # Execute the tool
            tool_result = execute_tool_and_check_finality(
                agent_action=action,
                fingerprint_context=fingerprint_context,
@@ -618,24 +622,19 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
                function_calling_llm=self.function_calling_llm,
                crew=self.crew,
            )
+        except Exception as e:
+            if self.agent and self.agent.verbose:
+                self._printer.print(
+                    content=f"Error in tool execution: {e}", color="red"
+                )
+            if self.task:
+                self.task.increment_tools_errors()

-            # Handle agent action and append observation to messages
-            result = self._handle_agent_action(action, tool_result)
-            self.state.current_answer = result
+            error_observation = f"\nObservation: Error executing tool: {e}"
+            action.text += error_observation
+            action.result = str(e)
+            self._append_message_to_state(action.text)

-            # Invoke step callback if configured
-            self._invoke_step_callback(result)
-
-            # Append result message to conversation state
-            if hasattr(result, "text"):
-                self._append_message_to_state(result.text)
-
-            # Check if tool result became a final answer (result_as_answer flag)
-            if isinstance(result, AgentFinish):
-                self.state.is_finished = True
-                return "tool_result_is_final"
-
-            # Inject post-tool reasoning prompt to enforce analysis
            reasoning_prompt = self._i18n.slice("post_tool_reasoning")
            reasoning_message: LLMMessage = {
                "role": "user",
@@ -645,12 +644,26 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):

            return "tool_completed"

-        except Exception as e:
-            error_text = Text()
-            error_text.append("❌ Error in tool execution: ", style="red bold")
-            error_text.append(str(e), style="red")
-            self._console.print(error_text)
-            raise
+        result = self._handle_agent_action(action, tool_result)
+        self.state.current_answer = result
+
+        self._invoke_step_callback(result)
+
+        if hasattr(result, "text"):
+            self._append_message_to_state(result.text)
+
+        if isinstance(result, AgentFinish):
+            self.state.is_finished = True
+            return "tool_result_is_final"
+
+        reasoning_prompt = self._i18n.slice("post_tool_reasoning")
+        reasoning_message_post: LLMMessage = {
+            "role": "user",
+            "content": reasoning_prompt,
+        }
+        self.state.messages.append(reasoning_message_post)
+
+        return "tool_completed"

    @listen("native_tool_calls")
    def execute_native_tool(
@@ -668,9 +681,12 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
        if not self.state.pending_tool_calls:
            return "native_tool_completed"

+        pending_tool_calls = list(self.state.pending_tool_calls)
+        self.state.pending_tool_calls.clear()
+
        # Group all tool calls into a single assistant message
        tool_calls_to_report = []
-        for tool_call in self.state.pending_tool_calls:
+        for tool_call in pending_tool_calls:
            info = extract_tool_call_info(tool_call)
            if not info:
                continue
@@ -695,202 +711,99 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
                "content": None,
                "tool_calls": tool_calls_to_report,
            }
-            if all(
-                type(tc).__qualname__ == "Part" for tc in self.state.pending_tool_calls
-            ):
-                assistant_message["raw_tool_call_parts"] = list(
-                    self.state.pending_tool_calls
-                )
+            if all(type(tc).__qualname__ == "Part" for tc in pending_tool_calls):
+                assistant_message["raw_tool_call_parts"] = list(pending_tool_calls)
            self.state.messages.append(assistant_message)

-        # Now execute each tool
-        while self.state.pending_tool_calls:
-            tool_call = self.state.pending_tool_calls.pop(0)
-            info = extract_tool_call_info(tool_call)
-            if not info:
-                continue
+        runnable_tool_calls = [
+            tool_call
+            for tool_call in pending_tool_calls
+            if extract_tool_call_info(tool_call) is not None
+        ]
+        should_parallelize = self._should_parallelize_native_tool_calls(
+            runnable_tool_calls
+        )

-            call_id, func_name, func_args = info
-
-            # Parse arguments
-            if isinstance(func_args, str):
-                try:
-                    args_dict = json.loads(func_args)
-                except json.JSONDecodeError:
-                    args_dict = {}
-            else:
-                args_dict = func_args
-
-            # Get agent_key for event tracking
-            agent_key = (
-                getattr(self.agent, "key", "unknown") if self.agent else "unknown"
-            )
-
-            # Find original tool by matching sanitized name (needed for cache_function and result_as_answer)
-            original_tool = None
-            for tool in self.original_tools or []:
-                if sanitize_tool_name(tool.name) == func_name:
-                    original_tool = tool
-                    break
-
-            # Check if tool has reached max usage count
-            max_usage_reached = False
-            if (
-                original_tool
-                and original_tool.max_usage_count is not None
-                and original_tool.current_usage_count >= original_tool.max_usage_count
-            ):
-                max_usage_reached = True
-
-            # Check cache before executing
-            from_cache = False
-            input_str = json.dumps(args_dict) if args_dict else ""
-            if self.tools_handler and self.tools_handler.cache:
-                cached_result = self.tools_handler.cache.read(
-                    tool=func_name, input=input_str
+        execution_results: list[dict[str, Any]] = []
+        if should_parallelize:
+            max_workers = min(8, len(runnable_tool_calls))
+            with ThreadPoolExecutor(max_workers=max_workers) as pool:
+                future_to_idx = {
+                    pool.submit(contextvars.copy_context().run, self._execute_single_native_tool_call, tool_call): idx
+                    for idx, tool_call in enumerate(runnable_tool_calls)
+                }
+                ordered_results: list[dict[str, Any] | None] = [None] * len(
+                    runnable_tool_calls
                )
-                if cached_result is not None:
-                    result = (
-                        str(cached_result)
-                        if not isinstance(cached_result, str)
-                        else cached_result
-                    )
-                    from_cache = True
-
-            # Emit tool usage started event
-            started_at = datetime.now()
-            crewai_event_bus.emit(
-                self,
-                event=ToolUsageStartedEvent(
-                    tool_name=func_name,
-                    tool_args=args_dict,
-                    from_agent=self.agent,
-                    from_task=self.task,
-                    agent_key=agent_key,
-                ),
-            )
-            error_event_emitted = False
-
-            track_delegation_if_needed(func_name, args_dict, self.task)
-
-            structured_tool: CrewStructuredTool | None = None
-            for structured in self.tools or []:
-                if sanitize_tool_name(structured.name) == func_name:
-                    structured_tool = structured
-                    break
-
-            hook_blocked = False
-            before_hook_context = ToolCallHookContext(
-                tool_name=func_name,
-                tool_input=args_dict,
-                tool=structured_tool,  # type: ignore[arg-type]
-                agent=self.agent,
-                task=self.task,
-                crew=self.crew,
-            )
-            before_hooks = get_before_tool_call_hooks()
-            try:
-                for hook in before_hooks:
-                    hook_result = hook(before_hook_context)
-                    if hook_result is False:
-                        hook_blocked = True
-                        break
-            except Exception as hook_error:
-                if self.agent.verbose:
-                    self._printer.print(
-                        content=f"Error in before_tool_call hook: {hook_error}",
-                        color="red",
-                    )
-
-            if hook_blocked:
-                result = f"Tool execution blocked by hook. Tool: {func_name}"
-            elif not from_cache and not max_usage_reached:
-                result = "Tool not found"
-                if func_name in self._available_functions:
+                for future in as_completed(future_to_idx):
+                    idx = future_to_idx[future]
                    try:
-                        tool_func = self._available_functions[func_name]
-                        raw_result = tool_func(**args_dict)
-
-                        # Add to cache after successful execution (before string conversion)
-                        if self.tools_handler and self.tools_handler.cache:
-                            should_cache = True
-                            if original_tool:
-                                should_cache = original_tool.cache_function(
-                                    args_dict, raw_result
-                                )
-                            if should_cache:
-                                self.tools_handler.cache.add(
-                                    tool=func_name, input=input_str, output=raw_result
-                                )
-
-                        # Convert to string for message
-                        result = (
-                            str(raw_result)
-                            if not isinstance(raw_result, str)
-                            else raw_result
-                        )
+                        ordered_results[idx] = future.result()
                    except Exception as e:
-                        result = f"Error executing tool: {e}"
-                        if self.task:
-                            self.task.increment_tools_errors()
-                        # Emit tool usage error event
-                        crewai_event_bus.emit(
-                            self,
-                            event=ToolUsageErrorEvent(
-                                tool_name=func_name,
-                                tool_args=args_dict,
-                                from_agent=self.agent,
-                                from_task=self.task,
-                                agent_key=agent_key,
-                                error=e,
-                            ),
-                        )
-                        error_event_emitted = True
-            elif max_usage_reached and original_tool:
-                # Return error message when max usage limit is reached
-                result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
+                        tool_call = runnable_tool_calls[idx]
+                        info = extract_tool_call_info(tool_call)
+                        call_id = info[0] if info else "unknown"
+                        func_name = info[1] if info else "unknown"
+                        ordered_results[idx] = {
+                            "call_id": call_id,
+                            "func_name": func_name,
+                            "result": f"Error executing tool: {e}",
+                            "from_cache": False,
+                            "original_tool": None,
+                        }
+                execution_results = [
+                    result for result in ordered_results if result is not None
+                ]
+        else:
+            # Execute sequentially so result_as_answer tools can short-circuit
+            # immediately without running remaining calls.
+            for tool_call in runnable_tool_calls:
+                execution_result = self._execute_single_native_tool_call(tool_call)
+                call_id = cast(str, execution_result["call_id"])
+                func_name = cast(str, execution_result["func_name"])
+                result = cast(str, execution_result["result"])
+                from_cache = cast(bool, execution_result["from_cache"])
+                original_tool = execution_result["original_tool"]

-            # Execute after_tool_call hooks (even if blocked, to allow logging/monitoring)
-            after_hook_context = ToolCallHookContext(
-                tool_name=func_name,
-                tool_input=args_dict,
-                tool=structured_tool,  # type: ignore[arg-type]
-                agent=self.agent,
-                task=self.task,
-                crew=self.crew,
-                tool_result=result,
-            )
-            after_hooks = get_after_tool_call_hooks()
-            try:
-                for after_hook in after_hooks:
-                    after_hook_result = after_hook(after_hook_context)
-                    if after_hook_result is not None:
-                        result = after_hook_result
-                        after_hook_context.tool_result = result
-            except Exception as hook_error:
-                if self.agent.verbose:
+                tool_message: LLMMessage = {
+                    "role": "tool",
+                    "tool_call_id": call_id,
+                    "name": func_name,
+                    "content": result,
+                }
+                self.state.messages.append(tool_message)
+
+                # Log the tool execution
+                if self.agent and self.agent.verbose:
+                    cache_info = " (from cache)" if from_cache else ""
                    self._printer.print(
-                        content=f"Error in after_tool_call hook: {hook_error}",
-                        color="red",
+                        content=f"Tool {func_name} executed with result{cache_info}: {result[:200]}...",
+                        color="green",
                    )

-            if not error_event_emitted:
-                crewai_event_bus.emit(
-                    self,
-                    event=ToolUsageFinishedEvent(
+                if (
+                    original_tool
+                    and hasattr(original_tool, "result_as_answer")
+                    and original_tool.result_as_answer
+                ):
+                    self.state.current_answer = AgentFinish(
+                        thought="Tool result is the final answer",
                        output=result,
-                        tool_name=func_name,
-                        tool_args=args_dict,
-                        from_agent=self.agent,
-                        from_task=self.task,
-                        agent_key=agent_key,
-                        started_at=started_at,
-                        finished_at=datetime.now(),
-                    ),
-                )
+                        text=result,
+                    )
+                    self.state.is_finished = True
+                    return "tool_result_is_final"

-            # Append tool result message
-            tool_message: LLMMessage = {
+            return "native_tool_completed"
+
+        for execution_result in execution_results:
+            call_id = cast(str, execution_result["call_id"])
+            func_name = cast(str, execution_result["func_name"])
+            result = cast(str, execution_result["result"])
+            from_cache = cast(bool, execution_result["from_cache"])
+            original_tool = execution_result["original_tool"]
+
+            tool_message = {
                "role": "tool",
                "tool_call_id": call_id,
                "name": func_name,
@@ -922,6 +835,249 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):

        return "native_tool_completed"

+    def _should_parallelize_native_tool_calls(self, tool_calls: list[Any]) -> bool:
+        """Determine if native tool calls are safe to run in parallel."""
+        if len(tool_calls) <= 1:
+            return False
+
+        for tool_call in tool_calls:
+            info = extract_tool_call_info(tool_call)
+            if not info:
+                continue
+            _, func_name, _ = info
+
+            mapping = getattr(self, "_tool_name_mapping", None)
+            original_tool: BaseTool | None = None
+            if mapping and func_name in mapping:
+                mapped = mapping[func_name]
+                if isinstance(mapped, BaseTool):
+                    original_tool = mapped
+            if original_tool is None:
+                for tool in self.original_tools or []:
+                    if sanitize_tool_name(tool.name) == func_name:
+                        original_tool = tool
+                        break
+
+            if not original_tool:
+                continue
+
+            if getattr(original_tool, "result_as_answer", False):
+                return False
+            if getattr(original_tool, "max_usage_count", None) is not None:
+                return False
+
+        return True
+
+    def _execute_single_native_tool_call(self, tool_call: Any) -> dict[str, Any]:
+        """Execute a single native tool call and return metadata/result."""
+        info = extract_tool_call_info(tool_call)
+        if not info:
+            call_id = (
+                getattr(tool_call, "id", None)
+                or (tool_call.get("id") if isinstance(tool_call, dict) else None)
+                or "unknown"
+            )
+            return {
+                "call_id": call_id,
+                "func_name": "unknown",
+                "result": "Error: Invalid native tool call format",
+                "from_cache": False,
+                "original_tool": None,
+            }
+
+        call_id, func_name, func_args = info
+
+        # Parse arguments
+        parsed_args, parse_error = parse_tool_call_args(func_args, func_name, call_id)
+        if parse_error is not None:
+            return parse_error
+        args_dict: dict[str, Any] = parsed_args or {}
+
+        # Get agent_key for event tracking
+        agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"
+
+        original_tool: BaseTool | None = None
+        mapping = getattr(self, "_tool_name_mapping", None)
+        if mapping and func_name in mapping:
+            mapped = mapping[func_name]
+            if isinstance(mapped, BaseTool):
+                original_tool = mapped
+        if original_tool is None:
+            for tool in self.original_tools or []:
+                if sanitize_tool_name(tool.name) == func_name:
+                    original_tool = tool
+                    break
+
+        # Check if tool has reached max usage count
+        max_usage_reached = False
+        if (
+            original_tool
+            and original_tool.max_usage_count is not None
+            and original_tool.current_usage_count >= original_tool.max_usage_count
+        ):
+            max_usage_reached = True
+
+        # Check cache before executing
+        from_cache = False
+        input_str = json.dumps(args_dict) if args_dict else ""
+        if self.tools_handler and self.tools_handler.cache:
+            cached_result = self.tools_handler.cache.read(
+                tool=func_name, input=input_str
+            )
+            if cached_result is not None:
+                result = (
+                    str(cached_result)
+                    if not isinstance(cached_result, str)
+                    else cached_result
+                )
+                from_cache = True
+
+        # Emit tool usage started event
+        started_at = datetime.now()
+        crewai_event_bus.emit(
+            self,
+            event=ToolUsageStartedEvent(
+                tool_name=func_name,
+                tool_args=args_dict,
+                from_agent=self.agent,
+                from_task=self.task,
+                agent_key=agent_key,
+            ),
+        )
+        error_event_emitted = False
+
+        track_delegation_if_needed(func_name, args_dict, self.task)
+
+        structured_tool: CrewStructuredTool | None = None
+        if original_tool is not None:
+            for structured in self.tools or []:
+                if getattr(structured, "_original_tool", None) is original_tool:
+                    structured_tool = structured
+                    break
+        if structured_tool is None:
+            for structured in self.tools or []:
+                if sanitize_tool_name(structured.name) == func_name:
+                    structured_tool = structured
+                    break
+
+        hook_blocked = False
+        before_hook_context = ToolCallHookContext(
+            tool_name=func_name,
+            tool_input=args_dict,
+            tool=structured_tool,  # type: ignore[arg-type]
+            agent=self.agent,
+            task=self.task,
+            crew=self.crew,
+        )
+        before_hooks = get_before_tool_call_hooks()
+        try:
+            for hook in before_hooks:
+                hook_result = hook(before_hook_context)
+                if hook_result is False:
+                    hook_blocked = True
+                    break
+        except Exception as hook_error:
+            if self.agent.verbose:
+                self._printer.print(
+                    content=f"Error in before_tool_call hook: {hook_error}",
+                    color="red",
+                )
+
+        if hook_blocked:
+            result = f"Tool execution blocked by hook. Tool: {func_name}"
+        elif not from_cache and not max_usage_reached:
+            result = "Tool not found"
+            if func_name in self._available_functions:
+                try:
+                    tool_func = self._available_functions[func_name]
+                    raw_result = tool_func(**args_dict)
+
+                    # Add to cache after successful execution (before string conversion)
+                    if self.tools_handler and self.tools_handler.cache:
+                        should_cache = True
+                        if original_tool:
+                            should_cache = original_tool.cache_function(
+                                args_dict, raw_result
+                            )
+                        if should_cache:
+                            self.tools_handler.cache.add(
+                                tool=func_name, input=input_str, output=raw_result
+                            )
+
+                    # Convert to string for message
+                    result = (
+                        str(raw_result)
+                        if not isinstance(raw_result, str)
+                        else raw_result
+                    )
+                except Exception as e:
+                    result = f"Error executing tool: {e}"
+                    if self.task:
+                        self.task.increment_tools_errors()
+                    # Emit tool usage error event
+                    crewai_event_bus.emit(
+                        self,
+                        event=ToolUsageErrorEvent(
+                            tool_name=func_name,
+                            tool_args=args_dict,
+                            from_agent=self.agent,
+                            from_task=self.task,
+                            agent_key=agent_key,
+                            error=e,
+                        ),
+                    )
+                    error_event_emitted = True
+        elif max_usage_reached and original_tool:
+            # Return error message when max usage limit is reached
+            result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
+
+        # Execute after_tool_call hooks (even if blocked, to allow logging/monitoring)
+        after_hook_context = ToolCallHookContext(
+            tool_name=func_name,
+            tool_input=args_dict,
+            tool=structured_tool,  # type: ignore[arg-type]
+            agent=self.agent,
+            task=self.task,
+            crew=self.crew,
+            tool_result=result,
+        )
+        after_hooks = get_after_tool_call_hooks()
+        try:
+            for after_hook in after_hooks:
+                after_hook_result = after_hook(after_hook_context)
+                if after_hook_result is not None:
+                    result = after_hook_result
+                    after_hook_context.tool_result = result
+        except Exception as hook_error:
+            if self.agent.verbose:
+                self._printer.print(
+                    content=f"Error in after_tool_call hook: {hook_error}",
+                    color="red",
+                )
+
+        if not error_event_emitted:
+            crewai_event_bus.emit(
+                self,
+                event=ToolUsageFinishedEvent(
+                    output=result,
+                    tool_name=func_name,
+                    tool_args=args_dict,
+                    from_agent=self.agent,
+                    from_task=self.task,
+                    agent_key=agent_key,
+                    started_at=started_at,
+                    finished_at=datetime.now(),
+                ),
+            )
+
+        return {
+            "call_id": call_id,
+            "func_name": func_name,
+            "result": result,
+            "from_cache": from_cache,
+            "original_tool": original_tool,
+        }
+
    def _extract_tool_name(self, tool_call: Any) -> str:
        """Extract tool name from various tool call formats."""
        if hasattr(tool_call, "function"):
@@ -954,11 +1110,11 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
    def check_max_iterations(
        self,
    ) -> Literal[
-        "force_final_answer", "continue_reasoning", "continue_reasoning_native"
+        "max_iterations_exceeded", "continue_reasoning", "continue_reasoning_native"
    ]:
        """Check if max iterations reached before proceeding with reasoning."""
        if has_reached_max_iterations(self.state.iterations, self.max_iter):
-            return "force_final_answer"
+            return "max_iterations_exceeded"
        if self.state.use_native_tools:
            return "continue_reasoning_native"
        return "continue_reasoning"
@@ -1252,7 +1408,9 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
            formatted_answer: Current agent response.
        """
        if self.step_callback:
-            self.step_callback(formatted_answer)
+            cb_result = self.step_callback(formatted_answer)
+            if inspect.iscoroutine(cb_result):
+                asyncio.run(cb_result)

    def _append_message_to_state(
        self, text: str, role: Literal["user", "assistant", "system"] = "assistant"
--- a/lib/crewai/src/crewai/flow/flow.py
+++ b/lib/crewai/src/crewai/flow/flow.py
@@ -10,13 +10,15 @@ import asyncio
 from collections.abc import (
    Callable,
    ItemsView,
+    Iterable,
    Iterator,
    KeysView,
    Sequence,
    ValuesView,
 )
-from concurrent.futures import Future
+from concurrent.futures import Future, ThreadPoolExecutor
 import copy
+import enum
 import inspect
 import logging
 import threading
@@ -27,8 +29,10 @@ from typing import (
    Generic,
    Literal,
    ParamSpec,
+    SupportsIndex,
    TypeVar,
    cast,
+    overload,
 )
 from uuid import uuid4

@@ -77,7 +81,12 @@ from crewai.flow.flow_wrappers import (
    StartMethod,
 )
 from crewai.flow.persistence.base import FlowPersistence
-from crewai.flow.types import FlowExecutionData, FlowMethodName, InputHistoryEntry, PendingListenerKey
+from crewai.flow.types import (
+    FlowExecutionData,
+    FlowMethodName,
+    InputHistoryEntry,
+    PendingListenerKey,
+)
 from crewai.flow.utils import (
    _extract_all_methods,
    _extract_all_methods_recursive,
@@ -426,8 +435,7 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
    """

    def __init__(self, lst: list[T], lock: threading.Lock) -> None:
-        # Do NOT call super().__init__() -- we don't want to copy data into
-        # the builtin list storage. All access goes through self._list.
+        super().__init__()  # empty builtin list; all access goes through self._list
        self._list = lst
        self._lock = lock

@@ -435,11 +443,11 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            self._list.append(item)

-    def extend(self, items: list[T]) -> None:
+    def extend(self, items: Iterable[T]) -> None:
        with self._lock:
            self._list.extend(items)

-    def insert(self, index: int, item: T) -> None:
+    def insert(self, index: SupportsIndex, item: T) -> None:
        with self._lock:
            self._list.insert(index, item)

@@ -447,7 +455,7 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            self._list.remove(item)

-    def pop(self, index: int = -1) -> T:
+    def pop(self, index: SupportsIndex = -1) -> T:
        with self._lock:
            return self._list.pop(index)

@@ -455,15 +463,23 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            self._list.clear()

-    def __setitem__(self, index: int, value: T) -> None:
+    @overload
+    def __setitem__(self, index: SupportsIndex, value: T) -> None: ...
+    @overload
+    def __setitem__(self, index: slice, value: Iterable[T]) -> None: ...
+    def __setitem__(self, index: Any, value: Any) -> None:
        with self._lock:
            self._list[index] = value

-    def __delitem__(self, index: int) -> None:
+    def __delitem__(self, index: SupportsIndex | slice) -> None:
        with self._lock:
            del self._list[index]

-    def __getitem__(self, index: int) -> T:
+    @overload
+    def __getitem__(self, index: SupportsIndex) -> T: ...
+    @overload
+    def __getitem__(self, index: slice) -> list[T]: ...
+    def __getitem__(self, index: Any) -> Any:
        return self._list[index]

    def __len__(self) -> int:
@@ -481,7 +497,51 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
    def __bool__(self) -> bool:
        return bool(self._list)

-    def __eq__(self, other: object) -> bool:  # type: ignore[override]
+    def index(self, value: T, start: SupportsIndex = 0, stop: SupportsIndex | None = None) -> int:  # type: ignore[override]
+        if stop is None:
+            return self._list.index(value, start)
+        return self._list.index(value, start, stop)
+
+    def count(self, value: T) -> int:
+        return self._list.count(value)
+
+    def sort(self, *, key: Any = None, reverse: bool = False) -> None:
+        with self._lock:
+            self._list.sort(key=key, reverse=reverse)
+
+    def reverse(self) -> None:
+        with self._lock:
+            self._list.reverse()
+
+    def copy(self) -> list[T]:
+        return self._list.copy()
+
+    def __add__(self, other: list[T]) -> list[T]:
+        return self._list + other
+
+    def __radd__(self, other: list[T]) -> list[T]:
+        return other + self._list
+
+    def __iadd__(self, other: Iterable[T]) -> LockedListProxy[T]:
+        with self._lock:
+            self._list += list(other)
+        return self
+
+    def __mul__(self, n: SupportsIndex) -> list[T]:
+        return self._list * n
+
+    def __rmul__(self, n: SupportsIndex) -> list[T]:
+        return self._list * n
+
+    def __imul__(self, n: SupportsIndex) -> LockedListProxy[T]:
+        with self._lock:
+            self._list *= n
+        return self
+
+    def __reversed__(self) -> Iterator[T]:
+        return reversed(self._list)
+
+    def __eq__(self, other: object) -> bool:
        """Compare based on the underlying list contents."""
        if isinstance(other, LockedListProxy):
            # Avoid deadlocks by acquiring locks in a consistent order.
@@ -492,7 +552,7 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            return self._list == other

-    def __ne__(self, other: object) -> bool:  # type: ignore[override]
+    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)


@@ -505,8 +565,7 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
    """

    def __init__(self, d: dict[str, T], lock: threading.Lock) -> None:
-        # Do NOT call super().__init__() -- we don't want to copy data into
-        # the builtin dict storage. All access goes through self._dict.
+        super().__init__()  # empty builtin dict; all access goes through self._dict
        self._dict = d
        self._lock = lock

@@ -518,11 +577,11 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            del self._dict[key]

-    def pop(self, key: str, *default: T) -> T:
+    def pop(self, key: str, *default: T) -> T:  # type: ignore[override]
        with self._lock:
            return self._dict.pop(key, *default)

-    def update(self, other: dict[str, T]) -> None:
+    def update(self, other: dict[str, T]) -> None:  # type: ignore[override]
        with self._lock:
            self._dict.update(other)

@@ -530,7 +589,7 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            self._dict.clear()

-    def setdefault(self, key: str, default: T) -> T:
+    def setdefault(self, key: str, default: T) -> T:  # type: ignore[override]
        with self._lock:
            return self._dict.setdefault(key, default)

@@ -546,16 +605,16 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
    def __contains__(self, key: object) -> bool:
        return key in self._dict

-    def keys(self) -> KeysView[str]:
+    def keys(self) -> KeysView[str]:  # type: ignore[override]
        return self._dict.keys()

-    def values(self) -> ValuesView[T]:
+    def values(self) -> ValuesView[T]:  # type: ignore[override]
        return self._dict.values()

-    def items(self) -> ItemsView[str, T]:
+    def items(self) -> ItemsView[str, T]:  # type: ignore[override]
        return self._dict.items()

-    def get(self, key: str, default: T | None = None) -> T | None:
+    def get(self, key: str, default: T | None = None) -> T | None:  # type: ignore[override]
        return self._dict.get(key, default)

    def __repr__(self) -> str:
@@ -564,7 +623,24 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
    def __bool__(self) -> bool:
        return bool(self._dict)

-    def __eq__(self, other: object) -> bool:  # type: ignore[override]
+    def copy(self) -> dict[str, T]:
+        return self._dict.copy()
+
+    def __or__(self, other: dict[str, T]) -> dict[str, T]:
+        return self._dict | other
+
+    def __ror__(self, other: dict[str, T]) -> dict[str, T]:
+        return other | self._dict
+
+    def __ior__(self, other: dict[str, T]) -> LockedDictProxy[T]:
+        with self._lock:
+            self._dict |= other
+        return self
+
+    def __reversed__(self) -> Iterator[str]:
+        return reversed(self._dict)
+
+    def __eq__(self, other: object) -> bool:
        """Compare based on the underlying dict contents."""
        if isinstance(other, LockedDictProxy):
            # Avoid deadlocks by acquiring locks in a consistent order.
@@ -575,7 +651,7 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            return self._dict == other

-    def __ne__(self, other: object) -> bool:  # type: ignore[override]
+    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)


@@ -605,6 +681,10 @@ class StateProxy(Generic[T]):
        if name in ("_proxy_state", "_proxy_lock"):
            object.__setattr__(self, name, value)
        else:
+            if isinstance(value, LockedListProxy):
+                value = value._list
+            elif isinstance(value, LockedDictProxy):
+                value = value._dict
            with object.__getattribute__(self, "_proxy_lock"):
                setattr(object.__getattribute__(self, "_proxy_state"), name, value)

@@ -677,6 +757,7 @@ class FlowMeta(type):
                    condition_type = getattr(
                        attr_value, "__condition_type__", OR_CONDITION
                    )
+
                    if (
                        hasattr(attr_value, "__trigger_condition__")
                        and attr_value.__trigger_condition__ is not None
@@ -737,7 +818,9 @@ class Flow(Generic[T], metaclass=FlowMeta):
    name: str | None = None
    tracing: bool | None = None
    stream: bool = False
-    memory: Any = None  # Memory | MemoryScope | MemorySlice | None; auto-created if not set
+    memory: Any = (
+        None  # Memory | MemoryScope | MemorySlice | None; auto-created if not set
+    )
    input_provider: Any = None  # InputProvider | None; per-flow override for self.ask()

    def __class_getitem__(cls: type[Flow[T]], item: type[T]) -> type[Flow[T]]:
@@ -752,6 +835,7 @@ class Flow(Generic[T], metaclass=FlowMeta):
        persistence: FlowPersistence | None = None,
        tracing: bool | None = None,
        suppress_flow_events: bool = False,
+        max_method_calls: int = 100,
        **kwargs: Any,
    ) -> None:
        """Initialize a new Flow instance.
@@ -760,6 +844,7 @@ class Flow(Generic[T], metaclass=FlowMeta):
            persistence: Optional persistence backend for storing flow states
            tracing: Whether to enable tracing. True=always enable, False=always disable, None=check environment/user settings
            suppress_flow_events: Whether to suppress flow event emissions (internal use)
+            max_method_calls: Maximum times a single method can be called per execution before raising RecursionError
            **kwargs: Additional state values to initialize or override
        """
        # Initialize basic instance attributes
@@ -775,6 +860,8 @@ class Flow(Generic[T], metaclass=FlowMeta):
        self._completed_methods: set[FlowMethodName] = (
            set()
        )  # Track completed methods for reload
+        self._method_call_counts: dict[FlowMethodName, int] = {}
+        self._max_method_calls = max_method_calls
        self._persistence: FlowPersistence | None = persistence
        self._is_execution_resuming: bool = False
        self._event_futures: list[Future[None]] = []
@@ -881,7 +968,8 @@ class Flow(Generic[T], metaclass=FlowMeta):
        """
        if self.memory is None:
            raise ValueError("No memory configured for this flow")
-        return self.memory.extract_memories(content)
+        result: list[str] = self.memory.extract_memories(content)
+        return result

    def _mark_or_listener_fired(self, listener_name: FlowMethodName) -> bool:
        """Mark an OR listener as fired atomically.
@@ -1352,8 +1440,10 @@ class Flow(Generic[T], metaclass=FlowMeta):
            ValueError: If structured state model lacks 'id' field
            TypeError: If state is neither BaseModel nor dictionary
        """
+        init_state = self.initial_state
+
        # Handle case where initial_state is None but we have a type parameter
-        if self.initial_state is None and hasattr(self, "_initial_state_t"):
+        if init_state is None and hasattr(self, "_initial_state_t"):
            state_type = self._initial_state_t
            if isinstance(state_type, type):
                if issubclass(state_type, FlowState):
@@ -1377,12 +1467,12 @@ class Flow(Generic[T], metaclass=FlowMeta):
                    return cast(T, {"id": str(uuid4())})

        # Handle case where no initial state is provided
-        if self.initial_state is None:
+        if init_state is None:
            return cast(T, {"id": str(uuid4())})

        # Handle case where initial_state is a type (class)
-        if isinstance(self.initial_state, type):
-            state_class: type[T] = self.initial_state
+        if isinstance(init_state, type):
+            state_class = init_state
            if issubclass(state_class, FlowState):
                return state_class()
            if issubclass(state_class, BaseModel):
@@ -1393,19 +1483,19 @@ class Flow(Generic[T], metaclass=FlowMeta):
                if not getattr(model_instance, "id", None):
                    object.__setattr__(model_instance, "id", str(uuid4()))
                return model_instance
-            if self.initial_state is dict:
+            if init_state is dict:
                return cast(T, {"id": str(uuid4())})

        # Handle dictionary instance case
-        if isinstance(self.initial_state, dict):
-            new_state = dict(self.initial_state)  # Copy to avoid mutations
+        if isinstance(init_state, dict):
+            new_state = dict(init_state)  # Copy to avoid mutations
            if "id" not in new_state:
                new_state["id"] = str(uuid4())
            return cast(T, new_state)

        # Handle BaseModel instance case
-        if isinstance(self.initial_state, BaseModel):
-            model = cast(BaseModel, self.initial_state)
+        if isinstance(init_state, BaseModel):
+            model = cast(BaseModel, init_state)
            if not hasattr(model, "id"):
                raise ValueError("Flow state model must have an 'id' field")

@@ -1719,7 +1809,12 @@ class Flow(Generic[T], metaclass=FlowMeta):
        async def _run_flow() -> Any:
            return await self.kickoff_async(inputs, input_files)

-        return asyncio.run(_run_flow())
+        try:
+            asyncio.get_running_loop()
+            with ThreadPoolExecutor(max_workers=1) as pool:
+                return pool.submit(asyncio.run, _run_flow()).result()
+        except RuntimeError:
+            return asyncio.run(_run_flow())

    async def kickoff_async(
        self,
@@ -1803,9 +1898,15 @@ class Flow(Generic[T], metaclass=FlowMeta):
                self._method_outputs.clear()
                self._pending_and_listeners.clear()
                self._clear_or_listeners()
+                self._method_call_counts.clear()
            else:
-                # We're restoring from persistence, set the flag
-                self._is_execution_resuming = True
+                # Only enter resumption mode if there are completed methods to
+                # replay.  When _completed_methods is empty (e.g. a pure
+                # state-reload via kickoff(inputs={"id": ...})), the flow
+                # executes from scratch and the flag would incorrectly
+                # suppress cyclic re-execution on the second iteration.
+                if self._completed_methods:
+                    self._is_execution_resuming = True

            if inputs:
                # Override the id in the state if it exists in inputs
@@ -2173,6 +2274,8 @@ class Flow(Generic[T], metaclass=FlowMeta):
            from crewai.flow.async_feedback.types import HumanFeedbackPending

            if isinstance(e, HumanFeedbackPending):
+                e.context.method_name = method_name
+
                # Auto-save pending feedback (create default persistence if needed)
                if self._persistence is None:
                    from crewai.flow.persistence import SQLiteFlowPersistence
@@ -2272,14 +2375,23 @@ class Flow(Generic[T], metaclass=FlowMeta):
                    router_name, router_input, current_triggering_event_id
                )
                if router_result:  # Only add non-None results
-                    router_results.append(FlowMethodName(str(router_result)))
+                    router_result_str = (
+                        router_result.value
+                        if isinstance(router_result, enum.Enum)
+                        else str(router_result)
+                    )
+                    router_results.append(FlowMethodName(router_result_str))
                    # If this was a human_feedback router, map the outcome to the feedback
                    if self.last_human_feedback is not None:
-                        router_result_to_feedback[str(router_result)] = (
+                        router_result_to_feedback[router_result_str] = (
                            self.last_human_feedback
                        )
                current_trigger = (
-                    FlowMethodName(str(router_result))
+                    FlowMethodName(
+                        router_result.value
+                        if isinstance(router_result, enum.Enum)
+                        else str(router_result)
+                    )
                    if router_result is not None
                    else FlowMethodName("")  # Update for next iteration of router chain
                )
@@ -2528,6 +2640,16 @@ class Flow(Generic[T], metaclass=FlowMeta):
            - Skips execution if method was already completed (e.g., after reload)
            - Catches and logs any exceptions during execution, preventing individual listener failures from breaking the entire flow
        """
+        count = self._method_call_counts.get(listener_name, 0) + 1
+        if count > self._max_method_calls:
+            raise RecursionError(
+                f"Method '{listener_name}' has been called {self._max_method_calls} times in "
+                f"this flow execution, which indicates an infinite loop. "
+                f"This commonly happens when a @listen label matches the "
+                f"method's own name."
+            )
+        self._method_call_counts[listener_name] = count
+
        if listener_name in self._completed_methods:
            if self._is_execution_resuming:
                # During resumption, skip execution but continue listeners
@@ -2696,7 +2818,10 @@ class Flow(Generic[T], metaclass=FlowMeta):
                    return topic
            ```
        """
-        from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
+        from concurrent.futures import (
+            ThreadPoolExecutor,
+            TimeoutError as FuturesTimeoutError,
+        )
        from datetime import datetime

        from crewai.events.types.flow_events import (
@@ -2765,14 +2890,16 @@ class Flow(Generic[T], metaclass=FlowMeta):
            response = None

        # Record in history
-        self._input_history.append({
-            "message": message,
-            "response": response,
-            "method_name": method_name,
-            "timestamp": datetime.now(),
-            "metadata": metadata,
-            "response_metadata": response_metadata,
-        })
+        self._input_history.append(
+            {
+                "message": message,
+                "response": response,
+                "method_name": method_name,
+                "timestamp": datetime.now(),
+                "metadata": metadata,
+                "response_metadata": response_metadata,
+            }
+        )

        # Emit input received event
        crewai_event_bus.emit(
--- a/lib/crewai/src/crewai/flow/human_feedback.py
+++ b/lib/crewai/src/crewai/flow/human_feedback.py
@@ -408,7 +408,7 @@ def human_feedback(
                emit=list(emit) if emit else None,
                default_outcome=default_outcome,
                metadata=metadata or {},
-                llm=llm if isinstance(llm, str) else None,
+                llm=llm if isinstance(llm, str) else getattr(llm, "model", None),
            )

            # Determine effective provider:
--- a/lib/crewai/src/crewai/lite_agent.py
+++ b/lib/crewai/src/crewai/lite_agent.py
@@ -2,10 +2,10 @@ from __future__ import annotations

 import asyncio
 from collections.abc import Callable
-import time
 from functools import wraps
 import inspect
 import json
+import time
 from types import MethodType
 from typing import (
    TYPE_CHECKING,
@@ -49,15 +49,20 @@ from crewai.events.types.agent_events import (
    LiteAgentExecutionErrorEvent,
    LiteAgentExecutionStartedEvent,
 )
+from crewai.events.types.logging_events import AgentLogsExecutionEvent
 from crewai.events.types.memory_events import (
    MemoryRetrievalCompletedEvent,
    MemoryRetrievalFailedEvent,
    MemoryRetrievalStartedEvent,
 )
-from crewai.events.types.logging_events import AgentLogsExecutionEvent
 from crewai.flow.flow_trackable import FlowTrackable
 from crewai.hooks.llm_hooks import get_after_llm_call_hooks, get_before_llm_call_hooks
-from crewai.hooks.types import AfterLLMCallHookType, BeforeLLMCallHookType
+from crewai.hooks.types import (
+    AfterLLMCallHookCallable,
+    AfterLLMCallHookType,
+    BeforeLLMCallHookCallable,
+    BeforeLLMCallHookType,
+)
 from crewai.lite_agent_output import LiteAgentOutput
 from crewai.llm import LLM
 from crewai.llms.base_llm import BaseLLM
@@ -270,11 +275,11 @@ class LiteAgent(FlowTrackable, BaseModel):
    _guardrail: GuardrailCallable | None = PrivateAttr(default=None)
    _guardrail_retry_count: int = PrivateAttr(default=0)
    _callbacks: list[TokenCalcHandler] = PrivateAttr(default_factory=list)
-    _before_llm_call_hooks: list[BeforeLLMCallHookType] = PrivateAttr(
-        default_factory=get_before_llm_call_hooks
+    _before_llm_call_hooks: list[BeforeLLMCallHookType | BeforeLLMCallHookCallable] = (
+        PrivateAttr(default_factory=get_before_llm_call_hooks)
    )
-    _after_llm_call_hooks: list[AfterLLMCallHookType] = PrivateAttr(
-        default_factory=get_after_llm_call_hooks
+    _after_llm_call_hooks: list[AfterLLMCallHookType | AfterLLMCallHookCallable] = (
+        PrivateAttr(default_factory=get_after_llm_call_hooks)
    )
    _memory: Any = PrivateAttr(default=None)

@@ -440,12 +445,16 @@ class LiteAgent(FlowTrackable, BaseModel):
        return self.role

    @property
-    def before_llm_call_hooks(self) -> list[BeforeLLMCallHookType]:
+    def before_llm_call_hooks(
+        self,
+    ) -> list[BeforeLLMCallHookType | BeforeLLMCallHookCallable]:
        """Get the before_llm_call hooks for this agent."""
        return self._before_llm_call_hooks

    @property
-    def after_llm_call_hooks(self) -> list[AfterLLMCallHookType]:
+    def after_llm_call_hooks(
+        self,
+    ) -> list[AfterLLMCallHookType | AfterLLMCallHookCallable]:
        """Get the after_llm_call hooks for this agent."""
        return self._after_llm_call_hooks

@@ -482,11 +491,12 @@ class LiteAgent(FlowTrackable, BaseModel):
        # Inject memory tools once if memory is configured (mirrors Agent._prepare_kickoff)
        if self._memory is not None:
            from crewai.tools.memory_tools import create_memory_tools
-            from crewai.utilities.agent_utils import sanitize_tool_name
+            from crewai.utilities.string_utils import sanitize_tool_name

            existing_names = {sanitize_tool_name(t.name) for t in self._parsed_tools}
            memory_tools = [
-                mt for mt in create_memory_tools(self._memory)
+                mt
+                for mt in create_memory_tools(self._memory)
                if sanitize_tool_name(mt.name) not in existing_names
            ]
            if memory_tools:
@@ -565,9 +575,10 @@ class LiteAgent(FlowTrackable, BaseModel):
            if memory_block:
                formatted = self.i18n.slice("memory").format(memory=memory_block)
                if self._messages and self._messages[0].get("role") == "system":
-                    self._messages[0]["content"] = (
-                        self._messages[0].get("content", "") + "\n\n" + formatted
-                    )
+                    existing_content = self._messages[0].get("content", "")
+                    if not isinstance(existing_content, str):
+                        existing_content = ""
+                    self._messages[0]["content"] = existing_content + "\n\n" + formatted
            crewai_event_bus.emit(
                self,
                event=MemoryRetrievalCompletedEvent(
@@ -588,16 +599,12 @@ class LiteAgent(FlowTrackable, BaseModel):
            )

    def _save_to_memory(self, output_text: str) -> None:
-        """Extract discrete memories from the run and remember each. No-op if _memory is None."""
-        if self._memory is None:
+        """Extract discrete memories from the run and remember each. No-op if _memory is None or read-only."""
+        if self._memory is None or self._memory.read_only:
            return
        input_str = self._get_last_user_content() or "User request"
        try:
-            raw = (
-                f"Input: {input_str}\n"
-                f"Agent: {self.role}\n"
-                f"Result: {output_text}"
-            )
+            raw = f"Input: {input_str}\nAgent: {self.role}\nResult: {output_text}"
            extracted = self._memory.extract_memories(raw)
            if extracted:
                self._memory.remember_many(extracted, agent_role=self.role)
@@ -622,13 +629,20 @@ class LiteAgent(FlowTrackable, BaseModel):
        )

        # Execute the agent using invoke loop
-        agent_finish = self._invoke_loop()
+        active_response_format = response_format or self.response_format
+        agent_finish = self._invoke_loop(response_model=active_response_format)
        if self._memory is not None:
-            self._save_to_memory(agent_finish.output)
+            output_text = (
+                agent_finish.output.model_dump_json()
+                if isinstance(agent_finish.output, BaseModel)
+                else agent_finish.output
+            )
+            self._save_to_memory(output_text)
        formatted_result: BaseModel | None = None

-        active_response_format = response_format or self.response_format
-        if active_response_format:
+        if isinstance(agent_finish.output, BaseModel):
+            formatted_result = agent_finish.output
+        elif active_response_format:
            try:
                model_schema = generate_model_description(active_response_format)
                schema = json.dumps(model_schema, indent=2)
@@ -660,8 +674,13 @@ class LiteAgent(FlowTrackable, BaseModel):
            usage_metrics = self._token_process.get_summary()

        # Create output
+        raw_output = (
+            agent_finish.output.model_dump_json()
+            if isinstance(agent_finish.output, BaseModel)
+            else agent_finish.output
+        )
        output = LiteAgentOutput(
-            raw=agent_finish.output,
+            raw=raw_output,
            pydantic=formatted_result,
            agent_role=self.role,
            usage_metrics=usage_metrics.model_dump() if usage_metrics else None,
@@ -838,10 +857,15 @@ class LiteAgent(FlowTrackable, BaseModel):

        return formatted_messages

-    def _invoke_loop(self) -> AgentFinish:
+    def _invoke_loop(
+        self, response_model: type[BaseModel] | None = None
+    ) -> AgentFinish:
        """
        Run the agent's thought process until it reaches a conclusion or max iterations.

+        Args:
+            response_model: Optional Pydantic model for native structured output.
+
        Returns:
            AgentFinish: The final result of the agent execution.
        """
@@ -870,12 +894,19 @@ class LiteAgent(FlowTrackable, BaseModel):
                        printer=self._printer,
                        from_agent=self,
                        executor_context=self,
+                        response_model=response_model,
                        verbose=self.verbose,
                    )

                except Exception as e:
                    raise e

+                if isinstance(answer, BaseModel):
+                    formatted_answer = AgentFinish(
+                        thought="", output=answer, text=answer.model_dump_json()
+                    )
+                    break
+
                formatted_answer = process_llm_response(
                    cast(str, answer), self.use_stop_words
                )
@@ -901,7 +932,7 @@ class LiteAgent(FlowTrackable, BaseModel):
                    )

                self._append_message(formatted_answer.text, role="assistant")
-            except OutputParserError as e:  # noqa: PERF203
+            except OutputParserError as e:
                if self.verbose:
                    self._printer.print(
                        content="Failed to parse LLM output. Retrying...",
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -427,7 +427,7 @@ class LLM(BaseLLM):
                f"installed.\n\n"
                f"To fix this, either:\n"
                f"  1. Install LiteLLM for broad model support: "
-                f"uv add litellm\n"
+                f"uv add 'crewai[litellm]'\n"
                f"or\n"
                f"pip install litellm\n\n"
                f"For more details, see: "
--- a/lib/crewai/src/crewai/llms/base_llm.py
+++ b/lib/crewai/src/crewai/llms/base_llm.py
@@ -26,6 +26,7 @@ from crewai.events.types.llm_events import (
    LLMCallStartedEvent,
    LLMCallType,
    LLMStreamChunkEvent,
+    LLMThinkingChunkEvent,
 )
 from crewai.events.types.tool_usage_events import (
    ToolUsageErrorEvent,
@@ -368,9 +369,6 @@ class BaseLLM(ABC):
        """Emit LLM call started event."""
        from crewai.utilities.serialization import to_serializable

-        if not hasattr(crewai_event_bus, "emit"):
-            raise ValueError("crewai_event_bus does not have an emit method") from None
-
        crewai_event_bus.emit(
            self,
            event=LLMCallStartedEvent(
@@ -416,9 +414,6 @@ class BaseLLM(ABC):
        from_agent: Agent | None = None,
    ) -> None:
        """Emit LLM call failed event."""
-        if not hasattr(crewai_event_bus, "emit"):
-            raise ValueError("crewai_event_bus does not have an emit method") from None
-
        crewai_event_bus.emit(
            self,
            event=LLMCallFailedEvent(
@@ -449,9 +444,6 @@ class BaseLLM(ABC):
            call_type: The type of LLM call (LLM_CALL or TOOL_CALL).
            response_id: Unique ID for a particular LLM response, chunks have same response_id.
        """
-        if not hasattr(crewai_event_bus, "emit"):
-            raise ValueError("crewai_event_bus does not have an emit method") from None
-
        crewai_event_bus.emit(
            self,
            event=LLMStreamChunkEvent(
@@ -465,6 +457,32 @@ class BaseLLM(ABC):
            ),
        )

+    def _emit_thinking_chunk_event(
+        self,
+        chunk: str,
+        from_task: Task | None = None,
+        from_agent: Agent | None = None,
+        response_id: str | None = None,
+    ) -> None:
+        """Emit thinking/reasoning chunk event from a thinking model.
+
+        Args:
+            chunk: The thinking text content.
+            from_task: The task that initiated the call.
+            from_agent: The agent that initiated the call.
+            response_id: Unique ID for a particular LLM response.
+        """
+        crewai_event_bus.emit(
+            self,
+            event=LLMThinkingChunkEvent(
+                chunk=chunk,
+                from_task=from_task,
+                from_agent=from_agent,
+                response_id=response_id,
+                call_id=get_current_call_id(),
+            ),
+        )
+
    def _handle_tool_execution(
        self,
        function_name: str,
--- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
@@ -234,7 +234,7 @@ class BedrockCompletion(BaseLLM):
        aws_access_key_id: str | None = None,
        aws_secret_access_key: str | None = None,
        aws_session_token: str | None = None,
-        region_name: str = "us-east-1",
+        region_name: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
        top_p: float | None = None,
@@ -287,15 +287,6 @@ class BedrockCompletion(BaseLLM):
            **kwargs,
        )

-        # Initialize Bedrock client with proper configuration
-        session = Session(
-            aws_access_key_id=aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID"),
-            aws_secret_access_key=aws_secret_access_key
-            or os.getenv("AWS_SECRET_ACCESS_KEY"),
-            aws_session_token=aws_session_token or os.getenv("AWS_SESSION_TOKEN"),
-            region_name=region_name,
-        )
-
        # Configure client with timeouts and retries following AWS best practices
        config = Config(
            read_timeout=300,
@@ -306,8 +297,12 @@ class BedrockCompletion(BaseLLM):
            tcp_keepalive=True,
        )

-        self.client = session.client("bedrock-runtime", config=config)
-        self.region_name = region_name
+        self.region_name = (
+            region_name
+            or os.getenv("AWS_DEFAULT_REGION")
+            or os.getenv("AWS_REGION_NAME")
+            or "us-east-1"
+        )

        self.aws_access_key_id = aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID")
        self.aws_secret_access_key = aws_secret_access_key or os.getenv(
@@ -315,6 +310,16 @@ class BedrockCompletion(BaseLLM):
        )
        self.aws_session_token = aws_session_token or os.getenv("AWS_SESSION_TOKEN")

+        # Initialize Bedrock client with proper configuration
+        session = Session(
+            aws_access_key_id=self.aws_access_key_id,
+            aws_secret_access_key=self.aws_secret_access_key,
+            aws_session_token=self.aws_session_token,
+            region_name=self.region_name,
+        )
+
+        self.client = session.client("bedrock-runtime", config=config)
+
        self._async_exit_stack = AsyncExitStack() if AIOBOTOCORE_AVAILABLE else None
        self._async_client_initialized = False

--- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
@@ -61,6 +61,7 @@ class GeminiCompletion(BaseLLM):
        interceptor: BaseInterceptor[Any, Any] | None = None,
        use_vertexai: bool | None = None,
        response_format: type[BaseModel] | None = None,
+        thinking_config: types.ThinkingConfig | None = None,
        **kwargs: Any,
    ):
        """Initialize Google Gemini chat completion client.
@@ -93,6 +94,10 @@ class GeminiCompletion(BaseLLM):
                         api_version="v1" is automatically configured.
            response_format: Pydantic model for structured output. Used as default when
                           response_model is not passed to call()/acall() methods.
+            thinking_config: ThinkingConfig for thinking models (gemini-2.5+, gemini-3+).
+                           Controls thought output via include_thoughts, thinking_budget,
+                           and thinking_level. When None, thinking models automatically
+                           get include_thoughts=True so thought content is surfaced.
            **kwargs: Additional parameters
        """
        if interceptor is not None:
@@ -139,6 +144,14 @@ class GeminiCompletion(BaseLLM):
            version_match and float(version_match.group(1)) >= 2.0
        )

+        self.thinking_config = thinking_config
+        if (
+            self.thinking_config is None
+            and version_match
+            and float(version_match.group(1)) >= 2.5
+        ):
+            self.thinking_config = types.ThinkingConfig(include_thoughts=True)
+
    @property
    def stop(self) -> list[str]:
        """Get stop sequences sent to the API."""
@@ -520,6 +533,9 @@ class GeminiCompletion(BaseLLM):
        if self.safety_settings:
            config_params["safety_settings"] = self.safety_settings

+        if self.thinking_config is not None:
+            config_params["thinking_config"] = self.thinking_config
+
        return types.GenerateContentConfig(**config_params)

    def _convert_tools_for_interference(  # type: ignore[override]
@@ -618,9 +634,17 @@ class GeminiCompletion(BaseLLM):
                function_response_part = types.Part.from_function_response(
                    name=tool_name, response=response_data
                )
-                contents.append(
-                    types.Content(role="user", parts=[function_response_part])
-                )
+                if (
+                    contents
+                    and contents[-1].role == "user"
+                    and contents[-1].parts
+                    and contents[-1].parts[-1].function_response is not None
+                ):
+                    contents[-1].parts.append(function_response_part)
+                else:
+                    contents.append(
+                        types.Content(role="user", parts=[function_response_part])
+                    )
            elif role == "assistant" and message.get("tool_calls"):
                raw_parts: list[Any] | None = message.get("raw_tool_call_parts")
                if raw_parts and all(isinstance(p, types.Part) for p in raw_parts):
@@ -894,7 +918,7 @@ class GeminiCompletion(BaseLLM):
        content = self._extract_text_from_response(response)

        effective_response_model = None if self.tools else response_model
-        if not effective_response_model:
+        if not response_model:
            content = self._apply_stop_words(content)

        return self._finalize_completion_response(
@@ -931,15 +955,6 @@ class GeminiCompletion(BaseLLM):
        if chunk.usage_metadata:
            usage_data = self._extract_token_usage(chunk)

-        if chunk.text:
-            full_response += chunk.text
-            self._emit_stream_chunk_event(
-                chunk=chunk.text,
-                from_task=from_task,
-                from_agent=from_agent,
-                response_id=response_id,
-            )
-
        if chunk.candidates:
            candidate = chunk.candidates[0]
            if candidate.content and candidate.content.parts:
@@ -976,6 +991,21 @@ class GeminiCompletion(BaseLLM):
                            call_type=LLMCallType.TOOL_CALL,
                            response_id=response_id,
                        )
+                    elif part.thought and part.text:
+                        self._emit_thinking_chunk_event(
+                            chunk=part.text,
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            response_id=response_id,
+                        )
+                    elif part.text:
+                        full_response += part.text
+                        self._emit_stream_chunk_event(
+                            chunk=part.text,
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            response_id=response_id,
+                        )

        return full_response, function_calls, usage_data

@@ -1329,7 +1359,7 @@ class GeminiCompletion(BaseLLM):
        text_parts = [
            part.text
            for part in candidate.content.parts
-            if hasattr(part, "text") and part.text
+            if part.text and not part.thought
        ]

        return "".join(text_parts)
--- a/lib/crewai/src/crewai/mcp/init.py
+++ b/lib/crewai/src/crewai/mcp/init.py
@@ -18,6 +18,7 @@ from crewai.mcp.filters import (
    create_dynamic_tool_filter,
    create_static_tool_filter,
 )
+from crewai.mcp.tool_resolver import MCPToolResolver
 from crewai.mcp.transports.base import BaseTransport, TransportType


@@ -28,6 +29,7 @@ __all__ = [
    "MCPServerHTTP",
    "MCPServerSSE",
    "MCPServerStdio",
+    "MCPToolResolver",
    "StaticToolFilter",
    "ToolFilter",
    "ToolFilterContext",
--- a/lib/crewai/src/crewai/mcp/client.py
+++ b/lib/crewai/src/crewai/mcp/client.py
@@ -6,7 +6,7 @@ from contextlib import AsyncExitStack
 from datetime import datetime
 import logging
 import time
-from typing import Any
+from typing import Any, NamedTuple

 from typing_extensions import Self

@@ -34,6 +34,13 @@ from crewai.mcp.transports.stdio import StdioTransport
 from crewai.utilities.string_utils import sanitize_tool_name


+class _MCPToolResult(NamedTuple):
+    """Internal result from an MCP tool call, carrying the ``isError`` flag."""
+
+    content: str
+    is_error: bool
+
+
 # MCP Connection timeout constants (in seconds)
 MCP_CONNECTION_TIMEOUT = 30  # Increased for slow servers
 MCP_TOOL_EXECUTION_TIMEOUT = 30
@@ -420,6 +427,7 @@ class MCPClient:
        return [
            {
                "name": sanitize_tool_name(tool.name),
+                "original_name": tool.name,
                "description": getattr(tool, "description", ""),
                "inputSchema": getattr(tool, "inputSchema", {}),
            }
@@ -461,29 +469,46 @@ class MCPClient:
        )

        try:
-            result = await self._retry_operation(
+            tool_result: _MCPToolResult = await self._retry_operation(
                lambda: self._call_tool_impl(tool_name, cleaned_arguments),
                timeout=self.execution_timeout,
            )

-            completed_at = datetime.now()
-            execution_duration_ms = (completed_at - started_at).total_seconds() * 1000
-            crewai_event_bus.emit(
-                self,
-                MCPToolExecutionCompletedEvent(
-                    server_name=server_name,
-                    server_url=server_url,
-                    transport_type=transport_type,
-                    tool_name=tool_name,
-                    tool_args=cleaned_arguments,
-                    result=result,
-                    started_at=started_at,
-                    completed_at=completed_at,
-                    execution_duration_ms=execution_duration_ms,
-                ),
-            )
+            finished_at = datetime.now()
+            execution_duration_ms = (finished_at - started_at).total_seconds() * 1000

-            return result
+            if tool_result.is_error:
+                crewai_event_bus.emit(
+                    self,
+                    MCPToolExecutionFailedEvent(
+                        server_name=server_name,
+                        server_url=server_url,
+                        transport_type=transport_type,
+                        tool_name=tool_name,
+                        tool_args=cleaned_arguments,
+                        error=tool_result.content,
+                        error_type="tool_error",
+                        started_at=started_at,
+                        failed_at=finished_at,
+                    ),
+                )
+            else:
+                crewai_event_bus.emit(
+                    self,
+                    MCPToolExecutionCompletedEvent(
+                        server_name=server_name,
+                        server_url=server_url,
+                        transport_type=transport_type,
+                        tool_name=tool_name,
+                        tool_args=cleaned_arguments,
+                        result=tool_result.content,
+                        started_at=started_at,
+                        completed_at=finished_at,
+                        execution_duration_ms=execution_duration_ms,
+                    ),
+                )
+
+            return tool_result.content
        except Exception as e:
            failed_at = datetime.now()
            error_type = (
@@ -564,23 +589,27 @@ class MCPClient:

        return cleaned

-    async def _call_tool_impl(self, tool_name: str, arguments: dict[str, Any]) -> Any:
+    async def _call_tool_impl(
+        self, tool_name: str, arguments: dict[str, Any]
+    ) -> _MCPToolResult:
        """Internal implementation of call_tool."""
        result = await asyncio.wait_for(
            self.session.call_tool(tool_name, arguments),
            timeout=self.execution_timeout,
        )

+        is_error = getattr(result, "isError", False) or False
+
        # Extract result content
        if hasattr(result, "content") and result.content:
            if isinstance(result.content, list) and len(result.content) > 0:
                content_item = result.content[0]
                if hasattr(content_item, "text"):
-                    return str(content_item.text)
-                return str(content_item)
-            return str(result.content)
+                    return _MCPToolResult(str(content_item.text), is_error)
+                return _MCPToolResult(str(content_item), is_error)
+            return _MCPToolResult(str(result.content), is_error)

-        return str(result)
+        return _MCPToolResult(str(result), is_error)

    async def list_prompts(self) -> list[dict[str, Any]]:
        """List available prompts from MCP server.
--- a/lib/crewai/src/crewai/mcp/tool_resolver.py
+++ b/lib/crewai/src/crewai/mcp/tool_resolver.py
@@ -0,0 +1,592 @@
+"""MCP tool resolution for CrewAI agents.
+
+This module extracts all MCP-related tool resolution logic from the Agent class
+into a standalone MCPToolResolver. It handles three flavours of MCP reference:
+
+  1. Native configs:   MCPServerStdio / MCPServerHTTP / MCPServerSSE objects.
+  2. HTTPS URLs:       e.g. "https://mcp.example.com/api"
+  3. AMP references:   e.g. "notion" or "notion#search" (legacy "crewai-amp:" prefix also works)
+"""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from typing import TYPE_CHECKING, Any, Final, cast
+from urllib.parse import urlparse
+
+from crewai.mcp.client import MCPClient
+from crewai.mcp.config import (
+    MCPServerConfig,
+    MCPServerHTTP,
+    MCPServerSSE,
+    MCPServerStdio,
+)
+from crewai.mcp.transports.http import HTTPTransport
+from crewai.mcp.transports.sse import SSETransport
+from crewai.mcp.transports.stdio import StdioTransport
+
+
+if TYPE_CHECKING:
+    from crewai.tools.base_tool import BaseTool
+    from crewai.utilities.logger import Logger
+
+MCP_CONNECTION_TIMEOUT: Final[int] = 10
+MCP_TOOL_EXECUTION_TIMEOUT: Final[int] = 30
+MCP_DISCOVERY_TIMEOUT: Final[int] = 15
+MCP_MAX_RETRIES: Final[int] = 3
+
+_mcp_schema_cache: dict[str, Any] = {}
+_cache_ttl: Final[int] = 300  # 5 minutes
+
+
+class MCPToolResolver:
+    """Resolves MCP server references / configs into CrewAI ``BaseTool`` instances.
+
+    Typical lifecycle::
+
+        resolver = MCPToolResolver(agent=my_agent, logger=my_agent._logger)
+        tools = resolver.resolve(my_agent.mcps)
+        # … agent executes tasks using *tools* …
+        resolver.cleanup()
+
+    The resolver owns the MCP client connections it creates and is responsible
+    for tearing them down via :meth:`cleanup`.
+    """
+
+    def __init__(self, agent: Any, logger: Logger) -> None:
+        self._agent = agent
+        self._logger = logger
+        self._clients: list[Any] = []
+
+    @property
+    def clients(self) -> list[Any]:
+        return list(self._clients)
+
+    def resolve(self, mcps: list[str | MCPServerConfig]) -> list[BaseTool]:
+        """Convert MCP server references/configs to CrewAI tools."""
+        all_tools: list[BaseTool] = []
+        amp_refs: list[tuple[str, str | None]] = []
+
+        for mcp_config in mcps:
+            if isinstance(mcp_config, str) and mcp_config.startswith("https://"):
+                all_tools.extend(self._resolve_external(mcp_config))
+            elif isinstance(mcp_config, str):
+                amp_refs.append(self._parse_amp_ref(mcp_config))
+            else:
+                tools, client = self._resolve_native(mcp_config)
+                all_tools.extend(tools)
+                if client:
+                    self._clients.append(client)
+
+        if amp_refs:
+            tools, clients = self._resolve_amp(amp_refs)
+            all_tools.extend(tools)
+            self._clients.extend(clients)
+
+        return all_tools
+
+    def cleanup(self) -> None:
+        """Disconnect all MCP client connections."""
+        if not self._clients:
+            return
+
+        async def _disconnect_all() -> None:
+            for client in self._clients:
+                if client and hasattr(client, "connected") and client.connected:
+                    await client.disconnect()
+
+        try:
+            asyncio.run(_disconnect_all())
+        except Exception as e:
+            self._logger.log("error", f"Error during MCP client cleanup: {e}")
+        finally:
+            self._clients.clear()
+
+    @staticmethod
+    def _parse_amp_ref(mcp_config: str) -> tuple[str, str | None]:
+        """Parse an AMP reference into *(slug, optional tool name)*.
+
+        Accepts both bare slugs (``"notion"``, ``"notion#search"``) and the
+        legacy ``"crewai-amp:notion"`` form.
+        """
+        bare = mcp_config.removeprefix("crewai-amp:")
+        slug, _, specific_tool = bare.partition("#")
+        return slug, specific_tool or None
+
+    def _resolve_amp(
+        self, amp_refs: list[tuple[str, str | None]]
+    ) -> tuple[list[BaseTool], list[Any]]:
+        """Fetch AMP configs in bulk and return their tools and clients.
+
+        Resolves each unique slug only once (single connection per server),
+        then applies per-ref tool filters to select specific tools.
+        """
+        from crewai.events.event_bus import crewai_event_bus
+        from crewai.events.types.mcp_events import MCPConfigFetchFailedEvent
+
+        unique_slugs = list(dict.fromkeys(slug for slug, _ in amp_refs))
+        amp_configs_map = self._fetch_amp_mcp_configs(unique_slugs)
+
+        all_tools: list[BaseTool] = []
+        all_clients: list[Any] = []
+
+        resolved_cache: dict[str, tuple[list[BaseTool], Any | None]] = {}
+
+        for slug in unique_slugs:
+            config_dict = amp_configs_map.get(slug)
+            if not config_dict:
+                crewai_event_bus.emit(
+                    self,
+                    MCPConfigFetchFailedEvent(
+                        slug=slug,
+                        error=f"Config for '{slug}' not found. Make sure it is connected in your account.",
+                        error_type="not_connected",
+                    ),
+                )
+                continue
+
+            mcp_server_config = self._build_mcp_config_from_dict(config_dict)
+
+            try:
+                tools, client = self._resolve_native(mcp_server_config)
+                resolved_cache[slug] = (tools, client)
+                if client:
+                    all_clients.append(client)
+            except Exception as e:
+                crewai_event_bus.emit(
+                    self,
+                    MCPConfigFetchFailedEvent(
+                        slug=slug,
+                        error=str(e),
+                        error_type="connection_failed",
+                    ),
+                )
+
+        for slug, specific_tool in amp_refs:
+            cached = resolved_cache.get(slug)
+            if not cached:
+                continue
+
+            slug_tools, _ = cached
+            if specific_tool:
+                all_tools.extend(
+                    t for t in slug_tools if t.name.endswith(f"_{specific_tool}")
+                )
+            else:
+                all_tools.extend(slug_tools)
+
+        return all_tools, all_clients
+
+    def _fetch_amp_mcp_configs(self, slugs: list[str]) -> dict[str, dict[str, Any]]:
+        """Fetch MCP server configurations via CrewAI+ API.
+
+        Sends a GET request to the CrewAI+ mcps/configs endpoint with
+        comma-separated slugs. CrewAI+ proxies the request to crewai-oauth.
+
+        API-level failures return ``{}``; individual slugs will then
+        surface as ``MCPConfigFetchFailedEvent`` in :meth:`_resolve_amp`.
+        """
+        import httpx
+
+        try:
+            from crewai_tools.tools.crewai_platform_tools.misc import (
+                get_platform_integration_token,
+            )
+
+            from crewai.cli.plus_api import PlusAPI
+
+            plus_api = PlusAPI(api_key=get_platform_integration_token())
+            response = plus_api.get_mcp_configs(slugs)
+
+            if response.status_code == 200:
+                configs: dict[str, dict[str, Any]] = response.json().get("configs", {})
+                return configs
+
+            self._logger.log(
+                "debug",
+                f"Failed to fetch MCP configs: HTTP {response.status_code}",
+            )
+            return {}
+
+        except httpx.HTTPError as e:
+            self._logger.log("debug", f"Failed to fetch MCP configs: {e}")
+            return {}
+        except Exception as e:
+            self._logger.log("debug", f"Cannot fetch AMP MCP configs: {e}")
+            return {}
+
+    def _resolve_external(self, mcp_ref: str) -> list[BaseTool]:
+        """Resolve an HTTPS MCP server URL into tools."""
+        from crewai.tools.mcp_tool_wrapper import MCPToolWrapper
+
+        if "#" in mcp_ref:
+            server_url, specific_tool = mcp_ref.split("#", 1)
+        else:
+            server_url, specific_tool = mcp_ref, None
+
+        server_params = {"url": server_url}
+        server_name = self._extract_server_name(server_url)
+
+        try:
+            tool_schemas = self._get_mcp_tool_schemas(server_params)
+
+            if not tool_schemas:
+                self._logger.log(
+                    "warning", f"No tools discovered from MCP server: {server_url}"
+                )
+                return []
+
+            tools = []
+            for tool_name, schema in tool_schemas.items():
+                if specific_tool and tool_name != specific_tool:
+                    continue
+
+                try:
+                    wrapper = MCPToolWrapper(
+                        mcp_server_params=server_params,
+                        tool_name=tool_name,
+                        tool_schema=schema,
+                        server_name=server_name,
+                    )
+                    tools.append(wrapper)
+                except Exception as e:
+                    self._logger.log(
+                        "warning",
+                        f"Failed to create MCP tool wrapper for {tool_name}: {e}",
+                    )
+                    continue
+
+            if specific_tool and not tools:
+                self._logger.log(
+                    "warning",
+                    f"Specific tool '{specific_tool}' not found on MCP server: {server_url}",
+                )
+
+            return cast(list[BaseTool], tools)
+
+        except Exception as e:
+            self._logger.log(
+                "warning", f"Failed to connect to MCP server {server_url}: {e}"
+            )
+            return []
+
+    def _resolve_native(
+        self, mcp_config: MCPServerConfig
+    ) -> tuple[list[BaseTool], Any | None]:
+        """Resolve an ``MCPServerConfig`` into tools, returning the client for cleanup."""
+        from crewai.tools.base_tool import BaseTool
+        from crewai.tools.mcp_native_tool import MCPNativeTool
+
+        transport: StdioTransport | HTTPTransport | SSETransport
+        if isinstance(mcp_config, MCPServerStdio):
+            transport = StdioTransport(
+                command=mcp_config.command,
+                args=mcp_config.args,
+                env=mcp_config.env,
+            )
+            server_name = f"{mcp_config.command}_{'_'.join(mcp_config.args)}"
+        elif isinstance(mcp_config, MCPServerHTTP):
+            transport = HTTPTransport(
+                url=mcp_config.url,
+                headers=mcp_config.headers,
+                streamable=mcp_config.streamable,
+            )
+            server_name = self._extract_server_name(mcp_config.url)
+        elif isinstance(mcp_config, MCPServerSSE):
+            transport = SSETransport(
+                url=mcp_config.url,
+                headers=mcp_config.headers,
+            )
+            server_name = self._extract_server_name(mcp_config.url)
+        else:
+            raise ValueError(f"Unsupported MCP server config type: {type(mcp_config)}")
+
+        client = MCPClient(
+            transport=transport,
+            cache_tools_list=mcp_config.cache_tools_list,
+        )
+
+        async def _setup_client_and_list_tools() -> list[dict[str, Any]]:
+            try:
+                if not client.connected:
+                    await client.connect()
+
+                tools_list = await client.list_tools()
+
+                try:
+                    await client.disconnect()
+                    await asyncio.sleep(0.1)
+                except Exception as e:
+                    self._logger.log("error", f"Error during disconnect: {e}")
+
+                return tools_list
+            except Exception as e:
+                if client.connected:
+                    await client.disconnect()
+                    await asyncio.sleep(0.1)
+                raise RuntimeError(
+                    f"Error during setup client and list tools: {e}"
+                ) from e
+
+        try:
+            try:
+                asyncio.get_running_loop()
+                import concurrent.futures
+
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    future = executor.submit(
+                        asyncio.run, _setup_client_and_list_tools()
+                    )
+                    tools_list = future.result()
+            except RuntimeError:
+                try:
+                    tools_list = asyncio.run(_setup_client_and_list_tools())
+                except RuntimeError as e:
+                    error_msg = str(e).lower()
+                    if "cancel scope" in error_msg or "task" in error_msg:
+                        raise ConnectionError(
+                            "MCP connection failed due to event loop cleanup issues. "
+                            "This may be due to authentication errors or server unavailability."
+                        ) from e
+                except asyncio.CancelledError as e:
+                    raise ConnectionError(
+                        "MCP connection was cancelled. This may indicate an authentication "
+                        "error or server unavailability."
+                    ) from e
+
+            if mcp_config.tool_filter:
+                filtered_tools = []
+                for tool in tools_list:
+                    if callable(mcp_config.tool_filter):
+                        try:
+                            from crewai.mcp.filters import ToolFilterContext
+
+                            context = ToolFilterContext(
+                                agent=self._agent,
+                                server_name=server_name,
+                                run_context=None,
+                            )
+                            if mcp_config.tool_filter(context, tool):  # type: ignore[call-arg, arg-type]
+                                filtered_tools.append(tool)
+                        except (TypeError, AttributeError):
+                            if mcp_config.tool_filter(tool):  # type: ignore[call-arg, arg-type]
+                                filtered_tools.append(tool)
+                    else:
+                        filtered_tools.append(tool)
+                tools_list = filtered_tools
+
+            tools = []
+            for tool_def in tools_list:
+                tool_name = tool_def.get("name", "")
+                original_tool_name = tool_def.get("original_name", tool_name)
+                if not tool_name:
+                    continue
+
+                args_schema = None
+                if tool_def.get("inputSchema"):
+                    args_schema = self._json_schema_to_pydantic(
+                        tool_name, tool_def["inputSchema"]
+                    )
+
+                tool_schema = {
+                    "description": tool_def.get("description", ""),
+                    "args_schema": args_schema,
+                }
+
+                try:
+                    native_tool = MCPNativeTool(
+                        mcp_client=client,
+                        tool_name=tool_name,
+                        tool_schema=tool_schema,
+                        server_name=server_name,
+                        original_tool_name=original_tool_name,
+                    )
+                    tools.append(native_tool)
+                except Exception as e:
+                    self._logger.log("error", f"Failed to create native MCP tool: {e}")
+                    continue
+
+            return cast(list[BaseTool], tools), client
+        except Exception as e:
+            if client.connected:
+                asyncio.run(client.disconnect())
+
+            raise RuntimeError(f"Failed to get native MCP tools: {e}") from e
+
+    @staticmethod
+    def _build_mcp_config_from_dict(
+        config_dict: dict[str, Any],
+    ) -> MCPServerConfig:
+        """Convert a config dict from crewai-oauth into an MCPServerConfig."""
+        config_type = config_dict.get("type", "http")
+
+        if config_type == "sse":
+            return MCPServerSSE(
+                url=config_dict["url"],
+                headers=config_dict.get("headers"),
+                cache_tools_list=config_dict.get("cache_tools_list", False),
+            )
+
+        return MCPServerHTTP(
+            url=config_dict["url"],
+            headers=config_dict.get("headers"),
+            streamable=config_dict.get("streamable", True),
+            cache_tools_list=config_dict.get("cache_tools_list", False),
+        )
+
+    @staticmethod
+    def _extract_server_name(server_url: str) -> str:
+        """Extract clean server name from URL for tool prefixing."""
+        parsed = urlparse(server_url)
+        domain = parsed.netloc.replace(".", "_")
+        path = parsed.path.replace("/", "_").strip("_")
+        return f"{domain}_{path}" if path else domain
+
+    def _get_mcp_tool_schemas(
+        self, server_params: dict[str, Any]
+    ) -> dict[str, dict[str, Any]]:
+        """Get tool schemas from MCP server with caching."""
+        server_url = server_params["url"]
+
+        cache_key = server_url
+        current_time = time.time()
+
+        if cache_key in _mcp_schema_cache:
+            cached_data, cache_time = _mcp_schema_cache[cache_key]
+            if current_time - cache_time < _cache_ttl:
+                self._logger.log(
+                    "debug", f"Using cached MCP tool schemas for {server_url}"
+                )
+                return cached_data  # type: ignore[no-any-return]
+
+        try:
+            schemas = asyncio.run(self._get_mcp_tool_schemas_async(server_params))
+            _mcp_schema_cache[cache_key] = (schemas, current_time)
+            return schemas
+        except Exception as e:
+            self._logger.log(
+                "warning", f"Failed to get MCP tool schemas from {server_url}: {e}"
+            )
+            return {}
+
+    async def _get_mcp_tool_schemas_async(
+        self, server_params: dict[str, Any]
+    ) -> dict[str, dict[str, Any]]:
+        """Async implementation of MCP tool schema retrieval."""
+        server_url = server_params["url"]
+        return await self._retry_mcp_discovery(
+            self._discover_mcp_tools_with_timeout, server_url
+        )
+
+    async def _retry_mcp_discovery(
+        self, operation_func: Any, server_url: str
+    ) -> dict[str, dict[str, Any]]:
+        """Retry MCP discovery with exponential backoff."""
+        last_error = None
+
+        for attempt in range(MCP_MAX_RETRIES):
+            result, error, should_retry = await self._attempt_mcp_discovery(
+                operation_func, server_url
+            )
+
+            if result is not None:
+                return result
+
+            if not should_retry:
+                raise RuntimeError(error)
+
+            last_error = error
+            if attempt < MCP_MAX_RETRIES - 1:
+                wait_time = 2**attempt
+                await asyncio.sleep(wait_time)
+
+        raise RuntimeError(
+            f"Failed to discover MCP tools after {MCP_MAX_RETRIES} attempts: {last_error}"
+        )
+
+    @staticmethod
+    async def _attempt_mcp_discovery(
+        operation_func: Any, server_url: str
+    ) -> tuple[dict[str, dict[str, Any]] | None, str, bool]:
+        """Attempt single MCP discovery; returns *(result, error_message, should_retry)*."""
+        try:
+            result = await operation_func(server_url)
+            return result, "", False
+
+        except ImportError:
+            return (
+                None,
+                "MCP library not available. Please install with: pip install mcp",
+                False,
+            )
+
+        except asyncio.TimeoutError:
+            return (
+                None,
+                f"MCP discovery timed out after {MCP_DISCOVERY_TIMEOUT} seconds",
+                True,
+            )
+
+        except Exception as e:
+            error_str = str(e).lower()
+
+            if "authentication" in error_str or "unauthorized" in error_str:
+                return None, f"Authentication failed for MCP server: {e!s}", False
+            if "connection" in error_str or "network" in error_str:
+                return None, f"Network connection failed: {e!s}", True
+            if "json" in error_str or "parsing" in error_str:
+                return None, f"Server response parsing error: {e!s}", True
+            return None, f"MCP discovery error: {e!s}", False
+
+    async def _discover_mcp_tools_with_timeout(
+        self, server_url: str
+    ) -> dict[str, dict[str, Any]]:
+        """Discover MCP tools with timeout wrapper."""
+        return await asyncio.wait_for(
+            self._discover_mcp_tools(server_url), timeout=MCP_DISCOVERY_TIMEOUT
+        )
+
+    async def _discover_mcp_tools(self, server_url: str) -> dict[str, dict[str, Any]]:
+        """Discover tools from an MCP server (HTTPS / streamable-HTTP path)."""
+        from mcp import ClientSession
+        from mcp.client.streamable_http import streamablehttp_client
+
+        from crewai.utilities.string_utils import sanitize_tool_name
+
+        async with streamablehttp_client(server_url) as (read, write, _):
+            async with ClientSession(read, write) as session:
+                await asyncio.wait_for(
+                    session.initialize(), timeout=MCP_CONNECTION_TIMEOUT
+                )
+
+                tools_result = await asyncio.wait_for(
+                    session.list_tools(),
+                    timeout=MCP_DISCOVERY_TIMEOUT - MCP_CONNECTION_TIMEOUT,
+                )
+
+                schemas = {}
+                for tool in tools_result.tools:
+                    args_schema = None
+                    if hasattr(tool, "inputSchema") and tool.inputSchema:
+                        args_schema = self._json_schema_to_pydantic(
+                            sanitize_tool_name(tool.name), tool.inputSchema
+                        )
+
+                    schemas[sanitize_tool_name(tool.name)] = {
+                        "description": getattr(tool, "description", ""),
+                        "args_schema": args_schema,
+                    }
+                return schemas
+
+    @staticmethod
+    def _json_schema_to_pydantic(tool_name: str, json_schema: dict[str, Any]) -> type:
+        """Convert JSON Schema to a Pydantic model for tool arguments."""
+        from crewai.utilities.pydantic_schema_utils import create_model_from_schema
+
+        model_name = f"{tool_name.replace('-', '_').replace(' ', '_')}Schema"
+        return create_model_from_schema(
+            json_schema,
+            model_name=model_name,
+            enrich_descriptions=True,
+        )
--- a/lib/crewai/src/crewai/memory/init.py
+++ b/lib/crewai/src/crewai/memory/init.py
@@ -1,6 +1,14 @@
-"""Memory module: unified Memory with LLM analysis and pluggable storage."""
+"""Memory module: unified Memory with LLM analysis and pluggable storage.
+
+Heavy dependencies are lazily imported so that
+``import crewai`` does not initialise at runtime — critical for
+Celery pre-fork and similar deployment patterns.
+"""
+
+from __future__ import annotations
+
+from typing import Any

-from crewai.memory.encoding_flow import EncodingFlow
 from crewai.memory.memory_scope import MemoryScope, MemorySlice
 from crewai.memory.types import (
    MemoryMatch,
@@ -10,7 +18,25 @@ from crewai.memory.types import (
    embed_text,
    embed_texts,
 )
-from crewai.memory.unified_memory import Memory
+
+
+_LAZY_IMPORTS: dict[str, tuple[str, str]] = {
+    "Memory": ("crewai.memory.unified_memory", "Memory"),
+    "EncodingFlow": ("crewai.memory.encoding_flow", "EncodingFlow"),
+}
+
+
+def __getattr__(name: str) -> Any:
+    """Lazily import Memory / EncodingFlow to avoid pulling in lancedb at import time."""
+    if name in _LAZY_IMPORTS:
+        import importlib
+
+        module_path, attr = _LAZY_IMPORTS[name]
+        mod = importlib.import_module(module_path)
+        val = getattr(mod, attr)
+        globals()[name] = val
+        return val
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


 __all__ = [
--- a/lib/crewai/src/crewai/memory/memory_scope.py
+++ b/lib/crewai/src/crewai/memory/memory_scope.py
@@ -3,11 +3,9 @@
 from __future__ import annotations

 from datetime import datetime
-from typing import TYPE_CHECKING, Any
+from typing import Any, Literal

-
-if TYPE_CHECKING:
-    from crewai.memory.unified_memory import Memory
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator

 from crewai.memory.types import (
    _RECALL_OVERSAMPLE_FACTOR,
@@ -15,22 +13,38 @@ from crewai.memory.types import (
    MemoryRecord,
    ScopeInfo,
 )
+from crewai.memory.unified_memory import Memory


-class MemoryScope:
+class MemoryScope(BaseModel):
    """View of Memory restricted to a root path. All operations are scoped under that path."""

-    def __init__(self, memory: Memory, root_path: str) -> None:
-        """Initialize scope.
+    model_config = ConfigDict(arbitrary_types_allowed=True)

-        Args:
-            memory: The underlying Memory instance.
-            root_path: Root path for this scope (e.g. /agent/1).
-        """
-        self._memory = memory
-        self._root = root_path.rstrip("/") or ""
-        if self._root and not self._root.startswith("/"):
-            self._root = "/" + self._root
+    root_path: str = Field(default="/")
+
+    _memory: Memory = PrivateAttr()
+    _root: str = PrivateAttr()
+
+    @model_validator(mode="wrap")
+    @classmethod
+    def _accept_memory(cls, data: Any, handler: Any) -> MemoryScope:
+        """Extract memory dependency and normalize root path before validation."""
+        if isinstance(data, MemoryScope):
+            return data
+        memory = data.pop("memory")
+        instance: MemoryScope = handler(data)
+        instance._memory = memory
+        root = instance.root_path.rstrip("/") or ""
+        if root and not root.startswith("/"):
+            root = "/" + root
+        instance._root = root
+        return instance
+
+    @property
+    def read_only(self) -> bool:
+        """Whether the underlying memory is read-only."""
+        return self._memory.read_only

    def _scope_path(self, scope: str | None) -> str:
        if not scope or scope == "/":
@@ -52,7 +66,7 @@ class MemoryScope:
        importance: float | None = None,
        source: str | None = None,
        private: bool = False,
-    ) -> MemoryRecord:
+    ) -> MemoryRecord | None:
        """Remember content; scope is relative to this scope's root."""
        path = self._scope_path(scope)
        return self._memory.remember(
@@ -71,7 +85,7 @@ class MemoryScope:
        scope: str | None = None,
        categories: list[str] | None = None,
        limit: int = 10,
-        depth: str = "deep",
+        depth: Literal["shallow", "deep"] = "deep",
        source: str | None = None,
        include_private: bool = False,
    ) -> list[MemoryMatch]:
@@ -138,34 +152,34 @@ class MemoryScope:
        """Return a narrower scope under this scope."""
        child = path.strip("/")
        if not child:
-            return MemoryScope(self._memory, self._root or "/")
+            return MemoryScope(memory=self._memory, root_path=self._root or "/")
        base = self._root.rstrip("/") or ""
        new_root = f"{base}/{child}" if base else f"/{child}"
-        return MemoryScope(self._memory, new_root)
+        return MemoryScope(memory=self._memory, root_path=new_root)


-class MemorySlice:
-    """View over multiple scopes: recall searches all, remember requires explicit scope unless read_only."""
+class MemorySlice(BaseModel):
+    """View over multiple scopes: recall searches all, remember is a no-op when read_only."""

-    def __init__(
-        self,
-        memory: Memory,
-        scopes: list[str],
-        categories: list[str] | None = None,
-        read_only: bool = True,
-    ) -> None:
-        """Initialize slice.
+    model_config = ConfigDict(arbitrary_types_allowed=True)

-        Args:
-            memory: The underlying Memory instance.
-            scopes: List of scope paths to include.
-            categories: Optional category filter for recall.
-            read_only: If True, remember() raises PermissionError.
-        """
-        self._memory = memory
-        self._scopes = [s.rstrip("/") or "/" for s in scopes]
-        self._categories = categories
-        self._read_only = read_only
+    scopes: list[str] = Field(default_factory=list)
+    categories: list[str] | None = Field(default=None)
+    read_only: bool = Field(default=True)
+
+    _memory: Memory = PrivateAttr()
+
+    @model_validator(mode="wrap")
+    @classmethod
+    def _accept_memory(cls, data: Any, handler: Any) -> MemorySlice:
+        """Extract memory dependency and normalize scopes before validation."""
+        if isinstance(data, MemorySlice):
+            return data
+        memory = data.pop("memory")
+        data["scopes"] = [s.rstrip("/") or "/" for s in data.get("scopes", [])]
+        instance: MemorySlice = handler(data)
+        instance._memory = memory
+        return instance

    def remember(
        self,
@@ -176,10 +190,10 @@ class MemorySlice:
        importance: float | None = None,
        source: str | None = None,
        private: bool = False,
-    ) -> MemoryRecord:
-        """Remember into an explicit scope. Required when read_only=False."""
-        if self._read_only:
-            raise PermissionError("This MemorySlice is read-only")
+    ) -> MemoryRecord | None:
+        """Remember into an explicit scope. No-op when read_only=True."""
+        if self.read_only:
+            return None
        return self._memory.remember(
            content,
            scope=scope,
@@ -196,14 +210,14 @@ class MemorySlice:
        scope: str | None = None,
        categories: list[str] | None = None,
        limit: int = 10,
-        depth: str = "deep",
+        depth: Literal["shallow", "deep"] = "deep",
        source: str | None = None,
        include_private: bool = False,
    ) -> list[MemoryMatch]:
        """Recall across all slice scopes; results merged and re-ranked."""
-        cats = categories or self._categories
+        cats = categories or self.categories
        all_matches: list[MemoryMatch] = []
-        for sc in self._scopes:
+        for sc in self.scopes:
            matches = self._memory.recall(
                query,
                scope=sc,
@@ -231,7 +245,7 @@ class MemorySlice:
    def list_scopes(self, path: str = "/") -> list[str]:
        """List scopes across all slice roots."""
        out: list[str] = []
-        for sc in self._scopes:
+        for sc in self.scopes:
            full = f"{sc.rstrip('/')}{path}" if sc != "/" else path
            out.extend(self._memory.list_scopes(full))
        return sorted(set(out))
@@ -243,15 +257,23 @@ class MemorySlice:
        oldest: datetime | None = None
        newest: datetime | None = None
        children: list[str] = []
-        for sc in self._scopes:
+        for sc in self.scopes:
            full = f"{sc.rstrip('/')}{path}" if sc != "/" else path
            inf = self._memory.info(full)
            total_records += inf.record_count
            all_categories.update(inf.categories)
            if inf.oldest_record:
-                oldest = inf.oldest_record if oldest is None else min(oldest, inf.oldest_record)
+                oldest = (
+                    inf.oldest_record
+                    if oldest is None
+                    else min(oldest, inf.oldest_record)
+                )
            if inf.newest_record:
-                newest = inf.newest_record if newest is None else max(newest, inf.newest_record)
+                newest = (
+                    inf.newest_record
+                    if newest is None
+                    else max(newest, inf.newest_record)
+                )
            children.extend(inf.child_scopes)
        return ScopeInfo(
            path=path,
@@ -265,7 +287,7 @@ class MemorySlice:
    def list_categories(self, path: str | None = None) -> dict[str, int]:
        """Categories and counts across slice scopes."""
        counts: dict[str, int] = {}
-        for sc in self._scopes:
+        for sc in self.scopes:
            full = (f"{sc.rstrip('/')}{path}" if sc != "/" else path) if path else sc
            for k, v in self._memory.list_categories(full).items():
                counts[k] = counts.get(k, 0) + v
--- a/lib/crewai/src/crewai/memory/recall_flow.py
+++ b/lib/crewai/src/crewai/memory/recall_flow.py
@@ -2,7 +2,6 @@

 Implements adaptive-depth retrieval with:
 - LLM query distillation into targeted sub-queries
- Keyword-driven category filtering
 - Time-based filtering from temporal hints
 - Parallel multi-query, multi-scope search
 - Confidence-based routing with iterative deepening (budget loop)
@@ -37,7 +36,6 @@ class RecallState(BaseModel):
    query: str = ""
    scope: str | None = None
    categories: list[str] | None = None
-    inferred_categories: list[str] = Field(default_factory=list)
    time_cutoff: datetime | None = None
    source: str | None = None
    include_private: bool = False
@@ -82,11 +80,8 @@ class RecallFlow(Flow[RecallState]):
    # ------------------------------------------------------------------

    def _merged_categories(self) -> list[str] | None:
-        """Merge caller-supplied and LLM-inferred categories."""
-        merged = list(
-            set((self.state.categories or []) + self.state.inferred_categories)
-        )
-        return merged or None
+        """Return caller-supplied categories, or None if empty."""
+        return self.state.categories or None

    def _do_search(self) -> list[dict[str, Any]]:
        """Run parallel search across (embeddings x scopes) with filters.
@@ -212,10 +207,6 @@ class RecallFlow(Flow[RecallState]):
            )
            self.state.query_analysis = analysis

-            # Wire keywords -> category filter
-            if analysis.keywords:
-                self.state.inferred_categories = analysis.keywords
-
            # Parse time_filter into a datetime cutoff
            if analysis.time_filter:
                try:
--- a/lib/crewai/src/crewai/memory/storage/lancedb_storage.py
+++ b/lib/crewai/src/crewai/memory/storage/lancedb_storage.py
@@ -53,6 +53,7 @@ class LanceDBStorage:
        path: str | Path | None = None,
        table_name: str = "memories",
        vector_dim: int | None = None,
+        compact_every: int = 100,
    ) -> None:
        """Initialize LanceDB storage.

@@ -64,6 +65,10 @@ class LanceDBStorage:
            vector_dim: Dimensionality of the embedding vector. When ``None``
                  (default), the dimension is auto-detected from the existing
                  table schema or from the first saved embedding.
+            compact_every: Number of ``save()`` calls between automatic
+                  background compactions.  Each ``save()`` creates one new
+                  fragment file; compaction merges them, keeping query
+                  performance consistent.  Set to 0 to disable.
        """
        if path is None:
            storage_dir = os.environ.get("CREWAI_STORAGE_DIR")
@@ -78,6 +83,22 @@ class LanceDBStorage:
        self._table_name = table_name
        self._db = lancedb.connect(str(self._path))

+        # On macOS and Linux the default per-process open-file limit is 256.
+        # A LanceDB table stores one file per fragment (one fragment per save()
+        # call by default).  With hundreds of fragments, a single full-table
+        # scan opens all of them simultaneously, exhausting the limit.
+        # Raise it proactively so scans on large tables never hit OS error 24.
+        try:
+            import resource
+            soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
+            if soft < 4096:
+                resource.setrlimit(resource.RLIMIT_NOFILE, (min(hard, 4096), hard))
+        except Exception:  # noqa: S110
+            pass  # Windows or already at the max hard limit — safe to ignore
+
+        self._compact_every = compact_every
+        self._save_count = 0
+
        # Get or create a shared write lock for this database path.
        resolved = str(self._path.resolve())
        with LanceDBStorage._path_locks_guard:
@@ -91,6 +112,11 @@ class LanceDBStorage:
        try:
            self._table: lancedb.table.Table | None = self._db.open_table(self._table_name)
            self._vector_dim: int = self._infer_dim_from_table(self._table)
+            # Best-effort: create the scope index if it doesn't exist yet.
+            self._ensure_scope_index()
+            # Compact in the background if the table has accumulated many
+            # fragments from previous runs (each save() creates one).
+            self._compact_if_needed()
        except Exception:
            self._table = None
            self._vector_dim = vector_dim or 0  # 0 = not yet known
@@ -178,6 +204,56 @@ class LanceDBStorage:
        table.delete("id = '__schema_placeholder__'")
        return table

+    def _ensure_scope_index(self) -> None:
+        """Create a BTREE scalar index on the ``scope`` column if not present.
+
+        A scalar index lets LanceDB skip a full table scan when filtering by
+        scope prefix, which is the hot path for ``list_records``,
+        ``get_scope_info``, and ``list_scopes``.  The call is best-effort:
+        if the table is empty or the index already exists the exception is
+        swallowed silently.
+        """
+        if self._table is None:
+            return
+        try:
+            self._table.create_scalar_index("scope", index_type="BTREE", replace=False)
+        except Exception:  # noqa: S110
+            pass  # index already exists, table empty, or unsupported version
+
+    # ------------------------------------------------------------------
+    # Automatic background compaction
+    # ------------------------------------------------------------------
+
+    def _compact_if_needed(self) -> None:
+        """Spawn a background compaction on startup.
+
+        Called whenever an existing table is opened so that fragments
+        accumulated in previous sessions are silently merged before the
+        first query.  ``optimize()`` returns quickly when the table is
+        already compact, so the cost is negligible in the common case.
+        """
+        if self._table is None or self._compact_every <= 0:
+            return
+        self._compact_async()
+
+    def _compact_async(self) -> None:
+        """Fire-and-forget: compact the table in a daemon background thread."""
+        threading.Thread(
+            target=self._compact_safe,
+            daemon=True,
+            name="lancedb-compact",
+        ).start()
+
+    def _compact_safe(self) -> None:
+        """Run ``table.optimize()`` in a background thread, absorbing errors."""
+        try:
+            if self._table is not None:
+                self._table.optimize()
+                # Refresh the scope index so new fragments are covered.
+                self._ensure_scope_index()
+        except Exception:
+            _logger.debug("LanceDB background compaction failed", exc_info=True)
+
    def _ensure_table(self, vector_dim: int | None = None) -> lancedb.table.Table:
        """Return the table, creating it lazily if needed.

@@ -239,6 +315,7 @@ class LanceDBStorage:
            if r.embedding and len(r.embedding) > 0:
                dim = len(r.embedding)
                break
+        is_new_table = self._table is None
        with self._write_lock:
            self._ensure_table(vector_dim=dim)
            rows = [self._record_to_row(r) for r in records]
@@ -246,6 +323,13 @@ class LanceDBStorage:
                if r["vector"] is None or len(r["vector"]) != self._vector_dim:
                    r["vector"] = [0.0] * self._vector_dim
            self._retry_write("add", rows)
+        # Create the scope index on the first save so it covers the initial dataset.
+        if is_new_table:
+            self._ensure_scope_index()
+        # Auto-compact every N saves so fragment files don't pile up.
+        self._save_count += 1
+        if self._compact_every > 0 and self._save_count % self._compact_every == 0:
+            self._compact_async()

    def update(self, record: MemoryRecord) -> None:
        """Update a record by ID. Preserves created_at, updates last_accessed."""
@@ -261,6 +345,10 @@ class LanceDBStorage:
    def touch_records(self, record_ids: list[str]) -> None:
        """Update last_accessed to now for the given record IDs.

+        Uses a single batch ``table.update()`` call instead of N
+        delete-and-re-add cycles, which is both faster and avoids
+        unnecessary write amplification.
+
        Args:
            record_ids: IDs of records to touch.
        """
@@ -268,25 +356,20 @@ class LanceDBStorage:
            return
        with self._write_lock:
            now = datetime.utcnow().isoformat()
-            for rid in record_ids:
-                safe_id = str(rid).replace("'", "''")
-                rows = (
-                    self._table.search([0.0] * self._vector_dim)
-                    .where(f"id = '{safe_id}'")
-                    .limit(1)
-                    .to_list()
-                )
-                if rows:
-                    rows[0]["last_accessed"] = now
-                    self._retry_write("delete", f"id = '{safe_id}'")
-                    self._retry_write("add", [rows[0]])
+            safe_ids = [str(rid).replace("'", "''") for rid in record_ids]
+            ids_expr = ", ".join(f"'{rid}'" for rid in safe_ids)
+            self._retry_write(
+                "update",
+                where=f"id IN ({ids_expr})",
+                values={"last_accessed": now},
+            )

    def get_record(self, record_id: str) -> MemoryRecord | None:
        """Return a single record by ID, or None if not found."""
        if self._table is None:
            return None
        safe_id = str(record_id).replace("'", "''")
-        rows = self._table.search([0.0] * self._vector_dim).where(f"id = '{safe_id}'").limit(1).to_list()
+        rows = self._table.search().where(f"id = '{safe_id}'").limit(1).to_list()
        if not rows:
            return None
        return self._row_to_record(rows[0])
@@ -374,13 +457,31 @@ class LanceDBStorage:
            self._retry_write("delete", where_expr)
            return before - self._table.count_rows()

-    def _scan_rows(self, scope_prefix: str | None = None, limit: int = _SCAN_ROWS_LIMIT) -> list[dict[str, Any]]:
-        """Scan rows optionally filtered by scope prefix."""
+    def _scan_rows(
+        self,
+        scope_prefix: str | None = None,
+        limit: int = _SCAN_ROWS_LIMIT,
+        columns: list[str] | None = None,
+    ) -> list[dict[str, Any]]:
+        """Scan rows optionally filtered by scope prefix.
+
+        Uses a full table scan (no vector query) so the limit is applied after
+        the scope filter, not to ANN candidates before filtering.
+
+        Args:
+            scope_prefix: Optional scope path prefix to filter by.
+            limit: Maximum number of rows to return (applied after filtering).
+            columns: Optional list of column names to fetch.  Pass only the
+                columns you need for metadata operations to avoid reading the
+                heavy ``vector`` column unnecessarily.
+        """
        if self._table is None:
            return []
-        q = self._table.search([0.0] * self._vector_dim)
+        q = self._table.search()
        if scope_prefix is not None and scope_prefix.strip("/"):
            q = q.where(f"scope LIKE '{scope_prefix.rstrip('/')}%'")
+        if columns is not None:
+            q = q.select(columns)
        return q.limit(limit).to_list()

    def list_records(
@@ -406,7 +507,10 @@ class LanceDBStorage:
        prefix = scope if scope != "/" else ""
        if prefix and not prefix.startswith("/"):
            prefix = "/" + prefix
-        rows = self._scan_rows(prefix or None)
+        rows = self._scan_rows(
+            prefix or None,
+            columns=["scope", "categories_str", "created_at"],
+        )
        if not rows:
            return ScopeInfo(
                path=scope or "/",
@@ -453,7 +557,7 @@ class LanceDBStorage:
    def list_scopes(self, parent: str = "/") -> list[str]:
        parent = parent.rstrip("/") or ""
        prefix = (parent + "/") if parent else "/"
-        rows = self._scan_rows(prefix if prefix != "/" else None)
+        rows = self._scan_rows(prefix if prefix != "/" else None, columns=["scope"])
        children: set[str] = set()
        for row in rows:
            sc = str(row.get("scope", ""))
@@ -465,7 +569,7 @@ class LanceDBStorage:
        return sorted(children)

    def list_categories(self, scope_prefix: str | None = None) -> dict[str, int]:
-        rows = self._scan_rows(scope_prefix)
+        rows = self._scan_rows(scope_prefix, columns=["categories_str"])
        counts: dict[str, int] = {}
        for row in rows:
            cat_str = row.get("categories_str") or "[]"
@@ -498,6 +602,21 @@ class LanceDBStorage:
        if prefix:
            self._table.delete(f"scope >= '{prefix}' AND scope < '{prefix}/\uFFFF'")

+    def optimize(self) -> None:
+        """Compact the table synchronously and refresh the scope index.
+
+        Under normal usage this is called automatically in the background
+        (every ``compact_every`` saves and on startup when the table is
+        fragmented).  Call this explicitly only when you need the compaction
+        to be complete before the next operation — for example immediately
+        after a large bulk import, before a latency-sensitive recall.
+        It is a no-op if the table does not exist.
+        """
+        if self._table is None:
+            return
+        self._table.optimize()
+        self._ensure_scope_index()
+
    async def asave(self, records: list[MemoryRecord]) -> None:
        self.save(records)

--- a/lib/crewai/src/crewai/memory/types.py
+++ b/lib/crewai/src/crewai/memory/types.py
@@ -87,6 +87,22 @@ class MemoryMatch(BaseModel):
        description="Information the system looked for but could not find.",
    )

+    def format(self) -> str:
+        """Format this match as a human-readable string including metadata.
+
+        Returns:
+            A multi-line string with score, content, categories, and non-empty
+            metadata fields.
+        """
+        lines = [f"- (score={self.score:.2f}) {self.record.content}"]
+        if self.record.categories:
+            lines.append(f"  categories: {', '.join(self.record.categories)}")
+        if self.record.metadata:
+            for key, value in self.record.metadata.items():
+                if value is not None:
+                    lines.append(f"  {key}: {value}")
+        return "\n".join(lines)
+

 class ScopeInfo(BaseModel):
    """Information about a scope in the memory hierarchy."""
@@ -291,7 +307,7 @@ def embed_text(embedder: Any, text: str) -> list[float]:
        return []
    first = result[0]
    if hasattr(first, "tolist"):
-        return first.tolist()
+        return list(first.tolist())
    if isinstance(first, list):
        return [float(x) for x in first]
    return list(first)
--- a/lib/crewai/src/crewai/memory/unified_memory.py
+++ b/lib/crewai/src/crewai/memory/unified_memory.py
@@ -6,7 +6,9 @@ from concurrent.futures import Future, ThreadPoolExecutor
 from datetime import datetime
 import threading
 import time
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Annotated, Any, Literal
+
+from pydantic import BaseModel, ConfigDict, Field, PlainValidator, PrivateAttr

 from crewai.events.event_bus import crewai_event_bus
 from crewai.events.types.memory_events import (
@@ -21,7 +23,6 @@ from crewai.llms.base_llm import BaseLLM
 from crewai.memory.analyze import extract_memories_from_content
 from crewai.memory.recall_flow import RecallFlow
 from crewai.memory.storage.backend import StorageBackend
-from crewai.memory.storage.lancedb_storage import LanceDBStorage
 from crewai.memory.types import (
    MemoryConfig,
    MemoryMatch,
@@ -30,16 +31,28 @@ from crewai.memory.types import (
    compute_composite_score,
    embed_text,
 )
+from crewai.rag.embeddings.factory import build_embedder
+from crewai.rag.embeddings.providers.openai.types import OpenAIProviderSpec


-def _default_embedder() -> Any:
+if TYPE_CHECKING:
+    from chromadb.utils.embedding_functions.openai_embedding_function import (
+        OpenAIEmbeddingFunction,
+    )
+
+
+def _passthrough(v: Any) -> Any:
+    """PlainValidator that accepts any value, bypassing strict union discrimination."""
+    return v
+
+
+def _default_embedder() -> OpenAIEmbeddingFunction:
    """Build default OpenAI embedder for memory."""
-    from crewai.rag.embeddings.factory import build_embedder
-
-    return build_embedder({"provider": "openai", "config": {}})
+    spec: OpenAIProviderSpec = {"provider": "openai", "config": {}}
+    return build_embedder(spec)


-class Memory:
+class Memory(BaseModel):
    """Unified memory: standalone, LLM-analyzed, with intelligent recall flow.

    Works without agent/crew. Uses LLM to infer scope, categories, importance on save.
@@ -47,109 +60,119 @@ class Memory:
    pluggable storage (LanceDB default).
    """

-    def __init__(
-        self,
-        llm: BaseLLM | str = "gpt-4o-mini",
-        storage: StorageBackend | str = "lancedb",
-        embedder: Any = None,
-        # -- Scoring weights --
-        # These three weights control how recall results are ranked.
-        # The composite score is: semantic_weight * similarity + recency_weight * decay + importance_weight * importance.
-        # They should sum to ~1.0 for intuitive scoring.
-        recency_weight: float = 0.3,
-        semantic_weight: float = 0.5,
-        importance_weight: float = 0.2,
-        # How quickly old memories lose relevance. The recency score halves every
-        # N days (exponential decay). Lower = faster forgetting; higher = longer relevance.
-        recency_half_life_days: int = 30,
-        # -- Consolidation --
-        # When remembering new content, if an existing record has similarity >= this
-        # threshold, the LLM is asked to merge/update/delete. Set to 1.0 to disable.
-        consolidation_threshold: float = 0.85,
-        # Max existing records to compare against when checking for consolidation.
-        consolidation_limit: int = 5,
-        # -- Save defaults --
-        # Importance assigned to new memories when no explicit value is given and
-        # the LLM analysis path is skipped (all fields provided by the caller).
-        default_importance: float = 0.5,
-        # -- Recall depth control --
-        # These thresholds govern the RecallFlow router that decides between
-        # returning results immediately ("synthesize") vs. doing an extra
-        # LLM-driven exploration round ("explore_deeper").
-        #   confidence >= confidence_threshold_high  => always synthesize
-        #   confidence <  confidence_threshold_low   => explore deeper (if budget > 0)
-        #   complex query + confidence < complex_query_threshold => explore deeper
-        confidence_threshold_high: float = 0.8,
-        confidence_threshold_low: float = 0.5,
-        complex_query_threshold: float = 0.7,
-        # How many LLM-driven exploration rounds the RecallFlow is allowed to run.
-        # 0 = always shallow (vector search only); higher = more thorough but slower.
-        exploration_budget: int = 1,
-        # Queries shorter than this skip LLM analysis (saving ~1-3s).
-        # Longer queries (full task descriptions) benefit from LLM distillation.
-        query_analysis_threshold: int = 200,
-    ) -> None:
-        """Initialize Memory.
+    model_config = ConfigDict(arbitrary_types_allowed=True)

-        Args:
-            llm: LLM for analysis (model name or BaseLLM instance).
-            storage: Backend: "lancedb" or a StorageBackend instance.
-            embedder: Embedding callable, provider config dict, or None (default OpenAI).
-            recency_weight: Weight for recency in the composite relevance score.
-            semantic_weight: Weight for semantic similarity in the composite relevance score.
-            importance_weight: Weight for importance in the composite relevance score.
-            recency_half_life_days: Recency score halves every N days (exponential decay).
-            consolidation_threshold: Similarity above which consolidation is triggered on save.
-            consolidation_limit: Max existing records to compare during consolidation.
-            default_importance: Default importance when not provided or inferred.
-            confidence_threshold_high: Recall confidence above which results are returned directly.
-            confidence_threshold_low: Recall confidence below which deeper exploration is triggered.
-            complex_query_threshold: For complex queries, explore deeper below this confidence.
-            exploration_budget: Number of LLM-driven exploration rounds during deep recall.
-            query_analysis_threshold: Queries shorter than this skip LLM analysis during deep recall.
-        """
-        self._config = MemoryConfig(
-            recency_weight=recency_weight,
-            semantic_weight=semantic_weight,
-            importance_weight=importance_weight,
-            recency_half_life_days=recency_half_life_days,
-            consolidation_threshold=consolidation_threshold,
-            consolidation_limit=consolidation_limit,
-            default_importance=default_importance,
-            confidence_threshold_high=confidence_threshold_high,
-            confidence_threshold_low=confidence_threshold_low,
-            complex_query_threshold=complex_query_threshold,
-            exploration_budget=exploration_budget,
-            query_analysis_threshold=query_analysis_threshold,
-        )
+    llm: Annotated[BaseLLM | str, PlainValidator(_passthrough)] = Field(
+        default="gpt-4o-mini",
+        description="LLM for analysis (model name or BaseLLM instance).",
+    )
+    storage: Annotated[StorageBackend | str, PlainValidator(_passthrough)] = Field(
+        default="lancedb",
+        description="Storage backend instance or path string.",
+    )
+    embedder: Any = Field(
+        default=None,
+        description="Embedding callable, provider config dict, or None for default OpenAI.",
+    )
+    recency_weight: float = Field(
+        default=0.3,
+        description="Weight for recency in the composite relevance score.",
+    )
+    semantic_weight: float = Field(
+        default=0.5,
+        description="Weight for semantic similarity in the composite relevance score.",
+    )
+    importance_weight: float = Field(
+        default=0.2,
+        description="Weight for importance in the composite relevance score.",
+    )
+    recency_half_life_days: int = Field(
+        default=30,
+        description="Recency score halves every N days (exponential decay).",
+    )
+    consolidation_threshold: float = Field(
+        default=0.85,
+        description="Similarity above which consolidation is triggered on save.",
+    )
+    consolidation_limit: int = Field(
+        default=5,
+        description="Max existing records to compare during consolidation.",
+    )
+    default_importance: float = Field(
+        default=0.5,
+        description="Default importance when not provided or inferred.",
+    )
+    confidence_threshold_high: float = Field(
+        default=0.8,
+        description="Recall confidence above which results are returned directly.",
+    )
+    confidence_threshold_low: float = Field(
+        default=0.5,
+        description="Recall confidence below which deeper exploration is triggered.",
+    )
+    complex_query_threshold: float = Field(
+        default=0.7,
+        description="For complex queries, explore deeper below this confidence.",
+    )
+    exploration_budget: int = Field(
+        default=1,
+        description="Number of LLM-driven exploration rounds during deep recall.",
+    )
+    query_analysis_threshold: int = Field(
+        default=200,
+        description="Queries shorter than this skip LLM analysis during deep recall.",
+    )
+    read_only: bool = Field(
+        default=False,
+        description="If True, remember() and remember_many() are silent no-ops.",
+    )

-        # Store raw config for lazy initialization. LLM and embedder are only
-        # built on first access so that Memory() never fails at construction
-        # time (e.g. when auto-created by Flow without an API key set).
-        self._llm_config: BaseLLM | str = llm
-        self._llm_instance: BaseLLM | None = None if isinstance(llm, str) else llm
-        self._embedder_config: Any = embedder
-        self._embedder_instance: Any = (
-            embedder if (embedder is not None and not isinstance(embedder, dict)) else None
-        )
-
-        # Storage is initialized eagerly (local, no API key needed).
-        if storage == "lancedb":
-            self._storage = LanceDBStorage()
-        elif isinstance(storage, str):
-            self._storage = LanceDBStorage(path=storage)
-        else:
-            self._storage = storage
-
-        # Background save queue. max_workers=1 serializes saves to avoid
-        # concurrent storage mutations (two saves finding the same similar
-        # record and both trying to update/delete it). Within each save,
-        # the parallel LLM calls still run on their own thread pool.
-        self._save_pool = ThreadPoolExecutor(
+    _config: MemoryConfig = PrivateAttr()
+    _llm_instance: BaseLLM | None = PrivateAttr(default=None)
+    _embedder_instance: Any = PrivateAttr(default=None)
+    _storage: StorageBackend = PrivateAttr()
+    _save_pool: ThreadPoolExecutor = PrivateAttr(
+        default_factory=lambda: ThreadPoolExecutor(
            max_workers=1, thread_name_prefix="memory-save"
        )
-        self._pending_saves: list[Future[Any]] = []
-        self._pending_lock = threading.Lock()
+    )
+    _pending_saves: list[Future[Any]] = PrivateAttr(default_factory=list)
+    _pending_lock: threading.Lock = PrivateAttr(default_factory=threading.Lock)
+
+    def model_post_init(self, __context: Any) -> None:
+        """Initialize runtime state from field values."""
+        self._config = MemoryConfig(
+            recency_weight=self.recency_weight,
+            semantic_weight=self.semantic_weight,
+            importance_weight=self.importance_weight,
+            recency_half_life_days=self.recency_half_life_days,
+            consolidation_threshold=self.consolidation_threshold,
+            consolidation_limit=self.consolidation_limit,
+            default_importance=self.default_importance,
+            confidence_threshold_high=self.confidence_threshold_high,
+            confidence_threshold_low=self.confidence_threshold_low,
+            complex_query_threshold=self.complex_query_threshold,
+            exploration_budget=self.exploration_budget,
+            query_analysis_threshold=self.query_analysis_threshold,
+        )
+
+        self._llm_instance = None if isinstance(self.llm, str) else self.llm
+        self._embedder_instance = (
+            self.embedder
+            if (self.embedder is not None and not isinstance(self.embedder, dict))
+            else None
+        )
+
+        if isinstance(self.storage, str):
+            from crewai.memory.storage.lancedb_storage import LanceDBStorage
+
+            self._storage = (
+                LanceDBStorage()
+                if self.storage == "lancedb"
+                else LanceDBStorage(path=self.storage)
+            )
+        else:
+            self._storage = self.storage

    _MEMORY_DOCS_URL = "https://docs.crewai.com/concepts/memory"

@@ -160,12 +183,13 @@ class Memory:
            from crewai.llm import LLM

            try:
-                self._llm_instance = LLM(model=self._llm_config)
+                model_name = self.llm if isinstance(self.llm, str) else str(self.llm)
+                self._llm_instance = LLM(model=model_name)
            except Exception as e:
                raise RuntimeError(
                    f"Memory requires an LLM for analysis but initialization failed: {e}\n\n"
                    "To fix this, do one of the following:\n"
-                    '  - Set OPENAI_API_KEY for the default model (gpt-4o-mini)\n'
+                    "  - Set OPENAI_API_KEY for the default model (gpt-4o-mini)\n"
                    '  - Pass a different model: Memory(llm="anthropic/claude-3-haiku-20240307")\n'
                    '  - Pass any LLM instance: Memory(llm=LLM(model="your-model"))\n'
                    "  - To skip LLM analysis, pass all fields explicitly to remember()\n"
@@ -179,10 +203,8 @@ class Memory:
        """Lazy embedder initialization -- only created when first needed."""
        if self._embedder_instance is None:
            try:
-                if isinstance(self._embedder_config, dict):
-                    from crewai.rag.embeddings.factory import build_embedder
-
-                    self._embedder_instance = build_embedder(self._embedder_config)
+                if isinstance(self.embedder, dict):
+                    self._embedder_instance = build_embedder(self.embedder)
                else:
                    self._embedder_instance = _default_embedder()
            except Exception as e:
@@ -317,7 +339,7 @@ class Memory:
        source: str | None = None,
        private: bool = False,
        agent_role: str | None = None,
-    ) -> MemoryRecord:
+    ) -> MemoryRecord | None:
        """Store a single item in memory (synchronous).

        Routes through the same serialized save pool as ``remember_many``
@@ -335,11 +357,13 @@ class Memory:
            agent_role: Optional agent role for event metadata.

        Returns:
-            The created MemoryRecord.
+            The created MemoryRecord, or None if this memory is read-only.

        Raises:
            Exception: On save failure (events emitted).
        """
+        if self.read_only:
+            return None
        _source_type = "unified_memory"
        try:
            crewai_event_bus.emit(
@@ -356,7 +380,13 @@ class Memory:
            # then immediately wait for the result.
            future = self._submit_save(
                self._encode_batch,
-                [content], scope, categories, metadata, importance, source, private,
+                [content],
+                scope,
+                categories,
+                metadata,
+                importance,
+                source,
+                private,
            )
            records = future.result()
            record = records[0] if records else None
@@ -420,13 +450,19 @@ class Memory:
        Returns:
            Empty list (records are not available until the background save completes).
        """
-        if not contents:
+        if not contents or self.read_only:
            return []

        self._submit_save(
            self._background_encode_batch,
-            contents, scope, categories, metadata,
-            importance, source, private, agent_role,
+            contents,
+            scope,
+            categories,
+            metadata,
+            importance,
+            source,
+            private,
+            agent_role,
        )
        return []

@@ -566,14 +602,13 @@ class Memory:
                    # Privacy filter
                    if not include_private:
                        raw = [
-                            (r, s) for r, s in raw
+                            (r, s)
+                            for r, s in raw
                            if not r.private or r.source == source
                        ]
                    results = []
                    for r, s in raw:
-                        composite, reasons = compute_composite_score(
-                            r, s, self._config
-                        )
+                        composite, reasons = compute_composite_score(r, s, self._config)
                        results.append(
                            MemoryMatch(
                                record=r,
@@ -739,7 +774,9 @@ class Memory:
            limit: Maximum number of records to return.
            offset: Number of records to skip (for pagination).
        """
-        return self._storage.list_records(scope_prefix=scope, limit=limit, offset=offset)
+        return self._storage.list_records(
+            scope_prefix=scope, limit=limit, offset=offset
+        )

    def info(self, path: str = "/") -> ScopeInfo:
        """Return scope info for path."""
@@ -781,7 +818,7 @@ class Memory:
        importance: float | None = None,
        source: str | None = None,
        private: bool = False,
-    ) -> MemoryRecord:
+    ) -> MemoryRecord | None:
        """Async remember: delegates to sync for now."""
        return self.remember(
            content,
--- a/lib/crewai/src/crewai/rag/embeddings/factory.py
+++ b/lib/crewai/src/crewai/rag/embeddings/factory.py
@@ -216,6 +216,10 @@ def build_embedder_from_dict(
 def build_embedder_from_dict(spec: ONNXProviderSpec) -> ONNXMiniLM_L6_V2: ...


+@overload
+def build_embedder_from_dict(spec: dict[str, Any]) -> EmbeddingFunction[Any]: ...
+
+
 def build_embedder_from_dict(spec):  # type: ignore[no-untyped-def]
    """Build an embedding function instance from a dictionary specification.

@@ -341,6 +345,10 @@ def build_embedder(spec: Text2VecProviderSpec) -> Text2VecEmbeddingFunction: ...
 def build_embedder(spec: ONNXProviderSpec) -> ONNXMiniLM_L6_V2: ...


+@overload
+def build_embedder(spec: dict[str, Any]) -> EmbeddingFunction[Any]: ...
+
+
 def build_embedder(spec):  # type: ignore[no-untyped-def]
    """Build an embedding function from either a provider spec or a provider instance.

--- a/lib/crewai/src/crewai/task.py
+++ b/lib/crewai/src/crewai/task.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import asyncio
 from concurrent.futures import Future
 from copy import copy as shallow_copy
 import datetime
@@ -585,16 +586,29 @@ class Task(BaseModel):

            self._post_agent_execution(agent)

-            if not self._guardrails and not self._guardrail:
+            if isinstance(result, BaseModel):
+                raw = result.model_dump_json()
+                if self.output_pydantic:
+                    pydantic_output = result
+                    json_output = None
+                elif self.output_json:
+                    pydantic_output = None
+                    json_output = result.model_dump()
+                else:
+                    pydantic_output = None
+                    json_output = None
+            elif not self._guardrails and not self._guardrail:
+                raw = result
                pydantic_output, json_output = self._export_output(result)
            else:
+                raw = result
                pydantic_output, json_output = None, None

            task_output = TaskOutput(
                name=self.name or self.description,
                description=self.description,
                expected_output=self.expected_output,
-                raw=result,
+                raw=raw,
                pydantic=pydantic_output,
                json_dict=json_output,
                agent=agent.role,
@@ -624,11 +638,15 @@ class Task(BaseModel):
            self.end_time = datetime.datetime.now()

            if self.callback:
-                self.callback(self.output)
+                cb_result = self.callback(self.output)
+                if inspect.isawaitable(cb_result):
+                    await cb_result

            crew = self.agent.crew  # type: ignore[union-attr]
            if crew and crew.task_callback and crew.task_callback != self.callback:
-                crew.task_callback(self.output)
+                cb_result = crew.task_callback(self.output)
+                if inspect.isawaitable(cb_result):
+                    await cb_result

            if self.output_file:
                content = (
@@ -682,16 +700,29 @@ class Task(BaseModel):

            self._post_agent_execution(agent)

-            if not self._guardrails and not self._guardrail:
+            if isinstance(result, BaseModel):
+                raw = result.model_dump_json()
+                if self.output_pydantic:
+                    pydantic_output = result
+                    json_output = None
+                elif self.output_json:
+                    pydantic_output = None
+                    json_output = result.model_dump()
+                else:
+                    pydantic_output = None
+                    json_output = None
+            elif not self._guardrails and not self._guardrail:
+                raw = result
                pydantic_output, json_output = self._export_output(result)
            else:
+                raw = result
                pydantic_output, json_output = None, None

            task_output = TaskOutput(
                name=self.name or self.description,
                description=self.description,
                expected_output=self.expected_output,
-                raw=result,
+                raw=raw,
                pydantic=pydantic_output,
                json_dict=json_output,
                agent=agent.role,
@@ -722,11 +753,15 @@ class Task(BaseModel):
            self.end_time = datetime.datetime.now()

            if self.callback:
-                self.callback(self.output)
+                cb_result = self.callback(self.output)
+                if inspect.iscoroutine(cb_result):
+                    asyncio.run(cb_result)

            crew = self.agent.crew  # type: ignore[union-attr]
            if crew and crew.task_callback and crew.task_callback != self.callback:
-                crew.task_callback(self.output)
+                cb_result = crew.task_callback(self.output)
+                if inspect.iscoroutine(cb_result):
+                    asyncio.run(cb_result)

            if self.output_file:
                content = (
--- a/lib/crewai/src/crewai/telemetry/init.py
+++ b/lib/crewai/src/crewai/telemetry/init.py
@@ -1,5 +1,4 @@
 from crewai.telemetry.telemetry import Telemetry


-
 __all__ = ["Telemetry"]
--- a/lib/crewai/src/crewai/telemetry/telemetry.py
+++ b/lib/crewai/src/crewai/telemetry/telemetry.py
@@ -173,6 +173,12 @@ class Telemetry:

        self._original_handlers: dict[int, Any] = {}

+        if threading.current_thread() is not threading.main_thread():
+            logger.debug(
+                "Skipping signal handler registration: not running in main thread"
+            )
+            return
+
        self._register_signal_handler(signal.SIGTERM, SigTermEvent, shutdown=True)
        self._register_signal_handler(signal.SIGINT, SigIntEvent, shutdown=True)
        if hasattr(signal, "SIGHUP"):
--- a/lib/crewai/src/crewai/tools/init.py
+++ b/lib/crewai/src/crewai/tools/init.py
@@ -1,7 +1,6 @@
 from crewai.tools.base_tool import BaseTool, EnvVar, tool


-
 __all__ = [
    "BaseTool",
    "EnvVar",
--- a/lib/crewai/src/crewai/tools/base_tool.py
+++ b/lib/crewai/src/crewai/tools/base_tool.py
@@ -23,7 +23,7 @@ from pydantic import (
 )
 from typing_extensions import TypeIs

-from crewai.tools.structured_tool import CrewStructuredTool
+from crewai.tools.structured_tool import CrewStructuredTool, build_schema_hint
 from crewai.utilities.printer import Printer
 from crewai.utilities.pydantic_schema_utils import generate_model_description
 from crewai.utilities.string_utils import sanitize_tool_name
@@ -150,14 +150,39 @@ class BaseTool(BaseModel, ABC):

        super().model_post_init(__context)

+    def _validate_kwargs(self, kwargs: dict[str, Any]) -> dict[str, Any]:
+        """Validate keyword arguments against args_schema if present.
+
+        Args:
+            kwargs: The keyword arguments to validate.
+
+        Returns:
+            Validated (and possibly coerced) keyword arguments.
+
+        Raises:
+            ValueError: If validation against args_schema fails.
+        """
+        if self.args_schema is not None and self.args_schema.model_fields:
+            try:
+                validated = self.args_schema.model_validate(kwargs)
+                return validated.model_dump()
+            except Exception as e:
+                hint = build_schema_hint(self.args_schema)
+                raise ValueError(
+                    f"Tool '{self.name}' arguments validation failed: {e}{hint}"
+                ) from e
+        return kwargs
+
    def run(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
+        if not args:
+            kwargs = self._validate_kwargs(kwargs)
+
        result = self._run(*args, **kwargs)

-        # If _run is async, we safely run it
        if asyncio.iscoroutine(result):
            result = asyncio.run(result)

@@ -179,6 +204,8 @@ class BaseTool(BaseModel, ABC):
        Returns:
            The result of the tool execution.
        """
+        if not args:
+            kwargs = self._validate_kwargs(kwargs)
        result = await self._arun(*args, **kwargs)
        self.current_usage_count += 1
        return result
@@ -331,6 +358,9 @@ class Tool(BaseTool, Generic[P, R]):
        Returns:
            The result of the tool execution.
        """
+        if not args:
+            kwargs = self._validate_kwargs(kwargs)  # type: ignore[assignment]
+
        result = self.func(*args, **kwargs)

        if asyncio.iscoroutine(result):
@@ -361,6 +391,8 @@ class Tool(BaseTool, Generic[P, R]):
        Returns:
            The result of the tool execution.
        """
+        if not args:
+            kwargs = self._validate_kwargs(kwargs)  # type: ignore[assignment]
        result = await self._arun(*args, **kwargs)
        self.current_usage_count += 1
        return result
--- a/lib/crewai/src/crewai/tools/mcp_native_tool.py
+++ b/lib/crewai/src/crewai/tools/mcp_native_tool.py
@@ -27,14 +27,16 @@ class MCPNativeTool(BaseTool):
        tool_name: str,
        tool_schema: dict[str, Any],
        server_name: str,
+        original_tool_name: str | None = None,
    ) -> None:
        """Initialize native MCP tool.

        Args:
            mcp_client: MCPClient instance with active session.
-            tool_name: Original name of the tool on the MCP server.
+            tool_name: Name of the tool (may be prefixed).
            tool_schema: Schema information for the tool.
            server_name: Name of the MCP server for prefixing.
+            original_tool_name: Original name of the tool on the MCP server.
        """
        # Create tool name with server prefix to avoid conflicts
        prefixed_name = f"{server_name}_{tool_name}"
@@ -57,7 +59,7 @@ class MCPNativeTool(BaseTool):

        # Set instance attributes after super().__init__
        self._mcp_client = mcp_client
-        self._original_tool_name = tool_name
+        self._original_tool_name = original_tool_name or tool_name
        self._server_name = server_name
        # self._logger = logging.getLogger(__name__)

--- a/lib/crewai/src/crewai/tools/memory_tools.py
+++ b/lib/crewai/src/crewai/tools/memory_tools.py
@@ -20,14 +20,6 @@ class RecallMemorySchema(BaseModel):
            "or multiple items to search for several things at once."
        ),
    )
-    scope: str | None = Field(
-        default=None,
-        description="Optional scope to narrow the search (e.g. /project/alpha)",
-    )
-    depth: str = Field(
-        default="shallow",
-        description="'shallow' for fast vector search, 'deep' for LLM-analyzed retrieval",
-    )


 class RecallMemoryTool(BaseTool):
@@ -41,32 +33,27 @@ class RecallMemoryTool(BaseTool):
    def _run(
        self,
        queries: list[str] | str,
-        scope: str | None = None,
-        depth: str = "shallow",
        **kwargs: Any,
    ) -> str:
        """Search memory for relevant information.

        Args:
            queries: One or more search queries (string or list of strings).
-            scope: Optional scope prefix to narrow the search.
-            depth: "shallow" for fast vector search, "deep" for LLM-analyzed retrieval.

        Returns:
            Formatted string of matching memories, or a message if none found.
        """
        if isinstance(queries, str):
            queries = [queries]
-        actual_depth = depth if depth in ("shallow", "deep") else "shallow"

        all_lines: list[str] = []
        seen_ids: set[str] = set()
        for query in queries:
-            matches = self.memory.recall(query, scope=scope, limit=5, depth=actual_depth)
+            matches = self.memory.recall(query, limit=20)
            for m in matches:
                if m.record.id not in seen_ids:
                    seen_ids.add(m.record.id)
-                    all_lines.append(f"- (score={m.score:.2f}) {m.record.content}")
+                    all_lines.append(m.format())

        if not all_lines:
            return "No relevant memories found."
@@ -117,20 +104,28 @@ class RememberTool(BaseTool):
 def create_memory_tools(memory: Any) -> list[BaseTool]:
    """Create Recall and Remember tools for the given memory instance.

+    When memory is read-only (``_read_only=True``), only the RecallMemoryTool
+    is returned — the RememberTool is omitted so agents are never offered a
+    save capability they cannot use.
+
    Args:
        memory: A Memory, MemoryScope, or MemorySlice instance.

    Returns:
-        List containing a RecallMemoryTool and a RememberTool.
+        List containing a RecallMemoryTool and, if not read-only, a RememberTool.
    """
    i18n = get_i18n()
-    return [
+    tools: list[BaseTool] = [
        RecallMemoryTool(
            memory=memory,
            description=i18n.tools("recall_memory"),
        ),
-        RememberTool(
-            memory=memory,
-            description=i18n.tools("save_to_memory"),
-        ),
    ]
+    if not memory.read_only:
+        tools.append(
+            RememberTool(
+                memory=memory,
+                description=i18n.tools("save_to_memory"),
+            )
+        )
+    return tools
--- a/lib/crewai/src/crewai/tools/structured_tool.py
+++ b/lib/crewai/src/crewai/tools/structured_tool.py
@@ -17,6 +17,27 @@ if TYPE_CHECKING:
    from crewai.tools.base_tool import BaseTool


+def build_schema_hint(args_schema: type[BaseModel]) -> str:
+    """Build a human-readable hint from a Pydantic model's JSON schema.
+
+    Args:
+        args_schema: The Pydantic model class to extract schema from.
+
+    Returns:
+        A formatted string with expected arguments and required fields,
+        or empty string if schema extraction fails.
+    """
+    try:
+        schema = args_schema.model_json_schema()
+        return (
+            f"\nExpected arguments: "
+            f"{json.dumps(schema.get('properties', {}))}"
+            f"\nRequired: {json.dumps(schema.get('required', []))}"
+        )
+    except Exception:
+        return ""
+
+
 class ToolUsageLimitExceededError(Exception):
    """Exception raised when a tool has reached its maximum usage limit."""

@@ -208,7 +229,8 @@ class CrewStructuredTool:
            validated_args = self.args_schema.model_validate(raw_args)
            return validated_args.model_dump()
        except Exception as e:
-            raise ValueError(f"Arguments validation failed: {e}") from e
+            hint = build_schema_hint(self.args_schema)
+            raise ValueError(f"Arguments validation failed: {e}{hint}") from e

    async def ainvoke(
        self,
--- a/lib/crewai/src/crewai/translations/en.json
+++ b/lib/crewai/src/crewai/translations/en.json
@@ -7,7 +7,7 @@
  "slices": {
    "observation": "\nObservation:",
    "task": "\nCurrent Task: {input}\n\nBegin! This is VERY important to you, use the tools available and give your best Final Answer, your job depends on it!\n\nThought:",
-    "memory": "\n\n# Useful context: \n{memory}",
+    "memory": "\n\n# Memories from past conversations:\n{memory}\n\nIMPORTANT: The memories above are an automatic selection and may be INCOMPLETE. If the task involves counting, listing, or summing items (e.g. 'how many', 'total', 'list all'), you MUST use the Search memory tool with several different queries before answering — do NOT rely solely on the memories shown above. Enumerate each distinct item you find before giving a final count.",
    "role_playing": "You are {role}. {backstory}\nYour personal goal is: {goal}",
    "tools": "\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n{tools}\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: you should always think about what to do\nAction: the action to take, only one name of [{tool_names}], just the name, exactly as it's written.\nAction Input: the input to the action, just a simple JSON object, enclosed in curly braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce all necessary information is gathered, return the following format:\n\n```\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n```",
    "no_tools": "",
@@ -60,12 +60,12 @@
      "description": "See image to understand its content, you can optionally ask a question about the image",
      "default_action": "Please provide a detailed description of this image, including all visual elements, context, and any notable details you can observe."
    },
-    "recall_memory": "Search through the team's shared memory for relevant information. Pass one or more queries to search for multiple things at once. Use this when you need to find facts, decisions, preferences, or past results that may have been stored previously.",
+    "recall_memory": "Search through the team's shared memory for relevant information. Pass one or more queries to search for multiple things at once. Use this when you need to find facts, decisions, preferences, or past results that may have been stored previously. IMPORTANT: For questions that require counting, summing, or listing items across multiple conversations (e.g. 'how many X', 'total Y', 'list all Z'), you MUST search multiple times with different phrasings to ensure you find ALL relevant items before giving a final count or total. Do not rely on a single search — items may be described differently across conversations.",
    "save_to_memory": "Store one or more important facts, decisions, observations, or lessons in memory so they can be recalled later by you or other agents. Pass multiple items at once when you have several things worth remembering."
  },
  "memory": {
    "query_system": "You analyze a query for searching memory.\nGiven the query and available scopes, output:\n1. keywords: Key entities or keywords that can be used to filter by category.\n2. suggested_scopes: Which available scopes are most relevant (empty for all).\n3. complexity: 'simple' or 'complex'.\n4. recall_queries: 1-3 short, targeted search phrases distilled from the query. Each should be a concise phrase optimized for semantic vector search. If the query is already short and focused, return it as-is in a single-item list. For long task descriptions, extract the distinct things worth searching for.\n5. time_filter: If the query references a time period (like 'last week', 'yesterday', 'in January'), return an ISO 8601 date string for the earliest relevant date (e.g. '2026-02-01'). Return null if no time constraint is implied.",
-    "extract_memories_system": "You extract discrete, reusable memory statements from raw content (e.g. a task description and its result).\n\nFor the given content, output a list of memory statements. Each memory must:\n- Be one clear sentence or short statement\n- Be understandable without the original context\n- Capture a decision, fact, outcome, preference, lesson, or observation worth remembering\n- NOT be a vague summary or a restatement of the task description\n- NOT duplicate the same idea in different words\n\nIf there is nothing worth remembering (e.g. empty result, no decisions or facts), return an empty list.\nOutput a JSON object with a single key \"memories\" whose value is a list of strings.",
+    "extract_memories_system": "You extract discrete, reusable memory statements from raw content (e.g. a task description and its result, or a conversation between a user and an assistant).\n\nFor the given content, output a list of memory statements. Each memory must:\n- Be one clear sentence or short statement\n- Be understandable without the original context\n- Capture a decision, fact, outcome, preference, lesson, or observation worth remembering\n- NOT be a vague summary or a restatement of the task description\n- NOT duplicate the same idea in different words\n\nWhen the content is a conversation, pay special attention to facts stated by the user (first-person statements). These personal facts are HIGH PRIORITY and must always be extracted:\n- What the user did, bought, made, visited, attended, or completed\n- Names of people, pets, places, brands, and specific items the user mentions\n- Quantities, durations, dates, and measurements the user states\n- Subordinate clauses and casual asides often contain important personal details (e.g. \"by the way, it took me 4 hours\" or \"my Golden Retriever Max\")\n\nPreserve exact names and numbers — never generalize (e.g. keep \"lavender gin fizz\" not just \"cocktail\", keep \"12 largemouth bass\" not just \"fish caught\", keep \"Golden Retriever\" not just \"dog\").\n\nAdditional extraction rules:\n- Presupposed facts: When the user reveals a fact indirectly in a question (e.g. \"What collar suits a Golden Retriever like Max?\" presupposes Max is a Golden Retriever), extract that fact as a separate memory.\n- Date precision: Always preserve the full date including day-of-month when stated (e.g. \"February 14th\" not just \"February\", \"March 5\" not just \"March\").\n- Life events in passing: When the user mentions a life event (birth, wedding, graduation, move, adoption) while discussing something else, extract the life event as its own memory (e.g. \"my friend David had a baby boy named Jasper\" is a birth fact, even if mentioned while planning to send congratulations).\n\nIf there is nothing worth remembering (e.g. empty result, no decisions or facts), return an empty list.\nOutput a JSON object with a single key \"memories\" whose value is a list of strings.",
    "extract_memories_user": "Content:\n{content}\n\nExtract memory statements as described. Return structured output.",
    "query_user": "Query: {query}\n\nAvailable scopes: {available_scopes}\n{scope_desc}\n\nReturn the analysis as structured output.",
    "save_system": "You analyze content to be stored in a hierarchical memory system.\nGiven the content and the existing scopes and categories, output:\n1. suggested_scope: The best matching existing scope path, or a new path if none fit (use / for root).\n2. categories: A list of categories (reuse existing when relevant, add new ones if needed).\n3. importance: A number from 0.0 to 1.0 indicating how significant this memory is.\n4. extracted_metadata: A JSON object with any entities, dates, or topics you can extract.",
--- a/lib/crewai/src/crewai/utilities/agent_utils.py
+++ b/lib/crewai/src/crewai/utilities/agent_utils.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import asyncio
 from collections.abc import Callable, Sequence
 import concurrent.futures
+import inspect
 import json
 import re
 from typing import TYPE_CHECKING, Any, Final, Literal, TypedDict
@@ -138,7 +139,11 @@ def render_text_description_and_args(

 def convert_tools_to_openai_schema(
    tools: Sequence[BaseTool | CrewStructuredTool],
-) -> tuple[list[dict[str, Any]], dict[str, Callable[..., Any]]]:
+) -> tuple[
+    list[dict[str, Any]],
+    dict[str, Callable[..., Any]],
+    dict[str, BaseTool | CrewStructuredTool],
+]:
    """Convert CrewAI tools to OpenAI function calling format.

    This function converts CrewAI BaseTool and CrewStructuredTool objects
@@ -151,23 +156,21 @@ def convert_tools_to_openai_schema(
    Returns:
        Tuple containing:
        - List of OpenAI-format tool schema dictionaries
-        - Dict mapping tool names to their callable run() methods
-
-    Example:
-        >>> tools = [CalculatorTool(), SearchTool()]
-        >>> schemas, functions = convert_tools_to_openai_schema(tools)
-        >>> # schemas can be passed to llm.call(tools=schemas)
-        >>> # functions can be passed to llm.call(available_functions=functions)
+        - Dict mapping sanitized tool names to their callable run() methods
+        - Dict mapping sanitized tool names to their original tool objects
    """
    openai_tools: list[dict[str, Any]] = []
    available_functions: dict[str, Callable[..., Any]] = {}
+    tool_name_mapping: dict[str, BaseTool | CrewStructuredTool] = {}

    for tool in tools:
        # Get the JSON schema for tool parameters
        parameters: dict[str, Any] = {}
        if hasattr(tool, "args_schema") and tool.args_schema is not None:
            try:
-                schema_output = generate_model_description(tool.args_schema)
+                schema_output = generate_model_description(
+                    tool.args_schema, strip_null_types=False
+                )
                parameters = schema_output.get("json_schema", {}).get("schema", {})
                # Remove title and description from schema root as they're redundant
                parameters.pop("title", None)
@@ -183,6 +186,14 @@ def convert_tools_to_openai_schema(

        sanitized_name = sanitize_tool_name(tool.name)

+        if sanitized_name in available_functions:
+            counter = 2
+            candidate = sanitize_tool_name(f"{sanitized_name}_{counter}")
+            while candidate in available_functions:
+                counter += 1
+                candidate = sanitize_tool_name(f"{sanitized_name}_{counter}")
+            sanitized_name = candidate
+
        schema: dict[str, Any] = {
            "type": "function",
            "function": {
@@ -194,8 +205,9 @@ def convert_tools_to_openai_schema(
        }
        openai_tools.append(schema)
        available_functions[sanitized_name] = tool.run  # type: ignore[union-attr]
+        tool_name_mapping[sanitized_name] = tool

-    return openai_tools, available_functions
+    return openai_tools, available_functions, tool_name_mapping


 def has_reached_max_iterations(iterations: int, max_iterations: int) -> bool:
@@ -501,7 +513,9 @@ def handle_agent_action_core(
        - TODO: Remove messages parameter and its usage.
    """
    if step_callback:
-        step_callback(tool_result)
+        cb_result = step_callback(tool_result)
+        if inspect.iscoroutine(cb_result):
+            asyncio.run(cb_result)

    formatted_answer.text += f"\nObservation: {tool_result.result}"
    formatted_answer.result = tool_result.result
@@ -1143,6 +1157,36 @@ def extract_tool_call_info(
    return None


+def parse_tool_call_args(
+    func_args: dict[str, Any] | str,
+    func_name: str,
+    call_id: str,
+    original_tool: Any = None,
+) -> tuple[dict[str, Any], None] | tuple[None, dict[str, Any]]:
+    """Parse tool call arguments from a JSON string or dict.
+
+    Returns:
+        ``(args_dict, None)`` on success, or ``(None, error_result)`` on
+        JSON parse failure where ``error_result`` is a ready-to-return dict
+        with the same shape as ``_execute_single_native_tool_call`` return values.
+    """
+    if isinstance(func_args, str):
+        try:
+            return json.loads(func_args), None
+        except json.JSONDecodeError as e:
+            return None, {
+                "call_id": call_id,
+                "func_name": func_name,
+                "result": (
+                    f"Error: Failed to parse tool arguments as JSON: {e}. "
+                    f"Please provide valid JSON arguments for the '{func_name}' tool."
+                ),
+                "from_cache": False,
+                "original_tool": original_tool,
+            }
+    return func_args, None
+
+
 def _setup_before_llm_call_hooks(
    executor_context: CrewAgentExecutor | AgentExecutor | LiteAgent | None,
    printer: Printer,
--- a/lib/crewai/src/crewai/utilities/llm_utils.py
+++ b/lib/crewai/src/crewai/utilities/llm_utils.py
@@ -69,7 +69,7 @@ def create_llm(
 UNACCEPTED_ATTRIBUTES: Final[list[str]] = [
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
-    "AWS_REGION_NAME",
+    "AWS_DEFAULT_REGION",
 ]


@@ -146,7 +146,7 @@ def _llm_via_environment_or_fallback() -> LLM | None:
    unaccepted_attributes = [
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
-        "AWS_REGION_NAME",
+        "AWS_DEFAULT_REGION",
    ]
    set_provider = model_name.partition("/")[0] if "/" in model_name else "openai"

--- a/lib/crewai/src/crewai/utilities/pydantic_schema_utils.py
+++ b/lib/crewai/src/crewai/utilities/pydantic_schema_utils.py
@@ -417,7 +417,11 @@ def strip_null_from_types(schema: dict[str, Any]) -> dict[str, Any]:
    return schema


-def generate_model_description(model: type[BaseModel]) -> ModelDescription:
+def generate_model_description(
+    model: type[BaseModel],
+    *,
+    strip_null_types: bool = True,
+) -> ModelDescription:
    """Generate JSON schema description of a Pydantic model.

    This function takes a Pydantic model class and returns its JSON schema,
@@ -426,6 +430,9 @@ def generate_model_description(model: type[BaseModel]) -> ModelDescription:

    Args:
        model: A Pydantic model class.
+        strip_null_types: When ``True`` (default), remove ``null`` from
+            ``anyOf`` / ``type`` arrays.  Set to ``False`` to allow sending ``null`` for
+            optional fields.

    Returns:
        A ModelDescription with JSON schema representation of the model.
@@ -442,7 +449,9 @@ def generate_model_description(model: type[BaseModel]) -> ModelDescription:
    json_schema = fix_discriminator_mappings(json_schema)
    json_schema = convert_oneof_to_anyof(json_schema)
    json_schema = ensure_all_properties_required(json_schema)
-    json_schema = strip_null_from_types(json_schema)
+
+    if strip_null_types:
+        json_schema = strip_null_from_types(json_schema)

    return {
        "type": "json_schema",
@@ -482,10 +491,66 @@ FORMAT_TYPE_MAP: dict[str, type[Any]] = {
 }


+def build_rich_field_description(prop_schema: dict[str, Any]) -> str:
+    """Build a comprehensive field description including constraints.
+
+    Embeds format, enum, pattern, min/max, and example constraints into the
+    description text so that LLMs can understand tool parameter requirements
+    without inspecting the raw JSON Schema.
+
+    Args:
+        prop_schema: Property schema with description and constraints.
+
+    Returns:
+        Enhanced description with format, enum, and other constraints.
+    """
+    parts: list[str] = []
+
+    description = prop_schema.get("description", "")
+    if description:
+        parts.append(description)
+
+    format_type = prop_schema.get("format")
+    if format_type:
+        parts.append(f"Format: {format_type}")
+
+    enum_values = prop_schema.get("enum")
+    if enum_values:
+        enum_str = ", ".join(repr(v) for v in enum_values)
+        parts.append(f"Allowed values: [{enum_str}]")
+
+    pattern = prop_schema.get("pattern")
+    if pattern:
+        parts.append(f"Pattern: {pattern}")
+
+    minimum = prop_schema.get("minimum")
+    maximum = prop_schema.get("maximum")
+    if minimum is not None:
+        parts.append(f"Minimum: {minimum}")
+    if maximum is not None:
+        parts.append(f"Maximum: {maximum}")
+
+    min_length = prop_schema.get("minLength")
+    max_length = prop_schema.get("maxLength")
+    if min_length is not None:
+        parts.append(f"Min length: {min_length}")
+    if max_length is not None:
+        parts.append(f"Max length: {max_length}")
+
+    examples = prop_schema.get("examples")
+    if examples:
+        examples_str = ", ".join(repr(e) for e in examples[:3])
+        parts.append(f"Examples: {examples_str}")
+
+    return ". ".join(parts) if parts else ""
+
+
 def create_model_from_schema(  # type: ignore[no-any-unimported]
    json_schema: dict[str, Any],
    *,
    root_schema: dict[str, Any] | None = None,
+    model_name: str | None = None,
+    enrich_descriptions: bool = False,
    __config__: ConfigDict | None = None,
    __base__: type[BaseModel] | None = None,
    __module__: str = __name__,
@@ -503,6 +568,13 @@ def create_model_from_schema(  # type: ignore[no-any-unimported]
        json_schema: A dictionary representing the JSON schema.
        root_schema: The root schema containing $defs. If not provided, the
            current schema is treated as the root schema.
+        model_name: Override for the model name. If not provided, the schema
+            ``title`` field is used, falling back to ``"DynamicModel"``.
+        enrich_descriptions: When True, augment field descriptions with
+            constraint info (format, enum, pattern, min/max, examples) via
+            :func:`build_rich_field_description`.  Useful for LLM-facing tool
+            schemas where constraints in the description help the model
+            understand parameter requirements.
        __config__: Pydantic configuration for the generated model.
        __base__: Base class for the generated model. Defaults to BaseModel.
        __module__: Module name for the generated model class.
@@ -539,10 +611,14 @@ def create_model_from_schema(  # type: ignore[no-any-unimported]
        if "title" not in json_schema and "title" in (root_schema or {}):
            json_schema["title"] = (root_schema or {}).get("title")

-    model_name = json_schema.get("title") or "DynamicModel"
+    effective_name = model_name or json_schema.get("title") or "DynamicModel"
    field_definitions = {
        name: _json_schema_to_pydantic_field(
-            name, prop, json_schema.get("required", []), effective_root
+            name,
+            prop,
+            json_schema.get("required", []),
+            effective_root,
+            enrich_descriptions=enrich_descriptions,
        )
        for name, prop in (json_schema.get("properties", {}) or {}).items()
    }
@@ -550,7 +626,7 @@ def create_model_from_schema(  # type: ignore[no-any-unimported]
    effective_config = __config__ or ConfigDict(extra="forbid")

    return create_model_base(
-        model_name,
+        effective_name,
        __config__=effective_config,
        __base__=__base__,
        __module__=__module__,
@@ -565,6 +641,8 @@ def _json_schema_to_pydantic_field(
    json_schema: dict[str, Any],
    required: list[str],
    root_schema: dict[str, Any],
+    *,
+    enrich_descriptions: bool = False,
 ) -> Any:
    """Convert a JSON schema property to a Pydantic field definition.

@@ -573,20 +651,29 @@ def _json_schema_to_pydantic_field(
        json_schema: The JSON schema for this field.
        required: List of required field names.
        root_schema: The root schema for resolving $ref.
+        enrich_descriptions: When True, embed constraints in the description.

    Returns:
        A tuple of (type, Field) for use with create_model.
    """
-    type_ = _json_schema_to_pydantic_type(json_schema, root_schema, name_=name.title())
-    description = json_schema.get("description")
-    examples = json_schema.get("examples")
+    type_ = _json_schema_to_pydantic_type(
+        json_schema, root_schema, name_=name.title(), enrich_descriptions=enrich_descriptions
+    )
    is_required = name in required

    field_params: dict[str, Any] = {}
    schema_extra: dict[str, Any] = {}

-    if description:
-        field_params["description"] = description
+    if enrich_descriptions:
+        rich_desc = build_rich_field_description(json_schema)
+        if rich_desc:
+            field_params["description"] = rich_desc
+    else:
+        description = json_schema.get("description")
+        if description:
+            field_params["description"] = description
+
+    examples = json_schema.get("examples")
    if examples:
        schema_extra["examples"] = examples

@@ -702,6 +789,7 @@ def _json_schema_to_pydantic_type(
    root_schema: dict[str, Any],
    *,
    name_: str | None = None,
+    enrich_descriptions: bool = False,
 ) -> Any:
    """Convert a JSON schema to a Python/Pydantic type.

@@ -709,6 +797,7 @@ def _json_schema_to_pydantic_type(
        json_schema: The JSON schema to convert.
        root_schema: The root schema for resolving $ref.
        name_: Optional name for nested models.
+        enrich_descriptions: Propagated to nested model creation.

    Returns:
        A Python type corresponding to the JSON schema.
@@ -716,7 +805,9 @@ def _json_schema_to_pydantic_type(
    ref = json_schema.get("$ref")
    if ref:
        ref_schema = _resolve_ref(ref, root_schema)
-        return _json_schema_to_pydantic_type(ref_schema, root_schema, name_=name_)
+        return _json_schema_to_pydantic_type(
+            ref_schema, root_schema, name_=name_, enrich_descriptions=enrich_descriptions
+        )

    enum_values = json_schema.get("enum")
    if enum_values:
@@ -731,7 +822,10 @@ def _json_schema_to_pydantic_type(
    if any_of_schemas:
        any_of_types = [
            _json_schema_to_pydantic_type(
-                schema, root_schema, name_=f"{name_ or 'Union'}Option{i}"
+                schema,
+                root_schema,
+                name_=f"{name_ or 'Union'}Option{i}",
+                enrich_descriptions=enrich_descriptions,
            )
            for i, schema in enumerate(any_of_schemas)
        ]
@@ -741,10 +835,14 @@ def _json_schema_to_pydantic_type(
    if all_of_schemas:
        if len(all_of_schemas) == 1:
            return _json_schema_to_pydantic_type(
-                all_of_schemas[0], root_schema, name_=name_
+                all_of_schemas[0], root_schema, name_=name_,
+                enrich_descriptions=enrich_descriptions,
            )
        merged = _merge_all_of_schemas(all_of_schemas, root_schema)
-        return _json_schema_to_pydantic_type(merged, root_schema, name_=name_)
+        return _json_schema_to_pydantic_type(
+            merged, root_schema, name_=name_,
+            enrich_descriptions=enrich_descriptions,
+        )

    type_ = json_schema.get("type")

@@ -760,7 +858,8 @@ def _json_schema_to_pydantic_type(
        items_schema = json_schema.get("items")
        if items_schema:
            item_type = _json_schema_to_pydantic_type(
-                items_schema, root_schema, name_=name_
+                items_schema, root_schema, name_=name_,
+                enrich_descriptions=enrich_descriptions,
            )
            return list[item_type]  # type: ignore[valid-type]
        return list
@@ -770,7 +869,10 @@ def _json_schema_to_pydantic_type(
            json_schema_ = json_schema.copy()
            if json_schema_.get("title") is None:
                json_schema_["title"] = name_ or "DynamicModel"
-            return create_model_from_schema(json_schema_, root_schema=root_schema)
+            return create_model_from_schema(
+                json_schema_, root_schema=root_schema,
+                enrich_descriptions=enrich_descriptions,
+            )
        return dict
    if type_ == "null":
        return None
--- a/lib/crewai/src/crewai/utilities/string_utils.py
+++ b/lib/crewai/src/crewai/utilities/string_utils.py
@@ -2,6 +2,7 @@
 # https://github.com/un33k/python-slugify
 # MIT License

+import hashlib
 import re
 from typing import Any, Final
 import unicodedata
@@ -40,7 +41,9 @@ def sanitize_tool_name(name: str, max_length: int = _MAX_TOOL_NAME_LENGTH) -> st
    name = name.strip("_")

    if len(name) > max_length:
-        name = name[:max_length].rstrip("_")
+        name_hash = hashlib.sha256(name.encode()).hexdigest()[:8]
+        suffix = f"_{name_hash}"
+        name = name[: max_length - len(suffix)].rstrip("_") + suffix

    return name

--- a/lib/crewai/tests/agents/test_agent_executor.py
+++ b/lib/crewai/tests/agents/test_agent_executor.py
@@ -4,6 +4,7 @@ Tests the Flow-based agent executor implementation including state management,
 flow methods, routing logic, and error handling.
 """

+import time
 from unittest.mock import Mock, patch

 import pytest
@@ -122,7 +123,7 @@ class TestAgentExecutor:
        executor.state.iterations = 10

        result = executor.check_max_iterations()
-        assert result == "force_final_answer"
+        assert result == "max_iterations_exceeded"

    def test_route_by_answer_type_action(self, mock_dependencies):
        """Test routing for AgentAction."""
@@ -462,3 +463,176 @@ class TestFlowInvoke:

        assert result == {"output": "Done"}
        assert len(executor.state.messages) >= 2
+
+
+class TestNativeToolExecution:
+    """Test native tool execution behavior."""
+
+    @pytest.fixture
+    def mock_dependencies(self):
+        llm = Mock()
+        llm.supports_stop_words.return_value = True
+
+        task = Mock()
+        task.name = "Test Task"
+        task.description = "Test"
+        task.human_input = False
+        task.response_model = None
+
+        crew = Mock()
+        crew._memory = None
+        crew.verbose = False
+        crew._train = False
+
+        agent = Mock()
+        agent.id = "test-agent-id"
+        agent.role = "Test Agent"
+        agent.verbose = False
+        agent.key = "test-key"
+
+        prompt = {"prompt": "Test {input} {tool_names} {tools}"}
+
+        tools_handler = Mock()
+        tools_handler.cache = None
+
+        return {
+            "llm": llm,
+            "task": task,
+            "crew": crew,
+            "agent": agent,
+            "prompt": prompt,
+            "max_iter": 10,
+            "tools": [],
+            "tools_names": "",
+            "stop_words": [],
+            "tools_description": "",
+            "tools_handler": tools_handler,
+        }
+
+    def test_execute_native_tool_runs_parallel_for_multiple_calls(
+        self, mock_dependencies
+    ):
+        executor = AgentExecutor(**mock_dependencies)
+
+        def slow_one() -> str:
+            time.sleep(0.2)
+            return "one"
+
+        def slow_two() -> str:
+            time.sleep(0.2)
+            return "two"
+
+        executor._available_functions = {"slow_one": slow_one, "slow_two": slow_two}
+        executor.state.pending_tool_calls = [
+            {
+                "id": "call_1",
+                "function": {"name": "slow_one", "arguments": "{}"},
+            },
+            {
+                "id": "call_2",
+                "function": {"name": "slow_two", "arguments": "{}"},
+            },
+        ]
+
+        started = time.perf_counter()
+        result = executor.execute_native_tool()
+        elapsed = time.perf_counter() - started
+
+        assert result == "native_tool_completed"
+        assert elapsed < 0.5
+        tool_messages = [m for m in executor.state.messages if m.get("role") == "tool"]
+        assert len(tool_messages) == 2
+        assert tool_messages[0]["tool_call_id"] == "call_1"
+        assert tool_messages[1]["tool_call_id"] == "call_2"
+
+    def test_execute_native_tool_falls_back_to_sequential_for_result_as_answer(
+        self, mock_dependencies
+    ):
+        executor = AgentExecutor(**mock_dependencies)
+
+        def slow_one() -> str:
+            time.sleep(0.2)
+            return "one"
+
+        def slow_two() -> str:
+            time.sleep(0.2)
+            return "two"
+
+        result_tool = Mock()
+        result_tool.name = "slow_one"
+        result_tool.result_as_answer = True
+        result_tool.max_usage_count = None
+        result_tool.current_usage_count = 0
+
+        executor.original_tools = [result_tool]
+        executor._available_functions = {"slow_one": slow_one, "slow_two": slow_two}
+        executor.state.pending_tool_calls = [
+            {
+                "id": "call_1",
+                "function": {"name": "slow_one", "arguments": "{}"},
+            },
+            {
+                "id": "call_2",
+                "function": {"name": "slow_two", "arguments": "{}"},
+            },
+        ]
+
+        started = time.perf_counter()
+        result = executor.execute_native_tool()
+        elapsed = time.perf_counter() - started
+
+        assert result == "tool_result_is_final"
+        assert elapsed >= 0.2
+        assert elapsed < 0.8
+        assert isinstance(executor.state.current_answer, AgentFinish)
+        assert executor.state.current_answer.output == "one"
+
+    def test_execute_native_tool_result_as_answer_short_circuits_remaining_calls(
+        self, mock_dependencies
+    ):
+        executor = AgentExecutor(**mock_dependencies)
+        call_counts = {"slow_one": 0, "slow_two": 0}
+
+        def slow_one() -> str:
+            call_counts["slow_one"] += 1
+            time.sleep(0.2)
+            return "one"
+
+        def slow_two() -> str:
+            call_counts["slow_two"] += 1
+            time.sleep(0.2)
+            return "two"
+
+        result_tool = Mock()
+        result_tool.name = "slow_one"
+        result_tool.result_as_answer = True
+        result_tool.max_usage_count = None
+        result_tool.current_usage_count = 0
+
+        executor.original_tools = [result_tool]
+        executor._available_functions = {"slow_one": slow_one, "slow_two": slow_two}
+        executor.state.pending_tool_calls = [
+            {
+                "id": "call_1",
+                "function": {"name": "slow_one", "arguments": "{}"},
+            },
+            {
+                "id": "call_2",
+                "function": {"name": "slow_two", "arguments": "{}"},
+            },
+        ]
+
+        started = time.perf_counter()
+        result = executor.execute_native_tool()
+        elapsed = time.perf_counter() - started
+
+        assert result == "tool_result_is_final"
+        assert isinstance(executor.state.current_answer, AgentFinish)
+        assert executor.state.current_answer.output == "one"
+        assert call_counts["slow_one"] == 1
+        assert call_counts["slow_two"] == 0
+        assert elapsed < 0.5
+
+        tool_messages = [m for m in executor.state.messages if m.get("role") == "tool"]
+        assert len(tool_messages) == 1
+        assert tool_messages[0]["tool_call_id"] == "call_1"
--- a/lib/crewai/tests/agents/test_async_agent_executor.py
+++ b/lib/crewai/tests/agents/test_async_agent_executor.py
@@ -2,7 +2,7 @@

 import asyncio
 from typing import Any
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock, Mock, patch

 import pytest

@@ -291,6 +291,46 @@ class TestAsyncAgentExecutor:
        assert max_concurrent > 1, f"Expected concurrent execution, max concurrent was {max_concurrent}"


+class TestInvokeStepCallback:
+    """Tests for _invoke_step_callback with sync and async callbacks."""
+
+    def test_invoke_step_callback_with_sync_callback(
+        self, executor: CrewAgentExecutor
+    ) -> None:
+        """Test that a sync step callback is called normally."""
+        callback = Mock()
+        executor.step_callback = callback
+        answer = AgentFinish(thought="thinking", output="test", text="final")
+
+        executor._invoke_step_callback(answer)
+
+        callback.assert_called_once_with(answer)
+
+    def test_invoke_step_callback_with_async_callback(
+        self, executor: CrewAgentExecutor
+    ) -> None:
+        """Test that an async step callback is awaited via asyncio.run."""
+        async_callback = AsyncMock()
+        executor.step_callback = async_callback
+        answer = AgentFinish(thought="thinking", output="test", text="final")
+
+        with patch("crewai.agents.crew_agent_executor.asyncio.run") as mock_run:
+            executor._invoke_step_callback(answer)
+
+            async_callback.assert_called_once_with(answer)
+            mock_run.assert_called_once()
+
+    def test_invoke_step_callback_with_none(
+        self, executor: CrewAgentExecutor
+    ) -> None:
+        """Test that no error is raised when step_callback is None."""
+        executor.step_callback = None
+        answer = AgentFinish(thought="thinking", output="test", text="final")
+
+        # Should not raise
+        executor._invoke_step_callback(answer)
+
+
 class TestAsyncLLMResponseHelper:
    """Tests for aget_llm_response helper function."""

--- a/lib/crewai/tests/agents/test_lite_agent.py
+++ b/lib/crewai/tests/agents/test_lite_agent.py
@@ -659,7 +659,7 @@ def test_agent_kickoff_with_platform_tools(mock_get, mock_post):


@patch.dict("os.environ", {"EXA_API_KEY": "test_exa_key"})
-@patch("crewai.agent.Agent._get_external_mcp_tools")
+@patch("crewai.agent.Agent.get_mcp_tools")
@pytest.mark.vcr()
 def test_agent_kickoff_with_mcp_tools(mock_get_mcp_tools):
    """Test that Agent.kickoff() properly integrates MCP tools with LiteAgent"""
@@ -691,7 +691,7 @@ def test_agent_kickoff_with_mcp_tools(mock_get_mcp_tools):
    assert result.raw is not None

    # Verify MCP tools were retrieved
-    mock_get_mcp_tools.assert_called_once_with("https://mcp.exa.ai/mcp?api_key=test_exa_key&profile=research")
+    mock_get_mcp_tools.assert_called_once_with(["https://mcp.exa.ai/mcp?api_key=test_exa_key&profile=research"])


 # ============================================================================
@@ -1136,6 +1136,7 @@ def test_lite_agent_memory_instance_recall_and_save_called():
        successful_requests=1,
    )
    mock_memory = Mock()
+    mock_memory.read_only = False
    mock_memory.recall.return_value = []
    mock_memory.extract_memories.return_value = ["Fact one.", "Fact two."]

--- a/lib/crewai/tests/agents/test_native_tool_calling.py
+++ b/lib/crewai/tests/agents/test_native_tool_calling.py
@@ -6,13 +6,20 @@ when the LLM supports it, across multiple providers.

 from __future__ import annotations

+from collections.abc import Generator
 import os
-from unittest.mock import patch
+import threading
+import time
+from collections import Counter
+from unittest.mock import Mock, patch

 import pytest
 from pydantic import BaseModel, Field

 from crewai import Agent, Crew, Task
+from crewai.events import crewai_event_bus
+from crewai.hooks import register_after_tool_call_hook, register_before_tool_call_hook
+from crewai.hooks.tool_hooks import ToolCallHookContext
 from crewai.llm import LLM
 from crewai.tools.base_tool import BaseTool

@@ -64,6 +71,73 @@ class FailingTool(BaseTool):
    def _run(self) -> str:
        raise Exception("This tool always fails")

+
+class LocalSearchInput(BaseModel):
+    query: str = Field(description="Search query")
+
+
+class ParallelProbe:
+    """Thread-safe in-memory recorder for tool execution windows."""
+
+    _lock = threading.Lock()
+    _windows: list[tuple[str, float, float]] = []
+
+    @classmethod
+    def reset(cls) -> None:
+        with cls._lock:
+            cls._windows = []
+
+    @classmethod
+    def record(cls, tool_name: str, start: float, end: float) -> None:
+        with cls._lock:
+            cls._windows.append((tool_name, start, end))
+
+    @classmethod
+    def windows(cls) -> list[tuple[str, float, float]]:
+        with cls._lock:
+            return list(cls._windows)
+
+
+def _parallel_prompt() -> str:
+    return (
+        "This is a tool-calling compliance test. "
+        "In your next assistant turn, emit exactly 3 tool calls in the same response (parallel tool calls), in this order: "
+        "1) parallel_local_search_one(query='latest OpenAI model release notes'), "
+        "2) parallel_local_search_two(query='latest Anthropic model release notes'), "
+        "3) parallel_local_search_three(query='latest Gemini model release notes'). "
+        "Do not call any other tools and do not answer before those 3 tool calls are emitted. "
+        "After the tool results return, provide a one paragraph summary."
+    )
+
+
+def _max_concurrency(windows: list[tuple[str, float, float]]) -> int:
+    points: list[tuple[float, int]] = []
+    for _, start, end in windows:
+        points.append((start, 1))
+        points.append((end, -1))
+    points.sort(key=lambda p: (p[0], p[1]))
+
+    current = 0
+    maximum = 0
+    for _, delta in points:
+        current += delta
+        if current > maximum:
+            maximum = current
+    return maximum
+
+
+def _assert_tools_overlapped() -> None:
+    windows = ParallelProbe.windows()
+    local_windows = [
+        w
+        for w in windows
+        if w[0].startswith("parallel_local_search_")
+    ]
+
+    assert len(local_windows) >= 3, f"Expected at least 3 local tool calls, got {len(local_windows)}"
+    assert _max_concurrency(local_windows) >= 2, "Expected overlapping local tool executions"
+
+
@pytest.fixture
 def calculator_tool() -> CalculatorTool:
    """Create a calculator tool for testing."""
@@ -82,6 +156,65 @@ def failing_tool() -> BaseTool:

    )

+
+@pytest.fixture
+def parallel_tools() -> list[BaseTool]:
+    """Create local tools used to verify native parallel execution deterministically."""
+
+    class ParallelLocalSearchOne(BaseTool):
+        name: str = "parallel_local_search_one"
+        description: str = "Local search tool #1 for concurrency testing."
+        args_schema: type[BaseModel] = LocalSearchInput
+
+        def _run(self, query: str) -> str:
+            start = time.perf_counter()
+            time.sleep(1.0)
+            end = time.perf_counter()
+            ParallelProbe.record(self.name, start, end)
+            return f"[one] {query}"
+
+    class ParallelLocalSearchTwo(BaseTool):
+        name: str = "parallel_local_search_two"
+        description: str = "Local search tool #2 for concurrency testing."
+        args_schema: type[BaseModel] = LocalSearchInput
+
+        def _run(self, query: str) -> str:
+            start = time.perf_counter()
+            time.sleep(1.0)
+            end = time.perf_counter()
+            ParallelProbe.record(self.name, start, end)
+            return f"[two] {query}"
+
+    class ParallelLocalSearchThree(BaseTool):
+        name: str = "parallel_local_search_three"
+        description: str = "Local search tool #3 for concurrency testing."
+        args_schema: type[BaseModel] = LocalSearchInput
+
+        def _run(self, query: str) -> str:
+            start = time.perf_counter()
+            time.sleep(1.0)
+            end = time.perf_counter()
+            ParallelProbe.record(self.name, start, end)
+            return f"[three] {query}"
+
+    return [
+        ParallelLocalSearchOne(),
+        ParallelLocalSearchTwo(),
+        ParallelLocalSearchThree(),
+    ]
+
+
+def _attach_parallel_probe_handler() -> None:
+    @crewai_event_bus.on(ToolUsageFinishedEvent)
+    def _capture_tool_window(_source, event: ToolUsageFinishedEvent):
+        if not event.tool_name.startswith("parallel_local_search_"):
+            return
+        ParallelProbe.record(
+            event.tool_name,
+            event.started_at.timestamp(),
+            event.finished_at.timestamp(),
+        )
+
 # =============================================================================
 # OpenAI Provider Tests
 # =============================================================================
@@ -122,7 +255,7 @@ class TestOpenAINativeToolCalling:
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test OpenAI agent kickoff with mocked LLM call."""
-        llm = LLM(model="gpt-4o-mini")
+        llm = LLM(model="gpt-5-nano")

        with patch.object(llm, "call", return_value="The answer is 120.") as mock_call:
            agent = Agent(
@@ -146,6 +279,174 @@ class TestOpenAINativeToolCalling:
            assert mock_call.called
            assert result is not None

+    @pytest.mark.vcr()
+    @pytest.mark.timeout(180)
+    def test_openai_parallel_native_tool_calling_test_crew(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        agent = Agent(
+            role="Parallel Tool Agent",
+            goal="Use both tools exactly as instructed",
+            backstory="You follow tool instructions precisely.",
+            tools=parallel_tools,
+            llm=LLM(model="gpt-5-nano", temperature=1),
+            verbose=False,
+            max_iter=3,
+        )
+        task = Task(
+            description=_parallel_prompt(),
+            expected_output="A one sentence summary of both tool outputs",
+            agent=agent,
+        )
+        crew = Crew(agents=[agent], tasks=[task])
+        result = crew.kickoff()
+        assert result is not None
+        _assert_tools_overlapped()
+
+    @pytest.mark.vcr()
+    @pytest.mark.timeout(180)
+    def test_openai_parallel_native_tool_calling_test_agent_kickoff(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        agent = Agent(
+            role="Parallel Tool Agent",
+            goal="Use both tools exactly as instructed",
+            backstory="You follow tool instructions precisely.",
+            tools=parallel_tools,
+            llm=LLM(model="gpt-4o-mini"),
+            verbose=False,
+            max_iter=3,
+        )
+        result = agent.kickoff(_parallel_prompt())
+        assert result is not None
+        _assert_tools_overlapped()
+
+    @pytest.mark.vcr()
+    @pytest.mark.timeout(180)
+    def test_openai_parallel_native_tool_calling_tool_hook_parity_crew(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        hook_calls: dict[str, list[dict[str, str]]] = {"before": [], "after": []}
+
+        def before_hook(context: ToolCallHookContext) -> bool | None:
+            if context.tool_name.startswith("parallel_local_search_"):
+                hook_calls["before"].append(
+                    {
+                        "tool_name": context.tool_name,
+                        "query": str(context.tool_input.get("query", "")),
+                    }
+                )
+            return None
+
+        def after_hook(context: ToolCallHookContext) -> str | None:
+            if context.tool_name.startswith("parallel_local_search_"):
+                hook_calls["after"].append(
+                    {
+                        "tool_name": context.tool_name,
+                        "query": str(context.tool_input.get("query", "")),
+                    }
+                )
+            return None
+
+        register_before_tool_call_hook(before_hook)
+        register_after_tool_call_hook(after_hook)
+
+        try:
+            agent = Agent(
+                role="Parallel Tool Agent",
+                goal="Use both tools exactly as instructed",
+                backstory="You follow tool instructions precisely.",
+                tools=parallel_tools,
+                llm=LLM(model="gpt-5-nano", temperature=1),
+                verbose=False,
+                max_iter=3,
+            )
+            task = Task(
+                description=_parallel_prompt(),
+                expected_output="A one sentence summary of both tool outputs",
+                agent=agent,
+            )
+            crew = Crew(agents=[agent], tasks=[task])
+            result = crew.kickoff()
+
+            assert result is not None
+            _assert_tools_overlapped()
+
+            before_names = [call["tool_name"] for call in hook_calls["before"]]
+            after_names = [call["tool_name"] for call in hook_calls["after"]]
+            assert len(before_names) >= 3, "Expected before hooks for all parallel calls"
+            assert Counter(before_names) == Counter(after_names)
+            assert all(call["query"] for call in hook_calls["before"])
+            assert all(call["query"] for call in hook_calls["after"])
+        finally:
+            from crewai.hooks import (
+                unregister_after_tool_call_hook,
+                unregister_before_tool_call_hook,
+            )
+
+            unregister_before_tool_call_hook(before_hook)
+            unregister_after_tool_call_hook(after_hook)
+
+    @pytest.mark.vcr()
+    @pytest.mark.timeout(180)
+    def test_openai_parallel_native_tool_calling_tool_hook_parity_agent_kickoff(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        hook_calls: dict[str, list[dict[str, str]]] = {"before": [], "after": []}
+
+        def before_hook(context: ToolCallHookContext) -> bool | None:
+            if context.tool_name.startswith("parallel_local_search_"):
+                hook_calls["before"].append(
+                    {
+                        "tool_name": context.tool_name,
+                        "query": str(context.tool_input.get("query", "")),
+                    }
+                )
+            return None
+
+        def after_hook(context: ToolCallHookContext) -> str | None:
+            if context.tool_name.startswith("parallel_local_search_"):
+                hook_calls["after"].append(
+                    {
+                        "tool_name": context.tool_name,
+                        "query": str(context.tool_input.get("query", "")),
+                    }
+                )
+            return None
+
+        register_before_tool_call_hook(before_hook)
+        register_after_tool_call_hook(after_hook)
+
+        try:
+            agent = Agent(
+                role="Parallel Tool Agent",
+                goal="Use both tools exactly as instructed",
+                backstory="You follow tool instructions precisely.",
+                tools=parallel_tools,
+                llm=LLM(model="gpt-5-nano", temperature=1),
+                verbose=False,
+                max_iter=3,
+            )
+            result = agent.kickoff(_parallel_prompt())
+
+            assert result is not None
+            _assert_tools_overlapped()
+
+            before_names = [call["tool_name"] for call in hook_calls["before"]]
+            after_names = [call["tool_name"] for call in hook_calls["after"]]
+            assert len(before_names) >= 3, "Expected before hooks for all parallel calls"
+            assert Counter(before_names) == Counter(after_names)
+            assert all(call["query"] for call in hook_calls["before"])
+            assert all(call["query"] for call in hook_calls["after"])
+        finally:
+            from crewai.hooks import (
+                unregister_after_tool_call_hook,
+                unregister_before_tool_call_hook,
+            )
+
+            unregister_before_tool_call_hook(before_hook)
+            unregister_after_tool_call_hook(after_hook)
+

 # =============================================================================
 # Anthropic Provider Tests
@@ -217,6 +518,46 @@ class TestAnthropicNativeToolCalling:
            assert mock_call.called
            assert result is not None

+    @pytest.mark.vcr()
+    def test_anthropic_parallel_native_tool_calling_test_crew(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        agent = Agent(
+            role="Parallel Tool Agent",
+            goal="Use both tools exactly as instructed",
+            backstory="You follow tool instructions precisely.",
+            tools=parallel_tools,
+            llm=LLM(model="anthropic/claude-sonnet-4-6"),
+            verbose=False,
+            max_iter=3,
+        )
+        task = Task(
+            description=_parallel_prompt(),
+            expected_output="A one sentence summary of both tool outputs",
+            agent=agent,
+        )
+        crew = Crew(agents=[agent], tasks=[task])
+        result = crew.kickoff()
+        assert result is not None
+        _assert_tools_overlapped()
+
+    @pytest.mark.vcr()
+    def test_anthropic_parallel_native_tool_calling_test_agent_kickoff(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        agent = Agent(
+            role="Parallel Tool Agent",
+            goal="Use both tools exactly as instructed",
+            backstory="You follow tool instructions precisely.",
+            tools=parallel_tools,
+            llm=LLM(model="anthropic/claude-sonnet-4-6"),
+            verbose=False,
+            max_iter=3,
+        )
+        result = agent.kickoff(_parallel_prompt())
+        assert result is not None
+        _assert_tools_overlapped()
+

 # =============================================================================
 # Google/Gemini Provider Tests
@@ -247,7 +588,7 @@ class TestGeminiNativeToolCalling:
            goal="Help users with mathematical calculations",
            backstory="You are a helpful math assistant.",
            tools=[calculator_tool],
-            llm=LLM(model="gemini/gemini-2.0-flash-exp"),
+            llm=LLM(model="gemini/gemini-2.5-flash"),
        )

        task = Task(
@@ -266,7 +607,7 @@ class TestGeminiNativeToolCalling:
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test Gemini agent kickoff with mocked LLM call."""
-        llm = LLM(model="gemini/gemini-2.0-flash-001")
+        llm = LLM(model="gemini/gemini-2.5-flash")

        with patch.object(llm, "call", return_value="The answer is 120.") as mock_call:
            agent = Agent(
@@ -290,6 +631,46 @@ class TestGeminiNativeToolCalling:
            assert mock_call.called
            assert result is not None

+    @pytest.mark.vcr()
+    def test_gemini_parallel_native_tool_calling_test_crew(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        agent = Agent(
+            role="Parallel Tool Agent",
+            goal="Use both tools exactly as instructed",
+            backstory="You follow tool instructions precisely.",
+            tools=parallel_tools,
+            llm=LLM(model="gemini/gemini-2.5-flash"),
+            verbose=False,
+            max_iter=3,
+        )
+        task = Task(
+            description=_parallel_prompt(),
+            expected_output="A one sentence summary of both tool outputs",
+            agent=agent,
+        )
+        crew = Crew(agents=[agent], tasks=[task])
+        result = crew.kickoff()
+        assert result is not None
+        _assert_tools_overlapped()
+
+    @pytest.mark.vcr()
+    def test_gemini_parallel_native_tool_calling_test_agent_kickoff(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        agent = Agent(
+            role="Parallel Tool Agent",
+            goal="Use both tools exactly as instructed",
+            backstory="You follow tool instructions precisely.",
+            tools=parallel_tools,
+            llm=LLM(model="gemini/gemini-2.5-flash"),
+            verbose=False,
+            max_iter=3,
+        )
+        result = agent.kickoff(_parallel_prompt())
+        assert result is not None
+        _assert_tools_overlapped()
+

 # =============================================================================
 # Azure Provider Tests
@@ -324,7 +705,7 @@ class TestAzureNativeToolCalling:
            goal="Help users with mathematical calculations",
            backstory="You are a helpful math assistant.",
            tools=[calculator_tool],
-            llm=LLM(model="azure/gpt-4o-mini"),
+            llm=LLM(model="azure/gpt-5-nano"),
            verbose=False,
            max_iter=3,
        )
@@ -347,7 +728,7 @@ class TestAzureNativeToolCalling:
    ) -> None:
        """Test Azure agent kickoff with mocked LLM call."""
        llm = LLM(
-            model="azure/gpt-4o-mini",
+            model="azure/gpt-5-nano",
            api_key="test-key",
            base_url="https://test.openai.azure.com",
        )
@@ -374,6 +755,46 @@ class TestAzureNativeToolCalling:
            assert mock_call.called
            assert result is not None

+    @pytest.mark.vcr()
+    def test_azure_parallel_native_tool_calling_test_crew(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        agent = Agent(
+            role="Parallel Tool Agent",
+            goal="Use both tools exactly as instructed",
+            backstory="You follow tool instructions precisely.",
+            tools=parallel_tools,
+            llm=LLM(model="azure/gpt-5-nano"),
+            verbose=False,
+            max_iter=3,
+        )
+        task = Task(
+            description=_parallel_prompt(),
+            expected_output="A one sentence summary of both tool outputs",
+            agent=agent,
+        )
+        crew = Crew(agents=[agent], tasks=[task])
+        result = crew.kickoff()
+        assert result is not None
+        _assert_tools_overlapped()
+
+    @pytest.mark.vcr()
+    def test_azure_parallel_native_tool_calling_test_agent_kickoff(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        agent = Agent(
+            role="Parallel Tool Agent",
+            goal="Use both tools exactly as instructed",
+            backstory="You follow tool instructions precisely.",
+            tools=parallel_tools,
+            llm=LLM(model="azure/gpt-5-nano"),
+            verbose=False,
+            max_iter=3,
+        )
+        result = agent.kickoff(_parallel_prompt())
+        assert result is not None
+        _assert_tools_overlapped()
+

 # =============================================================================
 # Bedrock Provider Tests
@@ -384,18 +805,30 @@ class TestBedrockNativeToolCalling:
    """Tests for native tool calling with AWS Bedrock models."""

    @pytest.fixture(autouse=True)
-    def mock_aws_env(self):
-        """Mock AWS environment variables for tests."""
-        env_vars = {
-        "AWS_ACCESS_KEY_ID": "test-key",
-        "AWS_SECRET_ACCESS_KEY": "test-secret",
-        "AWS_REGION": "us-east-1",
-        }
-        if "AWS_ACCESS_KEY_ID" not in os.environ:
-            with patch.dict(os.environ, env_vars):
-                yield
-        else:
-            yield
+    def validate_bedrock_credentials_for_live_recording(self):
+        """Run Bedrock tests only when explicitly enabled."""
+        run_live_bedrock = os.getenv("RUN_BEDROCK_LIVE_TESTS", "false").lower() == "true"
+
+        if not run_live_bedrock:
+            pytest.skip(
+                "Skipping Bedrock tests by default. "
+                "Set RUN_BEDROCK_LIVE_TESTS=true with valid AWS credentials to enable."
+            )
+
+        access_key = os.getenv("AWS_ACCESS_KEY_ID", "")
+        secret_key = os.getenv("AWS_SECRET_ACCESS_KEY", "")
+        if (
+            not access_key
+            or not secret_key
+            or access_key.startswith(("fake-", "test-"))
+            or secret_key.startswith(("fake-", "test-"))
+        ):
+            pytest.skip(
+                "Skipping Bedrock tests: valid AWS credentials are required when "
+                "RUN_BEDROCK_LIVE_TESTS=true."
+            )
+
+        yield

    @pytest.mark.vcr()
    def test_bedrock_agent_kickoff_with_tools_mocked(
@@ -427,6 +860,46 @@ class TestBedrockNativeToolCalling:
        assert result.raw is not None
        assert "120" in str(result.raw)

+    @pytest.mark.vcr()
+    def test_bedrock_parallel_native_tool_calling_test_crew(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        agent = Agent(
+            role="Parallel Tool Agent",
+            goal="Use both tools exactly as instructed",
+            backstory="You follow tool instructions precisely.",
+            tools=parallel_tools,
+            llm=LLM(model="bedrock/anthropic.claude-3-haiku-20240307-v1:0"),
+            verbose=False,
+            max_iter=3,
+        )
+        task = Task(
+            description=_parallel_prompt(),
+            expected_output="A one sentence summary of both tool outputs",
+            agent=agent,
+        )
+        crew = Crew(agents=[agent], tasks=[task])
+        result = crew.kickoff()
+        assert result is not None
+        _assert_tools_overlapped()
+
+    @pytest.mark.vcr()
+    def test_bedrock_parallel_native_tool_calling_test_agent_kickoff(
+        self, parallel_tools: list[BaseTool]
+    ) -> None:
+        agent = Agent(
+            role="Parallel Tool Agent",
+            goal="Use both tools exactly as instructed",
+            backstory="You follow tool instructions precisely.",
+            tools=parallel_tools,
+            llm=LLM(model="bedrock/anthropic.claude-3-haiku-20240307-v1:0"),
+            verbose=False,
+            max_iter=3,
+        )
+        result = agent.kickoff(_parallel_prompt())
+        assert result is not None
+        _assert_tools_overlapped()
+

 # =============================================================================
 # Cross-Provider Native Tool Calling Behavior Tests
@@ -439,7 +912,7 @@ class TestNativeToolCallingBehavior:
    def test_supports_function_calling_check(self) -> None:
        """Test that supports_function_calling() is properly checked."""
        # OpenAI should support function calling
-        openai_llm = LLM(model="gpt-4o-mini")
+        openai_llm = LLM(model="gpt-5-nano")
        assert hasattr(openai_llm, "supports_function_calling")
        assert openai_llm.supports_function_calling() is True

@@ -475,7 +948,7 @@ class TestNativeToolCallingTokenUsage:
            goal="Perform calculations efficiently",
            backstory="You calculate things.",
            tools=[calculator_tool],
-            llm=LLM(model="gpt-4o-mini"),
+            llm=LLM(model="gpt-5-nano"),
            verbose=False,
            max_iter=3,
        )
@@ -519,7 +992,7 @@ def test_native_tool_calling_error_handling(failing_tool: FailingTool):
        goal="Perform calculations efficiently",
        backstory="You calculate things.",
        tools=[failing_tool],
-        llm=LLM(model="gpt-4o-mini"),
+        llm=LLM(model="gpt-5-nano"),
        verbose=False,
        max_iter=3,
    )
@@ -578,7 +1051,7 @@ class TestMaxUsageCountWithNativeToolCalling:
            goal="Call the counting tool multiple times",
            backstory="You are an agent that counts things.",
            tools=[tool],
-            llm=LLM(model="gpt-4o-mini"),
+            llm=LLM(model="gpt-5-nano"),
            verbose=False,
            max_iter=5,
        )
@@ -606,7 +1079,7 @@ class TestMaxUsageCountWithNativeToolCalling:
            goal="Use the counting tool as many times as requested",
            backstory="You are an agent that counts things. You must try to use the tool for each value requested.",
            tools=[tool],
-            llm=LLM(model="gpt-4o-mini"),
+            llm=LLM(model="gpt-5-nano"),
            verbose=False,
            max_iter=5,
        )
@@ -638,7 +1111,7 @@ class TestMaxUsageCountWithNativeToolCalling:
            goal="Use the counting tool exactly as requested",
            backstory="You are an agent that counts things precisely.",
            tools=[tool],
-            llm=LLM(model="gpt-4o-mini"),
+            llm=LLM(model="gpt-5-nano"),
            verbose=False,
            max_iter=5,
        )
@@ -653,5 +1126,153 @@ class TestMaxUsageCountWithNativeToolCalling:
        result = crew.kickoff()

        assert result is not None
-        # Verify usage count was incremented for each successful call
-        assert tool.current_usage_count == 2
+        # Verify the requested calls occurred while keeping usage bounded.
+        assert tool.current_usage_count >= 2
+        assert tool.current_usage_count <= tool.max_usage_count
+
+
+# =============================================================================
+# JSON Parse Error Handling Tests
+# =============================================================================
+
+
+class TestNativeToolCallingJsonParseError:
+    """Tests that malformed JSON tool arguments produce clear errors
+    instead of silently dropping all arguments."""
+
+    def _make_executor(self, tools: list[BaseTool]) -> "CrewAgentExecutor":
+        """Create a minimal CrewAgentExecutor with mocked dependencies."""
+        from crewai.agents.crew_agent_executor import CrewAgentExecutor
+        from crewai.tools.base_tool import to_langchain
+
+        structured_tools = to_langchain(tools)
+        mock_agent = Mock()
+        mock_agent.key = "test_agent"
+        mock_agent.role = "tester"
+        mock_agent.verbose = False
+        mock_agent.fingerprint = None
+        mock_agent.tools_results = []
+
+        mock_task = Mock()
+        mock_task.name = "test"
+        mock_task.description = "test"
+        mock_task.id = "test-id"
+
+        executor = object.__new__(CrewAgentExecutor)
+        executor.agent = mock_agent
+        executor.task = mock_task
+        executor.crew = Mock()
+        executor.tools = structured_tools
+        executor.original_tools = tools
+        executor.tools_handler = None
+        executor._printer = Mock()
+        executor.messages = []
+
+        return executor
+
+    def test_malformed_json_returns_parse_error(self) -> None:
+        """Malformed JSON args must return a descriptive error, not silently become {}."""
+
+        class CodeTool(BaseTool):
+            name: str = "execute_code"
+            description: str = "Run code"
+
+            def _run(self, code: str) -> str:
+                return f"ran: {code}"
+
+        tool = CodeTool()
+        executor = self._make_executor([tool])
+
+        from crewai.utilities.agent_utils import convert_tools_to_openai_schema
+        _, available_functions, _ = convert_tools_to_openai_schema([tool])
+
+        malformed_json = '{"code": "print("hello")"}'
+
+        result = executor._execute_single_native_tool_call(
+            call_id="call_123",
+            func_name="execute_code",
+            func_args=malformed_json,
+            available_functions=available_functions,
+        )
+
+        assert "Failed to parse tool arguments as JSON" in result["result"]
+        assert tool.current_usage_count == 0
+
+    def test_valid_json_still_executes_normally(self) -> None:
+        """Valid JSON args should execute the tool as before."""
+
+        class CodeTool(BaseTool):
+            name: str = "execute_code"
+            description: str = "Run code"
+
+            def _run(self, code: str) -> str:
+                return f"ran: {code}"
+
+        tool = CodeTool()
+        executor = self._make_executor([tool])
+
+        from crewai.utilities.agent_utils import convert_tools_to_openai_schema
+        _, available_functions, _ = convert_tools_to_openai_schema([tool])
+
+        valid_json = '{"code": "print(1)"}'
+
+        result = executor._execute_single_native_tool_call(
+            call_id="call_456",
+            func_name="execute_code",
+            func_args=valid_json,
+            available_functions=available_functions,
+        )
+
+        assert result["result"] == "ran: print(1)"
+
+    def test_dict_args_bypass_json_parsing(self) -> None:
+        """When func_args is already a dict, no JSON parsing occurs."""
+
+        class CodeTool(BaseTool):
+            name: str = "execute_code"
+            description: str = "Run code"
+
+            def _run(self, code: str) -> str:
+                return f"ran: {code}"
+
+        tool = CodeTool()
+        executor = self._make_executor([tool])
+
+        from crewai.utilities.agent_utils import convert_tools_to_openai_schema
+        _, available_functions, _ = convert_tools_to_openai_schema([tool])
+
+        result = executor._execute_single_native_tool_call(
+            call_id="call_789",
+            func_name="execute_code",
+            func_args={"code": "x = 42"},
+            available_functions=available_functions,
+        )
+
+        assert result["result"] == "ran: x = 42"
+
+    def test_schema_validation_catches_missing_args_on_native_path(self) -> None:
+        """The native function calling path should now enforce args_schema,
+        catching missing required fields before _run is called."""
+
+        class StrictTool(BaseTool):
+            name: str = "strict_tool"
+            description: str = "A tool with required args"
+
+            def _run(self, code: str, language: str) -> str:
+                return f"{language}: {code}"
+
+        tool = StrictTool()
+        executor = self._make_executor([tool])
+
+        from crewai.utilities.agent_utils import convert_tools_to_openai_schema
+        _, available_functions, _ = convert_tools_to_openai_schema([tool])
+
+        result = executor._execute_single_native_tool_call(
+            call_id="call_schema",
+            func_name="strict_tool",
+            func_args={"code": "print(1)"},
+            available_functions=available_functions,
+        )
+
+        assert "Error" in result["result"]
+        assert "validation failed" in result["result"].lower() or "missing" in result["result"].lower()
--- a/lib/crewai/tests/cassettes/agents/TestAnthropicNativeToolCalling.test_anthropic_parallel_native_tool_calling_test_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAnthropicNativeToolCalling.test_anthropic_parallel_native_tool_calling_test_agent_kickoff.yaml
@@ -0,0 +1,247 @@
+interactions:
+- request:
+    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
+      This is a tool-calling compliance test. In your next assistant turn, emit exactly
+      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary."}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
+      are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
+      goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
+      search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
+      search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
+      search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '1639'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      x-api-key:
+      - X-API-KEY-XXX
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 0.73.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: '{"model":"claude-sonnet-4-6","id":"msg_01XeN1XTXZgmPyLMMGjivabb","type":"message","role":"assistant","content":[{"type":"text","text":"I''ll
+        execute all 3 parallel searches simultaneously right now!"},{"type":"tool_use","id":"toolu_01NwzvrxEz6tvT3A8ydvMtHu","name":"parallel_local_search_one","input":{"query":"latest
+        OpenAI model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01YCxzSB1suk9uPVC1uwfHz9","name":"parallel_local_search_two","input":{"query":"latest
+        Anthropic model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01Mauvxzv58eDY7pUt9HMKGy","name":"parallel_local_search_three","input":{"query":"latest
+        Gemini model release notes"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":914,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":169,"service_tier":"standard","inference_geo":"global"}}'
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Security-Policy:
+      - CSP-FILTERED
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 18 Feb 2026 23:54:43 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - ANTHROPIC-ORGANIZATION-ID-XXX
+      anthropic-ratelimit-input-tokens-limit:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-input-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-input-tokens-reset:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-output-tokens-limit:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-output-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-output-tokens-reset:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-requests-limit:
+      - '20000'
+      anthropic-ratelimit-requests-remaining:
+      - '19999'
+      anthropic-ratelimit-requests-reset:
+      - '2026-02-18T23:54:41Z'
+      anthropic-ratelimit-tokens-limit:
+      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-tokens-reset:
+      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - REQUEST-ID-XXX
+      strict-transport-security:
+      - STS-XXX
+      x-envoy-upstream-service-time:
+      - '2099'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
+      This is a tool-calling compliance test. In your next assistant turn, emit exactly
+      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary."},{"role":"assistant","content":[{"type":"tool_use","id":"toolu_01NwzvrxEz6tvT3A8ydvMtHu","name":"parallel_local_search_one","input":{"query":"latest
+      OpenAI model release notes"}},{"type":"tool_use","id":"toolu_01YCxzSB1suk9uPVC1uwfHz9","name":"parallel_local_search_two","input":{"query":"latest
+      Anthropic model release notes"}},{"type":"tool_use","id":"toolu_01Mauvxzv58eDY7pUt9HMKGy","name":"parallel_local_search_three","input":{"query":"latest
+      Gemini model release notes"}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01NwzvrxEz6tvT3A8ydvMtHu","content":"[one]
+      latest OpenAI model release notes"},{"type":"tool_result","tool_use_id":"toolu_01YCxzSB1suk9uPVC1uwfHz9","content":"[two]
+      latest Anthropic model release notes"},{"type":"tool_result","tool_use_id":"toolu_01Mauvxzv58eDY7pUt9HMKGy","content":"[three]
+      latest Gemini model release notes"}]}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
+      are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
+      goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
+      search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
+      search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
+      search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '2517'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      x-api-key:
+      - X-API-KEY-XXX
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 0.73.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: "{\"model\":\"claude-sonnet-4-6\",\"id\":\"msg_01PFXqwwdwwHWadPdtNU5tUZ\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"The
+        three parallel searches were executed successfully, each targeting the latest
+        release notes for the leading AI model families. The search results confirm
+        that queries were dispatched simultaneously to retrieve the most recent developments
+        from **OpenAI** (via tool one), **Anthropic** (via tool two), and **Google's
+        Gemini** (via tool three). While the local search tools returned placeholder
+        outputs in this test environment rather than detailed release notes, the structure
+        of the test validates that all three parallel tool calls were emitted correctly
+        and in the specified order \u2014 demonstrating proper concurrent tool-call
+        behavior with no dependencies between the three independent searches.\"}],\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"usage\":{\"input_tokens\":1197,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":131,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Security-Policy:
+      - CSP-FILTERED
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 18 Feb 2026 23:54:49 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - ANTHROPIC-ORGANIZATION-ID-XXX
+      anthropic-ratelimit-input-tokens-limit:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-input-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-input-tokens-reset:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-output-tokens-limit:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-output-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-output-tokens-reset:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-requests-limit:
+      - '20000'
+      anthropic-ratelimit-requests-remaining:
+      - '19999'
+      anthropic-ratelimit-requests-reset:
+      - '2026-02-18T23:54:44Z'
+      anthropic-ratelimit-tokens-limit:
+      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-tokens-reset:
+      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - REQUEST-ID-XXX
+      strict-transport-security:
+      - STS-XXX
+      x-envoy-upstream-service-time:
+      - '4092'
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/lib/crewai/tests/cassettes/agents/TestAnthropicNativeToolCalling.test_anthropic_parallel_native_tool_calling_test_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAnthropicNativeToolCalling.test_anthropic_parallel_native_tool_calling_test_crew.yaml
@@ -0,0 +1,254 @@
+interactions:
+- request:
+    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
+      This is a tool-calling compliance test. In your next assistant turn, emit exactly
+      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
+      are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
+      goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
+      search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
+      search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
+      search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '1820'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      x-api-key:
+      - X-API-KEY-XXX
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 0.73.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: '{"model":"claude-sonnet-4-6","id":"msg_01RJ4CphwpmkmsJFJjeCNvXz","type":"message","role":"assistant","content":[{"type":"text","text":"I''ll
+        execute all 3 parallel tool calls simultaneously right away!"},{"type":"tool_use","id":"toolu_01YWY3cSomRuv4USmq55Prk3","name":"parallel_local_search_one","input":{"query":"latest
+        OpenAI model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01Aaqj3LMXksE1nB3pscRhV5","name":"parallel_local_search_two","input":{"query":"latest
+        Anthropic model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01AcYxQvy8aYmAoUg9zx9qfq","name":"parallel_local_search_three","input":{"query":"latest
+        Gemini model release notes"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":951,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":170,"service_tier":"standard","inference_geo":"global"}}'
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Security-Policy:
+      - CSP-FILTERED
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 18 Feb 2026 23:54:51 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - ANTHROPIC-ORGANIZATION-ID-XXX
+      anthropic-ratelimit-input-tokens-limit:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-input-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-input-tokens-reset:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-output-tokens-limit:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-output-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-output-tokens-reset:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-requests-limit:
+      - '20000'
+      anthropic-ratelimit-requests-remaining:
+      - '19999'
+      anthropic-ratelimit-requests-reset:
+      - '2026-02-18T23:54:49Z'
+      anthropic-ratelimit-tokens-limit:
+      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-tokens-reset:
+      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - REQUEST-ID-XXX
+      strict-transport-security:
+      - STS-XXX
+      x-envoy-upstream-service-time:
+      - '1967'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
+      This is a tool-calling compliance test. In your next assistant turn, emit exactly
+      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."},{"role":"assistant","content":[{"type":"tool_use","id":"toolu_01YWY3cSomRuv4USmq55Prk3","name":"parallel_local_search_one","input":{"query":"latest
+      OpenAI model release notes"}},{"type":"tool_use","id":"toolu_01Aaqj3LMXksE1nB3pscRhV5","name":"parallel_local_search_two","input":{"query":"latest
+      Anthropic model release notes"}},{"type":"tool_use","id":"toolu_01AcYxQvy8aYmAoUg9zx9qfq","name":"parallel_local_search_three","input":{"query":"latest
+      Gemini model release notes"}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01YWY3cSomRuv4USmq55Prk3","content":"[one]
+      latest OpenAI model release notes"},{"type":"tool_result","tool_use_id":"toolu_01Aaqj3LMXksE1nB3pscRhV5","content":"[two]
+      latest Anthropic model release notes"},{"type":"tool_result","tool_use_id":"toolu_01AcYxQvy8aYmAoUg9zx9qfq","content":"[three]
+      latest Gemini model release notes"}]},{"role":"user","content":"Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
+      are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
+      goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
+      search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
+      search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
+      search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '2882'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      x-api-key:
+      - X-API-KEY-XXX
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 0.73.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: "{\"model\":\"claude-sonnet-4-6\",\"id\":\"msg_0143MHUne1az3Tt69EoLjyZd\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"Here
+        is the complete content returned from all three tool calls:\\n\\n- **parallel_local_search_one**
+        result: `[one] latest OpenAI model release notes`\\n- **parallel_local_search_two**
+        result: `[two] latest Anthropic model release notes`\\n- **parallel_local_search_three**
+        result: `[three] latest Gemini model release notes`\\n\\nAll three parallel
+        tool calls were executed successfully in the same response turn, returning
+        their respective outputs: the first tool searched for the latest OpenAI model
+        release notes, the second tool searched for the latest Anthropic model release
+        notes, and the third tool searched for the latest Gemini model release notes
+        \u2014 confirming that all search queries were dispatched concurrently and
+        their results retrieved as expected.\"}],\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"usage\":{\"input_tokens\":1272,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":172,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Security-Policy:
+      - CSP-FILTERED
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 18 Feb 2026 23:54:55 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - ANTHROPIC-ORGANIZATION-ID-XXX
+      anthropic-ratelimit-input-tokens-limit:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-input-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-input-tokens-reset:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-output-tokens-limit:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-output-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-output-tokens-reset:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-requests-limit:
+      - '20000'
+      anthropic-ratelimit-requests-remaining:
+      - '19999'
+      anthropic-ratelimit-requests-reset:
+      - '2026-02-18T23:54:52Z'
+      anthropic-ratelimit-tokens-limit:
+      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-tokens-reset:
+      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - REQUEST-ID-XXX
+      strict-transport-security:
+      - STS-XXX
+      x-envoy-upstream-service-time:
+      - '3144'
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_agent_with_native_tool_calling.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_agent_with_native_tool_calling.yaml
@@ -5,20 +5,19 @@ interactions:
      calculations"}, {"role": "user", "content": "\nCurrent Task: Calculate what
      is 15 * 8\n\nThis is the expected criteria for your final answer: The result
      of the calculation\nyou MUST return the actual complete content as the final
-      answer, not a summary.\n\nThis is VERY important to you, your job depends on
-      it!"}], "stream": false, "stop": ["\nObservation:"], "tool_choice": "auto",
-      "tools": [{"function": {"name": "calculator", "description": "Perform mathematical
-      calculations. Use this for any math operations.", "parameters": {"properties":
-      {"expression": {"description": "Mathematical expression to evaluate", "title":
-      "Expression", "type": "string"}}, "required": ["expression"], "type": "object"}},
-      "type": "function"}]}'
+      answer, not a summary."}], "stream": false, "tool_choice": "auto", "tools":
+      [{"function": {"name": "calculator", "description": "Perform mathematical calculations.
+      Use this for any math operations.", "parameters": {"properties": {"expression":
+      {"description": "Mathematical expression to evaluate", "title": "Expression",
+      "type": "string"}}, "required": ["expression"], "type": "object", "additionalProperties":
+      false}}, "type": "function"}]}'
    headers:
      Accept:
      - application/json
      Connection:
      - keep-alive
      Content-Length:
-      - '883'
+      - '828'
      Content-Type:
      - application/json
      User-Agent:
@@ -32,20 +31,20 @@ interactions:
      x-ms-client-request-id:
      - X-MS-CLIENT-REQUEST-ID-XXX
    method: POST
-    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2024-12-01-preview
+    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
  response:
    body:
      string: '{"choices":[{"content_filter_results":{},"finish_reason":"tool_calls","index":0,"logprobs":null,"message":{"annotations":[],"content":null,"refusal":null,"role":"assistant","tool_calls":[{"function":{"arguments":"{\"expression\":\"15
-        * 8\"}","name":"calculator"},"id":"call_cJWzKh5LdBpY3Sk8GATS3eRe","type":"function"}]}}],"created":1769122114,"id":"chatcmpl-D0xlavS0V3m00B9Fsjyv39xQWUGFV","model":"gpt-4o-mini-2024-07-18","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":"fp_f97eff32c5","usage":{"completion_tokens":18,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens":137,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":155}}
+        * 8\"}","name":"calculator"},"id":"call_Cow46pNllpDx0pxUgZFeqlh1","type":"function"}]}}],"created":1771459544,"id":"chatcmpl-DAlq4osCP9ABJ1HyXFBoYWylMg0bi","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":219,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":192,"rejected_prediction_tokens":0},"prompt_tokens":208,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":427}}

        '
    headers:
      Content-Length:
-      - '1058'
+      - '1049'
      Content-Type:
      - application/json
      Date:
-      - Thu, 22 Jan 2026 22:48:34 GMT
+      - Thu, 19 Feb 2026 00:05:45 GMT
      Strict-Transport-Security:
      - STS-XXX
      apim-request-id:
@@ -59,7 +58,7 @@ interactions:
      x-ms-client-request-id:
      - X-MS-CLIENT-REQUEST-ID-XXX
      x-ms-deployment-name:
-      - gpt-4o-mini
+      - gpt-5-nano
      x-ms-rai-invoked:
      - 'true'
      x-ms-region:
@@ -83,26 +82,25 @@ interactions:
      calculations"}, {"role": "user", "content": "\nCurrent Task: Calculate what
      is 15 * 8\n\nThis is the expected criteria for your final answer: The result
      of the calculation\nyou MUST return the actual complete content as the final
-      answer, not a summary.\n\nThis is VERY important to you, your job depends on
-      it!"}, {"role": "assistant", "content": "", "tool_calls": [{"id": "call_cJWzKh5LdBpY3Sk8GATS3eRe",
-      "type": "function", "function": {"name": "calculator", "arguments": "{\"expression\":\"15
-      * 8\"}"}}]}, {"role": "tool", "tool_call_id": "call_cJWzKh5LdBpY3Sk8GATS3eRe",
-      "content": "The result of 15 * 8 is 120"}, {"role": "user", "content": "Analyze
-      the tool result. If requirements are met, provide the Final Answer. Otherwise,
-      call the next tool. Deliver only the answer without meta-commentary."}], "stream":
-      false, "stop": ["\nObservation:"], "tool_choice": "auto", "tools": [{"function":
-      {"name": "calculator", "description": "Perform mathematical calculations. Use
-      this for any math operations.", "parameters": {"properties": {"expression":
-      {"description": "Mathematical expression to evaluate", "title": "Expression",
-      "type": "string"}}, "required": ["expression"], "type": "object"}}, "type":
-      "function"}]}'
+      answer, not a summary."}, {"role": "assistant", "content": "", "tool_calls":
+      [{"id": "call_Cow46pNllpDx0pxUgZFeqlh1", "type": "function", "function": {"name":
+      "calculator", "arguments": "{\"expression\":\"15 * 8\"}"}}]}, {"role": "tool",
+      "tool_call_id": "call_Cow46pNllpDx0pxUgZFeqlh1", "content": "The result of 15
+      * 8 is 120"}, {"role": "user", "content": "Analyze the tool result. If requirements
+      are met, provide the Final Answer. Otherwise, call the next tool. Deliver only
+      the answer without meta-commentary."}], "stream": false, "tool_choice": "auto",
+      "tools": [{"function": {"name": "calculator", "description": "Perform mathematical
+      calculations. Use this for any math operations.", "parameters": {"properties":
+      {"expression": {"description": "Mathematical expression to evaluate", "title":
+      "Expression", "type": "string"}}, "required": ["expression"], "type": "object",
+      "additionalProperties": false}}, "type": "function"}]}'
    headers:
      Accept:
      - application/json
      Connection:
      - keep-alive
      Content-Length:
-      - '1375'
+      - '1320'
      Content-Type:
      - application/json
      User-Agent:
@@ -116,20 +114,19 @@ interactions:
      x-ms-client-request-id:
      - X-MS-CLIENT-REQUEST-ID-XXX
    method: POST
-    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2024-12-01-preview
+    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
  response:
    body:
-      string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The
-        result of the calculation is 120.","refusal":null,"role":"assistant"}}],"created":1769122115,"id":"chatcmpl-D0xlbUNVA7RVkn0GsuBGoNhgQTtac","model":"gpt-4o-mini-2024-07-18","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":"fp_f97eff32c5","usage":{"completion_tokens":11,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens":207,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":218}}
+      string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"120","refusal":null,"role":"assistant"}}],"created":1771459547,"id":"chatcmpl-DAlq7zJimnIMoXieNww8jY5f2pIPd","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":203,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":192,"rejected_prediction_tokens":0},"prompt_tokens":284,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":487}}

        '
    headers:
      Content-Length:
-      - '1250'
+      - '1207'
      Content-Type:
      - application/json
      Date:
-      - Thu, 22 Jan 2026 22:48:34 GMT
+      - Thu, 19 Feb 2026 00:05:49 GMT
      Strict-Transport-Security:
      - STS-XXX
      apim-request-id:
@@ -143,7 +140,7 @@ interactions:
      x-ms-client-request-id:
      - X-MS-CLIENT-REQUEST-ID-XXX
      x-ms-deployment-name:
-      - gpt-4o-mini
+      - gpt-5-nano
      x-ms-rai-invoked:
      - 'true'
      x-ms-region:
--- a/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_parallel_native_tool_calling_test_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_parallel_native_tool_calling_test_agent_kickoff.yaml
@@ -0,0 +1,198 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
+      You follow tool instructions precisely.\nYour personal goal is: Use both tools
+      exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
+      a tool-calling compliance test. In your next assistant turn, emit exactly 3
+      tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary."}], "stream": false, "tool_choice": "auto", "tools": [{"function":
+      {"name": "parallel_local_search_one", "description": "Local search tool #1 for
+      concurrency testing.", "parameters": {"properties": {"query": {"description":
+      "Search query", "title": "Query", "type": "string"}}, "required": ["query"],
+      "type": "object", "additionalProperties": false}}, "type": "function"}, {"function":
+      {"name": "parallel_local_search_two", "description": "Local search tool #2 for
+      concurrency testing.", "parameters": {"properties": {"query": {"description":
+      "Search query", "title": "Query", "type": "string"}}, "required": ["query"],
+      "type": "object", "additionalProperties": false}}, "type": "function"}, {"function":
+      {"name": "parallel_local_search_three", "description": "Local search tool #3
+      for concurrency testing.", "parameters": {"properties": {"query": {"description":
+      "Search query", "title": "Query", "type": "string"}}, "required": ["query"],
+      "type": "object", "additionalProperties": false}}, "type": "function"}]}'
+    headers:
+      Accept:
+      - application/json
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '1763'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      api-key:
+      - X-API-KEY-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      x-ms-client-request-id:
+      - X-MS-CLIENT-REQUEST-ID-XXX
+    method: POST
+    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
+  response:
+    body:
+      string: '{"choices":[{"content_filter_results":{},"finish_reason":"tool_calls","index":0,"logprobs":null,"message":{"annotations":[],"content":null,"refusal":null,"role":"assistant","tool_calls":[{"function":{"arguments":"{\"query\":
+        \"latest OpenAI model release notes\"}","name":"parallel_local_search_one"},"id":"call_emQmocGydKuxvESfQopNngdm","type":"function"},{"function":{"arguments":"{\"query\":
+        \"latest Anthropic model release notes\"}","name":"parallel_local_search_two"},"id":"call_eNpK9WUYFCX2ZEUPhYCKvdMs","type":"function"},{"function":{"arguments":"{\"query\":
+        \"latest Gemini model release notes\"}","name":"parallel_local_search_three"},"id":"call_Wdtl6jFxGehSUMn5I1O4Mrdx","type":"function"}]}}],"created":1771459550,"id":"chatcmpl-DAlqAyJGnQKDkNCaTcjU2T8BeJaXM","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":666,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":576,"rejected_prediction_tokens":0},"prompt_tokens":343,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":1009}}
+
+        '
+    headers:
+      Content-Length:
+      - '1433'
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 19 Feb 2026 00:05:55 GMT
+      Strict-Transport-Security:
+      - STS-XXX
+      apim-request-id:
+      - APIM-REQUEST-ID-XXX
+      azureml-model-session:
+      - AZUREML-MODEL-SESSION-XXX
+      x-accel-buffering:
+      - 'no'
+      x-content-type-options:
+      - X-CONTENT-TYPE-XXX
+      x-ms-client-request-id:
+      - X-MS-CLIENT-REQUEST-ID-XXX
+      x-ms-deployment-name:
+      - gpt-5-nano
+      x-ms-rai-invoked:
+      - 'true'
+      x-ms-region:
+      - X-MS-REGION-XXX
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
+      You follow tool instructions precisely.\nYour personal goal is: Use both tools
+      exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
+      a tool-calling compliance test. In your next assistant turn, emit exactly 3
+      tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary."}, {"role": "assistant", "content": "", "tool_calls": [{"id":
+      "call_emQmocGydKuxvESfQopNngdm", "type": "function", "function": {"name": "parallel_local_search_one",
+      "arguments": "{\"query\": \"latest OpenAI model release notes\"}"}}, {"id":
+      "call_eNpK9WUYFCX2ZEUPhYCKvdMs", "type": "function", "function": {"name": "parallel_local_search_two",
+      "arguments": "{\"query\": \"latest Anthropic model release notes\"}"}}, {"id":
+      "call_Wdtl6jFxGehSUMn5I1O4Mrdx", "type": "function", "function": {"name": "parallel_local_search_three",
+      "arguments": "{\"query\": \"latest Gemini model release notes\"}"}}]}, {"role":
+      "tool", "tool_call_id": "call_emQmocGydKuxvESfQopNngdm", "content": "[one] latest
+      OpenAI model release notes"}, {"role": "tool", "tool_call_id": "call_eNpK9WUYFCX2ZEUPhYCKvdMs",
+      "content": "[two] latest Anthropic model release notes"}, {"role": "tool", "tool_call_id":
+      "call_Wdtl6jFxGehSUMn5I1O4Mrdx", "content": "[three] latest Gemini model release
+      notes"}], "stream": false, "tool_choice": "auto", "tools": [{"function": {"name":
+      "parallel_local_search_one", "description": "Local search tool #1 for concurrency
+      testing.", "parameters": {"properties": {"query": {"description": "Search query",
+      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
+      "additionalProperties": false}}, "type": "function"}, {"function": {"name":
+      "parallel_local_search_two", "description": "Local search tool #2 for concurrency
+      testing.", "parameters": {"properties": {"query": {"description": "Search query",
+      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
+      "additionalProperties": false}}, "type": "function"}, {"function": {"name":
+      "parallel_local_search_three", "description": "Local search tool #3 for concurrency
+      testing.", "parameters": {"properties": {"query": {"description": "Search query",
+      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
+      "additionalProperties": false}}, "type": "function"}]}'
+    headers:
+      Accept:
+      - application/json
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '2727'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      api-key:
+      - X-API-KEY-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      x-ms-client-request-id:
+      - X-MS-CLIENT-REQUEST-ID-XXX
+    method: POST
+    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
+  response:
+    body:
+      string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The
+        latest release notes have been published for the OpenAI, Anthropic, and Gemini
+        models, signaling concurrent updates across the leading AI model families.
+        Each set outlines new capabilities and performance improvements, along with
+        changes to APIs, tooling, and deployment guidelines. Users should review the
+        individual notes to understand new features, adjustments to tokenization,
+        latency or throughput, safety and alignment enhancements, pricing or access
+        changes, and any breaking changes or migration steps required to adopt the
+        updated models in existing workflows.","refusal":null,"role":"assistant"}}],"created":1771459556,"id":"chatcmpl-DAlqGKWXfGNlTIbDY9F6oHQp6hbxM","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":747,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":640,"rejected_prediction_tokens":0},"prompt_tokens":467,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":1214}}
+
+        '
+    headers:
+      Content-Length:
+      - '1778'
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 19 Feb 2026 00:06:02 GMT
+      Strict-Transport-Security:
+      - STS-XXX
+      apim-request-id:
+      - APIM-REQUEST-ID-XXX
+      azureml-model-session:
+      - AZUREML-MODEL-SESSION-XXX
+      x-accel-buffering:
+      - 'no'
+      x-content-type-options:
+      - X-CONTENT-TYPE-XXX
+      x-ms-client-request-id:
+      - X-MS-CLIENT-REQUEST-ID-XXX
+      x-ms-deployment-name:
+      - gpt-5-nano
+      x-ms-rai-invoked:
+      - 'true'
+      x-ms-region:
+      - X-MS-REGION-XXX
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_parallel_native_tool_calling_test_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_parallel_native_tool_calling_test_crew.yaml
@@ -0,0 +1,201 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
+      You follow tool instructions precisely.\nYour personal goal is: Use both tools
+      exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
+      a tool-calling compliance test. In your next assistant turn, emit exactly 3
+      tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."}], "stream": false, "tool_choice":
+      "auto", "tools": [{"function": {"name": "parallel_local_search_one", "description":
+      "Local search tool #1 for concurrency testing.", "parameters": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
+      "function"}, {"function": {"name": "parallel_local_search_two", "description":
+      "Local search tool #2 for concurrency testing.", "parameters": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
+      "function"}, {"function": {"name": "parallel_local_search_three", "description":
+      "Local search tool #3 for concurrency testing.", "parameters": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
+      "function"}]}'
+    headers:
+      Accept:
+      - application/json
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '1944'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      api-key:
+      - X-API-KEY-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      x-ms-client-request-id:
+      - X-MS-CLIENT-REQUEST-ID-XXX
+    method: POST
+    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
+  response:
+    body:
+      string: '{"choices":[{"content_filter_results":{},"finish_reason":"tool_calls","index":0,"logprobs":null,"message":{"annotations":[],"content":null,"refusal":null,"role":"assistant","tool_calls":[{"function":{"arguments":"{\"query\":
+        \"latest OpenAI model release notes\"}","name":"parallel_local_search_one"},"id":"call_NEvGoF86nhPQfXRoJd5SOyLd","type":"function"},{"function":{"arguments":"{\"query\":
+        \"latest Anthropic model release notes\"}","name":"parallel_local_search_two"},"id":"call_q8Q2du4gAMQLrGTgWgfwfbDZ","type":"function"},{"function":{"arguments":"{\"query\":
+        \"latest Gemini model release notes\"}","name":"parallel_local_search_three"},"id":"call_yTBal9ofZzuo10j0pWqhHCSj","type":"function"}]}}],"created":1771459563,"id":"chatcmpl-DAlqN7kyC5ACI5Yl1Pj63rOH5HIvI","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":2457,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":2368,"rejected_prediction_tokens":0},"prompt_tokens":378,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":2835}}
+
+        '
+    headers:
+      Content-Length:
+      - '1435'
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 19 Feb 2026 00:06:17 GMT
+      Strict-Transport-Security:
+      - STS-XXX
+      apim-request-id:
+      - APIM-REQUEST-ID-XXX
+      azureml-model-session:
+      - AZUREML-MODEL-SESSION-XXX
+      x-accel-buffering:
+      - 'no'
+      x-content-type-options:
+      - X-CONTENT-TYPE-XXX
+      x-ms-client-request-id:
+      - X-MS-CLIENT-REQUEST-ID-XXX
+      x-ms-deployment-name:
+      - gpt-5-nano
+      x-ms-rai-invoked:
+      - 'true'
+      x-ms-region:
+      - X-MS-REGION-XXX
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
+      You follow tool instructions precisely.\nYour personal goal is: Use both tools
+      exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
+      a tool-calling compliance test. In your next assistant turn, emit exactly 3
+      tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."}, {"role": "assistant", "content":
+      "", "tool_calls": [{"id": "call_NEvGoF86nhPQfXRoJd5SOyLd", "type": "function",
+      "function": {"name": "parallel_local_search_one", "arguments": "{\"query\":
+      \"latest OpenAI model release notes\"}"}}, {"id": "call_q8Q2du4gAMQLrGTgWgfwfbDZ",
+      "type": "function", "function": {"name": "parallel_local_search_two", "arguments":
+      "{\"query\": \"latest Anthropic model release notes\"}"}}, {"id": "call_yTBal9ofZzuo10j0pWqhHCSj",
+      "type": "function", "function": {"name": "parallel_local_search_three", "arguments":
+      "{\"query\": \"latest Gemini model release notes\"}"}}]}, {"role": "tool", "tool_call_id":
+      "call_NEvGoF86nhPQfXRoJd5SOyLd", "content": "[one] latest OpenAI model release
+      notes"}, {"role": "tool", "tool_call_id": "call_q8Q2du4gAMQLrGTgWgfwfbDZ", "content":
+      "[two] latest Anthropic model release notes"}, {"role": "tool", "tool_call_id":
+      "call_yTBal9ofZzuo10j0pWqhHCSj", "content": "[three] latest Gemini model release
+      notes"}, {"role": "user", "content": "Analyze the tool result. If requirements
+      are met, provide the Final Answer. Otherwise, call the next tool. Deliver only
+      the answer without meta-commentary."}], "stream": false, "tool_choice": "auto",
+      "tools": [{"function": {"name": "parallel_local_search_one", "description":
+      "Local search tool #1 for concurrency testing.", "parameters": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
+      "function"}, {"function": {"name": "parallel_local_search_two", "description":
+      "Local search tool #2 for concurrency testing.", "parameters": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
+      "function"}, {"function": {"name": "parallel_local_search_three", "description":
+      "Local search tool #3 for concurrency testing.", "parameters": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
+      "function"}]}'
+    headers:
+      Accept:
+      - application/json
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '3096'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      api-key:
+      - X-API-KEY-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      x-ms-client-request-id:
+      - X-MS-CLIENT-REQUEST-ID-XXX
+    method: POST
+    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
+  response:
+    body:
+      string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The
+        three tool results indicate the latest release notes are available for OpenAI
+        models, Anthropic models, and Gemini models.","refusal":null,"role":"assistant"}}],"created":1771459579,"id":"chatcmpl-DAlqdRtr8EefmFfazuh4jm7KvVxim","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":1826,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":1792,"rejected_prediction_tokens":0},"prompt_tokens":537,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":2363}}
+
+        '
+    headers:
+      Content-Length:
+      - '1333'
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 19 Feb 2026 00:06:31 GMT
+      Strict-Transport-Security:
+      - STS-XXX
+      apim-request-id:
+      - APIM-REQUEST-ID-XXX
+      azureml-model-session:
+      - AZUREML-MODEL-SESSION-XXX
+      x-accel-buffering:
+      - 'no'
+      x-content-type-options:
+      - X-CONTENT-TYPE-XXX
+      x-ms-client-request-id:
+      - X-MS-CLIENT-REQUEST-ID-XXX
+      x-ms-deployment-name:
+      - gpt-5-nano
+      x-ms-rai-invoked:
+      - 'true'
+      x-ms-region:
+      - X-MS-REGION-XXX
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/lib/crewai/tests/cassettes/agents/TestBedrockNativeToolCalling.test_bedrock_parallel_native_tool_calling_test_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestBedrockNativeToolCalling.test_bedrock_parallel_native_tool_calling_test_agent_kickoff.yaml
@@ -0,0 +1,63 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
+      is a tool-calling compliance test. In your next assistant turn, emit exactly
+      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary."}]}], "inferenceConfig": {"stopSequences": ["\nObservation:"]},
+      "system": [{"text": "You are Parallel Tool Agent. You follow tool instructions
+      precisely.\nYour personal goal is: Use both tools exactly as instructed"}],
+      "toolConfig": {"tools": [{"toolSpec": {"name": "parallel_local_search_one",
+      "description": "Local search tool #1 for concurrency testing.", "inputSchema":
+      {"json": {"properties": {"query": {"description": "Search query", "title": "Query",
+      "type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
+      false}}}}, {"toolSpec": {"name": "parallel_local_search_two", "description":
+      "Local search tool #2 for concurrency testing.", "inputSchema": {"json": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}}},
+      {"toolSpec": {"name": "parallel_local_search_three", "description": "Local search
+      tool #3 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
+      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
+      ["query"], "type": "object", "additionalProperties": false}}}}]}}'
+    headers:
+      Content-Length:
+      - '1773'
+      Content-Type:
+      - !!binary |
+        YXBwbGljYXRpb24vanNvbg==
+      User-Agent:
+      - X-USER-AGENT-XXX
+      amz-sdk-invocation-id:
+      - AMZ-SDK-INVOCATION-ID-XXX
+      amz-sdk-request:
+      - !!binary |
+        YXR0ZW1wdD0x
+      authorization:
+      - AUTHORIZATION-XXX
+      x-amz-date:
+      - X-AMZ-DATE-XXX
+    method: POST
+    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
+  response:
+    body:
+      string: '{"message":"The security token included in the request is invalid."}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '68'
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 19 Feb 2026 00:00:08 GMT
+      x-amzn-ErrorType:
+      - UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
+      x-amzn-RequestId:
+      - X-AMZN-REQUESTID-XXX
+    status:
+      code: 403
+      message: Forbidden
+version: 1
--- a/lib/crewai/tests/cassettes/agents/TestBedrockNativeToolCalling.test_bedrock_parallel_native_tool_calling_test_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestBedrockNativeToolCalling.test_bedrock_parallel_native_tool_calling_test_crew.yaml
@@ -0,0 +1,226 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
+      is a tool-calling compliance test. In your next assistant turn, emit exactly
+      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."}]}], "inferenceConfig": {"stopSequences":
+      ["\nObservation:"]}, "system": [{"text": "You are Parallel Tool Agent. You follow
+      tool instructions precisely.\nYour personal goal is: Use both tools exactly
+      as instructed"}], "toolConfig": {"tools": [{"toolSpec": {"name": "parallel_local_search_one",
+      "description": "Local search tool #1 for concurrency testing.", "inputSchema":
+      {"json": {"properties": {"query": {"description": "Search query", "title": "Query",
+      "type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
+      false}}}}, {"toolSpec": {"name": "parallel_local_search_two", "description":
+      "Local search tool #2 for concurrency testing.", "inputSchema": {"json": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}}},
+      {"toolSpec": {"name": "parallel_local_search_three", "description": "Local search
+      tool #3 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
+      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
+      ["query"], "type": "object", "additionalProperties": false}}}}]}}'
+    headers:
+      Content-Length:
+      - '1954'
+      Content-Type:
+      - !!binary |
+        YXBwbGljYXRpb24vanNvbg==
+      User-Agent:
+      - X-USER-AGENT-XXX
+      amz-sdk-invocation-id:
+      - AMZ-SDK-INVOCATION-ID-XXX
+      amz-sdk-request:
+      - !!binary |
+        YXR0ZW1wdD0x
+      authorization:
+      - AUTHORIZATION-XXX
+      x-amz-date:
+      - X-AMZ-DATE-XXX
+    method: POST
+    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
+  response:
+    body:
+      string: '{"message":"The security token included in the request is invalid."}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '68'
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 19 Feb 2026 00:00:07 GMT
+      x-amzn-ErrorType:
+      - UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
+      x-amzn-RequestId:
+      - X-AMZN-REQUESTID-XXX
+    status:
+      code: 403
+      message: Forbidden
+- request:
+    body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
+      is a tool-calling compliance test. In your next assistant turn, emit exactly
+      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."}]}, {"role": "user", "content":
+      [{"text": "\nCurrent Task: This is a tool-calling compliance test. In your next
+      assistant turn, emit exactly 3 tool calls in the same response (parallel tool
+      calls), in this order: 1) parallel_local_search_one(query=''latest OpenAI model
+      release notes''), 2) parallel_local_search_two(query=''latest Anthropic model
+      release notes''), 3) parallel_local_search_three(query=''latest Gemini model
+      release notes''). Do not call any other tools and do not answer before those
+      3 tool calls are emitted. After the tool results return, provide a one paragraph
+      summary.\n\nThis is the expected criteria for your final answer: A one sentence
+      summary of both tool outputs\nyou MUST return the actual complete content as
+      the final answer, not a summary."}]}], "inferenceConfig": {"stopSequences":
+      ["\nObservation:"]}, "system": [{"text": "You are Parallel Tool Agent. You follow
+      tool instructions precisely.\nYour personal goal is: Use both tools exactly
+      as instructed\n\nYou are Parallel Tool Agent. You follow tool instructions precisely.\nYour
+      personal goal is: Use both tools exactly as instructed"}], "toolConfig": {"tools":
+      [{"toolSpec": {"name": "parallel_local_search_one", "description": "Local search
+      tool #1 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
+      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
+      ["query"], "type": "object", "additionalProperties": false}}}}, {"toolSpec":
+      {"name": "parallel_local_search_two", "description": "Local search tool #2 for
+      concurrency testing.", "inputSchema": {"json": {"properties": {"query": {"description":
+      "Search query", "title": "Query", "type": "string"}}, "required": ["query"],
+      "type": "object", "additionalProperties": false}}}}, {"toolSpec": {"name": "parallel_local_search_three",
+      "description": "Local search tool #3 for concurrency testing.", "inputSchema":
+      {"json": {"properties": {"query": {"description": "Search query", "title": "Query",
+      "type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
+      false}}}}]}}'
+    headers:
+      Content-Length:
+      - '2855'
+      Content-Type:
+      - !!binary |
+        YXBwbGljYXRpb24vanNvbg==
+      User-Agent:
+      - X-USER-AGENT-XXX
+      amz-sdk-invocation-id:
+      - AMZ-SDK-INVOCATION-ID-XXX
+      amz-sdk-request:
+      - !!binary |
+        YXR0ZW1wdD0x
+      authorization:
+      - AUTHORIZATION-XXX
+      x-amz-date:
+      - X-AMZ-DATE-XXX
+    method: POST
+    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
+  response:
+    body:
+      string: '{"message":"The security token included in the request is invalid."}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '68'
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 19 Feb 2026 00:00:07 GMT
+      x-amzn-ErrorType:
+      - UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
+      x-amzn-RequestId:
+      - X-AMZN-REQUESTID-XXX
+    status:
+      code: 403
+      message: Forbidden
+- request:
+    body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
+      is a tool-calling compliance test. In your next assistant turn, emit exactly
+      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."}]}, {"role": "user", "content":
+      [{"text": "\nCurrent Task: This is a tool-calling compliance test. In your next
+      assistant turn, emit exactly 3 tool calls in the same response (parallel tool
+      calls), in this order: 1) parallel_local_search_one(query=''latest OpenAI model
+      release notes''), 2) parallel_local_search_two(query=''latest Anthropic model
+      release notes''), 3) parallel_local_search_three(query=''latest Gemini model
+      release notes''). Do not call any other tools and do not answer before those
+      3 tool calls are emitted. After the tool results return, provide a one paragraph
+      summary.\n\nThis is the expected criteria for your final answer: A one sentence
+      summary of both tool outputs\nyou MUST return the actual complete content as
+      the final answer, not a summary."}]}, {"role": "user", "content": [{"text":
+      "\nCurrent Task: This is a tool-calling compliance test. In your next assistant
+      turn, emit exactly 3 tool calls in the same response (parallel tool calls),
+      in this order: 1) parallel_local_search_one(query=''latest OpenAI model release
+      notes''), 2) parallel_local_search_two(query=''latest Anthropic model release
+      notes''), 3) parallel_local_search_three(query=''latest Gemini model release
+      notes''). Do not call any other tools and do not answer before those 3 tool
+      calls are emitted. After the tool results return, provide a one paragraph summary.\n\nThis
+      is the expected criteria for your final answer: A one sentence summary of both
+      tool outputs\nyou MUST return the actual complete content as the final answer,
+      not a summary."}]}], "inferenceConfig": {"stopSequences": ["\nObservation:"]},
+      "system": [{"text": "You are Parallel Tool Agent. You follow tool instructions
+      precisely.\nYour personal goal is: Use both tools exactly as instructed\n\nYou
+      are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
+      goal is: Use both tools exactly as instructed\n\nYou are Parallel Tool Agent.
+      You follow tool instructions precisely.\nYour personal goal is: Use both tools
+      exactly as instructed"}], "toolConfig": {"tools": [{"toolSpec": {"name": "parallel_local_search_one",
+      "description": "Local search tool #1 for concurrency testing.", "inputSchema":
+      {"json": {"properties": {"query": {"description": "Search query", "title": "Query",
+      "type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
+      false}}}}, {"toolSpec": {"name": "parallel_local_search_two", "description":
+      "Local search tool #2 for concurrency testing.", "inputSchema": {"json": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}}},
+      {"toolSpec": {"name": "parallel_local_search_three", "description": "Local search
+      tool #3 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
+      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
+      ["query"], "type": "object", "additionalProperties": false}}}}]}}'
+    headers:
+      Content-Length:
+      - '3756'
+      Content-Type:
+      - !!binary |
+        YXBwbGljYXRpb24vanNvbg==
+      User-Agent:
+      - X-USER-AGENT-XXX
+      amz-sdk-invocation-id:
+      - AMZ-SDK-INVOCATION-ID-XXX
+      amz-sdk-request:
+      - !!binary |
+        YXR0ZW1wdD0x
+      authorization:
+      - AUTHORIZATION-XXX
+      x-amz-date:
+      - X-AMZ-DATE-XXX
+    method: POST
+    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
+  response:
+    body:
+      string: '{"message":"The security token included in the request is invalid."}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '68'
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 19 Feb 2026 00:00:07 GMT
+      x-amzn-ErrorType:
+      - UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
+      x-amzn-RequestId:
+      - X-AMZN-REQUESTID-XXX
+    status:
+      code: 403
+      message: Forbidden
+version: 1
--- a/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_agent_with_native_tool_calling.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_agent_with_native_tool_calling.yaml
@@ -3,14 +3,14 @@ interactions:
    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
      * 8\n\nThis is the expected criteria for your final answer: The result of the
      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
-      "role": "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
-      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
-      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
-      "Perform mathematical calculations. Use this for any math operations.", "name":
-      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
-      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
-      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
+      not a summary."}], "role": "user"}], "systemInstruction": {"parts": [{"text":
+      "You are Math Assistant. You are a helpful math assistant.\nYour personal goal
+      is: Help users with mathematical calculations"}], "role": "user"}, "tools":
+      [{"functionDeclarations": [{"description": "Perform mathematical calculations.
+      Use this for any math operations.", "name": "calculator", "parameters_json_schema":
+      {"properties": {"expression": {"description": "Mathematical expression to evaluate",
+      "title": "Expression", "type": "string"}}, "required": ["expression"], "type":
+      "object", "additionalProperties": false}}]}], "generationConfig": {"stopSequences":
      ["\nObservation:"]}}'
    headers:
      User-Agent:
@@ -22,7 +22,7 @@ interactions:
      connection:
      - keep-alive
      content-length:
-      - '907'
+      - '892'
      content-type:
      - application/json
      host:
@@ -32,31 +32,31 @@ interactions:
      x-goog-api-key:
      - X-GOOG-API-KEY-XXX
    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
  response:
    body:
      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
        [\n          {\n            \"functionCall\": {\n              \"name\": \"calculator\",\n
        \             \"args\": {\n                \"expression\": \"15 * 8\"\n              }\n
-        \           }\n          }\n        ],\n        \"role\": \"model\"\n      },\n
-        \     \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.00062879999833447594\n
-        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 103,\n    \"candidatesTokenCount\":
-        7,\n    \"totalTokenCount\": 110,\n    \"promptTokensDetails\": [\n      {\n
-        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 103\n      }\n    ],\n
-        \   \"candidatesTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
-        \       \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n
-        \ \"responseId\": \"PpByabfUHsih_uMPlu2ysAM\"\n}\n"
+        \           },\n            \"thoughtSignature\": \"Cp8DAb4+9vu74rJ0QQNTa6oMMh3QAlvx3cS4TL0I1od7EdQZtMBbsr5viQiTUR/LKj8nwPvtLjZxib5SXqmV0t2B2ZMdq1nqD62vLPD3i7tmUeRoysODfxomRGRhy/CPysMhobt5HWF1W/n6tNiQz3V36f0/dRx5yJeyN4tJL/RZePv77FUqywOfFlYOkOIyAkrE5LT6FicOjhHm/B9bGV/y7TNmN6TtwQDxoE9nU92Q/UNZ7rNyZE7aSR7KPJZuRXrrBBh+akt5dX5n6N9kGWkyRpWVgUox01+b22RSj4S/QY45IvadtmmkFk8DMVAtAnEiK0WazltC+TOdUJHwVgBD494fngoVcHU+R1yIJrVe7h6Ce3Ts5IYLrRCedDU3wW1ghn/hXx1nvTqQumpsGTGtE2v3KjF/7DmQA96WzB1X7+QUOF2J3pK9HemiKxAQl4U9fP2eNN8shvy2YykBlahWDujEwye7ji4wIWtNHbf0t+uFwGTQ3QruAKXvWB04ExjHM2I/8O9U5tOsH0cwPqnpFR2EaTqaPXXUllZ2K+DaaA==\"\n
+        \         }\n        ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
+        \"STOP\",\n      \"index\": 0,\n      \"finishMessage\": \"Model generated
+        function call(s).\"\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
+        115,\n    \"candidatesTokenCount\": 17,\n    \"totalTokenCount\": 227,\n    \"promptTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 115\n
+        \     }\n    ],\n    \"thoughtsTokenCount\": 95\n  },\n  \"modelVersion\":
+        \"gemini-2.5-flash\",\n  \"responseId\": \"Y1KWadvNMKz1jMcPiJeJmAI\"\n}\n"
    headers:
      Alt-Svc:
      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
      Content-Type:
      - application/json; charset=UTF-8
      Date:
-      - Thu, 22 Jan 2026 21:01:50 GMT
+      - Wed, 18 Feb 2026 23:59:32 GMT
      Server:
      - scaffolding on HTTPServer2
      Server-Timing:
-      - gfet4t7; dur=521
+      - gfet4t7; dur=956
      Transfer-Encoding:
      - chunked
      Vary:
@@ -76,18 +76,19 @@ interactions:
    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
      * 8\n\nThis is the expected criteria for your final answer: The result of the
      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
-      "role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
-      "The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
-      the tool result. If requirements are met, provide the Final Answer. Otherwise,
-      call the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
-      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
-      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
-      "Perform mathematical calculations. Use this for any math operations.", "name":
-      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
-      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
-      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
+      not a summary."}], "role": "user"}, {"parts": [{"functionCall": {"args": {"expression":
+      "15 * 8"}, "name": "calculator"}}], "role": "model"}, {"parts": [{"functionResponse":
+      {"name": "calculator", "response": {"result": "The result of 15 * 8 is 120"}}}],
+      "role": "user"}, {"parts": [{"text": "Analyze the tool result. If requirements
+      are met, provide the Final Answer. Otherwise, call the next tool. Deliver only
+      the answer without meta-commentary."}], "role": "user"}], "systemInstruction":
+      {"parts": [{"text": "You are Math Assistant. You are a helpful math assistant.\nYour
+      personal goal is: Help users with mathematical calculations"}], "role": "user"},
+      "tools": [{"functionDeclarations": [{"description": "Perform mathematical calculations.
+      Use this for any math operations.", "name": "calculator", "parameters_json_schema":
+      {"properties": {"expression": {"description": "Mathematical expression to evaluate",
+      "title": "Expression", "type": "string"}}, "required": ["expression"], "type":
+      "object", "additionalProperties": false}}]}], "generationConfig": {"stopSequences":
      ["\nObservation:"]}}'
    headers:
      User-Agent:
@@ -99,7 +100,7 @@ interactions:
      connection:
      - keep-alive
      content-length:
-      - '1219'
+      - '1326'
      content-type:
      - application/json
      host:
@@ -109,378 +110,28 @@ interactions:
      x-goog-api-key:
      - X-GOOG-API-KEY-XXX
    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
  response:
    body:
      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"functionCall\": {\n              \"name\": \"calculator\",\n
-        \             \"args\": {\n                \"expression\": \"15 * 8\"\n              }\n
-        \           }\n          }\n        ],\n        \"role\": \"model\"\n      },\n
-        \     \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.013549212898526872\n
-        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 149,\n    \"candidatesTokenCount\":
-        7,\n    \"totalTokenCount\": 156,\n    \"promptTokensDetails\": [\n      {\n
-        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 149\n      }\n    ],\n
-        \   \"candidatesTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
-        \       \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n
-        \ \"responseId\": \"P5Byadc8kJT-4w_p99XQAQ\"\n}\n"
+        [\n          {\n            \"text\": \"The result of 15 * 8 is 120\"\n          }\n
+        \       ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
+        \"STOP\",\n      \"index\": 0\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
+        191,\n    \"candidatesTokenCount\": 14,\n    \"totalTokenCount\": 205,\n    \"promptTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 191\n
+        \     }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.5-flash\",\n  \"responseId\":
+        \"ZFKWaf2BMM6MjMcP6P--kQM\"\n}\n"
    headers:
      Alt-Svc:
      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
      Content-Type:
      - application/json; charset=UTF-8
      Date:
-      - Thu, 22 Jan 2026 21:01:51 GMT
+      - Wed, 18 Feb 2026 23:59:33 GMT
      Server:
      - scaffolding on HTTPServer2
      Server-Timing:
-      - gfet4t7; dur=444
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
-      * 8\n\nThis is the expected criteria for your final answer: The result of the
-      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
-      "role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
-      "The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
-      the tool result. If requirements are met, provide the Final Answer. Otherwise,
-      call the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
-      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
-      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
-      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
-      "Perform mathematical calculations. Use this for any math operations.", "name":
-      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
-      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
-      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
-      ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1531'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"functionCall\": {\n              \"name\": \"calculator\",\n
-        \             \"args\": {\n                \"expression\": \"15 * 8\"\n              }\n
-        \           }\n          }\n        ],\n        \"role\": \"model\"\n      },\n
-        \     \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.0409286447933742\n
-        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 195,\n    \"candidatesTokenCount\":
-        7,\n    \"totalTokenCount\": 202,\n    \"promptTokensDetails\": [\n      {\n
-        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 195\n      }\n    ],\n
-        \   \"candidatesTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
-        \       \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n
-        \ \"responseId\": \"P5Byadn5HOK6_uMPnvmXwAk\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Thu, 22 Jan 2026 21:01:51 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=503
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
-      * 8\n\nThis is the expected criteria for your final answer: The result of the
-      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
-      "role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
-      "The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
-      the tool result. If requirements are met, provide the Final Answer. Otherwise,
-      call the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
-      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
-      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
-      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
-      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
-      "Perform mathematical calculations. Use this for any math operations.", "name":
-      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
-      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
-      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
-      ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1843'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"functionCall\": {\n              \"name\": \"calculator\",\n
-        \             \"args\": {\n                \"expression\": \"15 * 8\"\n              }\n
-        \           }\n          }\n        ],\n        \"role\": \"model\"\n      },\n
-        \     \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.018002046006066457\n
-        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 241,\n    \"candidatesTokenCount\":
-        7,\n    \"totalTokenCount\": 248,\n    \"promptTokensDetails\": [\n      {\n
-        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 241\n      }\n    ],\n
-        \   \"candidatesTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
-        \       \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n
-        \ \"responseId\": \"P5Byafi2PKbn_uMPtIbfuQI\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Thu, 22 Jan 2026 21:01:52 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=482
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
-      * 8\n\nThis is the expected criteria for your final answer: The result of the
-      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
-      "role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
-      "The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
-      the tool result. If requirements are met, provide the Final Answer. Otherwise,
-      call the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
-      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
-      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
-      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
-      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
-      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
-      "Perform mathematical calculations. Use this for any math operations.", "name":
-      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
-      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
-      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
-      ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '2155'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"functionCall\": {\n              \"name\": \"calculator\",\n
-        \             \"args\": {\n                \"expression\": \"15 * 8\"\n              }\n
-        \           }\n          }\n        ],\n        \"role\": \"model\"\n      },\n
-        \     \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.10329001290457589\n
-        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 287,\n    \"candidatesTokenCount\":
-        7,\n    \"totalTokenCount\": 294,\n    \"promptTokensDetails\": [\n      {\n
-        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 287\n      }\n    ],\n
-        \   \"candidatesTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
-        \       \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n
-        \ \"responseId\": \"QJByaamVIP_g_uMPt6mI0Qg\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Thu, 22 Jan 2026 21:01:52 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=534
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
-      * 8\n\nThis is the expected criteria for your final answer: The result of the
-      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
-      "role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
-      "The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
-      the tool result. If requirements are met, provide the Final Answer. Otherwise,
-      call the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
-      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
-      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
-      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
-      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}], "role":
-      "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
-      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
-      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
-      "Perform mathematical calculations. Use this for any math operations.", "name":
-      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
-      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
-      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
-      ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '2467'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"text\": \"120\\n\"\n          }\n        ],\n
-        \       \"role\": \"model\"\n      },\n      \"finishReason\": \"STOP\",\n
-        \     \"avgLogprobs\": -0.0097615998238325119\n    }\n  ],\n  \"usageMetadata\":
-        {\n    \"promptTokenCount\": 333,\n    \"candidatesTokenCount\": 4,\n    \"totalTokenCount\":
-        337,\n    \"promptTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
-        \       \"tokenCount\": 333\n      }\n    ],\n    \"candidatesTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 4\n      }\n
-        \   ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n  \"responseId\":
-        \"QZByaZHABO-i_uMP58aYqAk\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Thu, 22 Jan 2026 21:01:53 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=412
+      - gfet4t7; dur=421
      Transfer-Encoding:
      - chunked
      Vary:
--- a/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_parallel_native_tool_calling_test_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_parallel_native_tool_calling_test_agent_kickoff.yaml
@@ -0,0 +1,188 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
+      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
+      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary."}], "role": "user"}], "systemInstruction": {"parts": [{"text":
+      "You are Parallel Tool Agent. You follow tool instructions precisely.\nYour
+      personal goal is: Use both tools exactly as instructed"}], "role": "user"},
+      "tools": [{"functionDeclarations": [{"description": "Local search tool #1 for
+      concurrency testing.", "name": "parallel_local_search_one", "parameters_json_schema":
+      {"properties": {"query": {"description": "Search query", "title": "Query", "type":
+      "string"}}, "required": ["query"], "type": "object", "additionalProperties":
+      false}}, {"description": "Local search tool #2 for concurrency testing.", "name":
+      "parallel_local_search_two", "parameters_json_schema": {"properties": {"query":
+      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
+      ["query"], "type": "object", "additionalProperties": false}}, {"description":
+      "Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
+      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
+      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
+      "additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1783'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.49.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"functionCall\": {\n              \"name\": \"parallel_local_search_one\",\n
+        \             \"args\": {\n                \"query\": \"latest OpenAI model
+        release notes\"\n              }\n            },\n            \"thoughtSignature\":
+        \"CrICAb4+9vtrrkiSatPyOs7fssb9akcgCIiQdJKp/k+hcEZVNFvU/H0e4FFmLIhTCPRyHxmU+AQPtBZ5vg6y9ZCcv11RdcWgYW8rPQzCnC+YTUxPAfDzaObky1QsL5pl9+yglQqVoVM31ZcnoiH02z85pwAv6TSJxdJZEekW6XwcIrCoHNCgY3ghHFEd3y3wLJ5JWL7wmiRNTC9TCT8aJHXKFohYrb+4JMULCx8BqKVxOucZPiDHA8GsoqSlzkYEe2xCh9oSdaZpCFrxhZ9bwoVDbVmPrjaq2hj5BoJ5hNxscHJ/E0EOl4ogeKZW+hIVfdzpjAFZW9Oejkb9G4ZSLbxXsoO7x8bi4LHFRABniGrWvNuOOH0Udh4t57oXHXZO4u5NNTood/GkJGcP+aHqUAH1fwqL\"\n
+        \         },\n          {\n            \"functionCall\": {\n              \"name\":
+        \"parallel_local_search_two\",\n              \"args\": {\n                \"query\":
+        \"latest Anthropic model release notes\"\n              }\n            }\n
+        \         },\n          {\n            \"functionCall\": {\n              \"name\":
+        \"parallel_local_search_three\",\n              \"args\": {\n                \"query\":
+        \"latest Gemini model release notes\"\n              }\n            }\n          }\n
+        \       ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
+        \"STOP\",\n      \"index\": 0,\n      \"finishMessage\": \"Model generated
+        function call(s).\"\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
+        291,\n    \"candidatesTokenCount\": 70,\n    \"totalTokenCount\": 428,\n    \"promptTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 291\n
+        \     }\n    ],\n    \"thoughtsTokenCount\": 67\n  },\n  \"modelVersion\":
+        \"gemini-2.5-flash\",\n  \"responseId\": \"alKWacytCLi5jMcPhISaoAI\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Wed, 18 Feb 2026 23:59:39 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=999
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
+      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
+      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary."}], "role": "user"}, {"parts": [{"functionCall": {"args":
+      {"query": "latest OpenAI model release notes"}, "name": "parallel_local_search_one"},
+      "thoughtSignature": "CrICAb4-9vtrrkiSatPyOs7fssb9akcgCIiQdJKp_k-hcEZVNFvU_H0e4FFmLIhTCPRyHxmU-AQPtBZ5vg6y9ZCcv11RdcWgYW8rPQzCnC-YTUxPAfDzaObky1QsL5pl9-yglQqVoVM31ZcnoiH02z85pwAv6TSJxdJZEekW6XwcIrCoHNCgY3ghHFEd3y3wLJ5JWL7wmiRNTC9TCT8aJHXKFohYrb-4JMULCx8BqKVxOucZPiDHA8GsoqSlzkYEe2xCh9oSdaZpCFrxhZ9bwoVDbVmPrjaq2hj5BoJ5hNxscHJ_E0EOl4ogeKZW-hIVfdzpjAFZW9Oejkb9G4ZSLbxXsoO7x8bi4LHFRABniGrWvNuOOH0Udh4t57oXHXZO4u5NNTood_GkJGcP-aHqUAH1fwqL"},
+      {"functionCall": {"args": {"query": "latest Anthropic model release notes"},
+      "name": "parallel_local_search_two"}}, {"functionCall": {"args": {"query": "latest
+      Gemini model release notes"}, "name": "parallel_local_search_three"}}], "role":
+      "model"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_one",
+      "response": {"result": "[one] latest OpenAI model release notes"}}}], "role":
+      "user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_two",
+      "response": {"result": "[two] latest Anthropic model release notes"}}}], "role":
+      "user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_three",
+      "response": {"result": "[three] latest Gemini model release notes"}}}], "role":
+      "user"}], "systemInstruction": {"parts": [{"text": "You are Parallel Tool Agent.
+      You follow tool instructions precisely.\nYour personal goal is: Use both tools
+      exactly as instructed"}], "role": "user"}, "tools": [{"functionDeclarations":
+      [{"description": "Local search tool #1 for concurrency testing.", "name": "parallel_local_search_one",
+      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
+      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
+      "additionalProperties": false}}, {"description": "Local search tool #2 for concurrency
+      testing.", "name": "parallel_local_search_two", "parameters_json_schema": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}, {"description":
+      "Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
+      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
+      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
+      "additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '3071'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.49.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"text\": \"Here is a summary of the latest model
+        release notes: I have retrieved information regarding the latest OpenAI model
+        release notes, the latest Anthropic model release notes, and the latest Gemini
+        model release notes. The specific details of these release notes are available
+        through the respective tool outputs.\",\n            \"thoughtSignature\":
+        \"CsoBAb4+9vtPvWFM08lR1S4QrLN+Z1+Zpf04Y/bC8tjOpnxz3EEvHyRNEwkslUX5pftBi8J78Xk4/FUER0xjJZc8clUObTvayxLNup4h1JwJ5ZdatulInNGTEieFnF4w8KjSFB/vqNCZvXWZbiLkpzqAnsoAIf0x4VmMN11V0Ozo+3f2QftD+iBrfu3g21UI5tbG0Z+0QHxjRVKXrQOp7dmoZPzaxI0zalfDEI+A2jGpVl/VvauVNv0jQn0yItcA5tkVeWLq6717CjNoig==\"\n
+        \         }\n        ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
+        \"STOP\",\n      \"index\": 0\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
+        435,\n    \"candidatesTokenCount\": 54,\n    \"totalTokenCount\": 524,\n    \"promptTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 435\n
+        \     }\n    ],\n    \"thoughtsTokenCount\": 35\n  },\n  \"modelVersion\":
+        \"gemini-2.5-flash\",\n  \"responseId\": \"bFKWaZOZCqCvjMcPvvGNgAc\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Wed, 18 Feb 2026 23:59:41 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=967
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_parallel_native_tool_calling_test_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_parallel_native_tool_calling_test_crew.yaml
@@ -0,0 +1,192 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
+      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
+      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."}], "role": "user"}], "systemInstruction":
+      {"parts": [{"text": "You are Parallel Tool Agent. You follow tool instructions
+      precisely.\nYour personal goal is: Use both tools exactly as instructed"}],
+      "role": "user"}, "tools": [{"functionDeclarations": [{"description": "Local
+      search tool #1 for concurrency testing.", "name": "parallel_local_search_one",
+      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
+      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
+      "additionalProperties": false}}, {"description": "Local search tool #2 for concurrency
+      testing.", "name": "parallel_local_search_two", "parameters_json_schema": {"properties":
+      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
+      "required": ["query"], "type": "object", "additionalProperties": false}}, {"description":
+      "Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
+      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
+      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
+      "additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1964'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.49.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"functionCall\": {\n              \"name\": \"parallel_local_search_one\",\n
+        \             \"args\": {\n                \"query\": \"latest OpenAI model
+        release notes\"\n              }\n            },\n            \"thoughtSignature\":
+        \"CuMEAb4+9vu1V1iOC9o/a8+jQqow8F4RTrjlnjnDCwsisMHLLJ+Wj3pZxbFDeIjCJe9pa6+14InyYHh/ezgHrv+xPGIJtX9pJQatDCBAfCmcZ3fDipVIMAHLcl0Q660EVuZ+vRgvNhPSau+uSN9u303wJsaKvdzOQnfww2LfLtJMNtOhSHfkfhfw2bkBOtMa5/FuLqKSr6m94dSdE7HShR6+jLMLbiSXkBLWsRp0jGl85Wvd0hoA7dUyq+uIuyOBr5Myo9uMrLbxfnrRRbPMorOpYTCmHK0HE8mEBRjzh1hNwcBcfRL0VcgA2UnBIurStIeVbq51BJQ1UOq6r1wVi50Wdh1GjIQ/iN9C15T1Ql3adjom5QbmY+XY08RJOiNyVplh1YQ0qlWCVHEpueEfdzcIB+BUauVrLNqBcBr5g6ekO5QZCAdt7PLerQU8jhKjDQy367jCKQyaHir0GmAISS8RlZ8tkLKNZlZhd11D76ui6X8ep9yznViBbqH0AS1R2hMm+ielMVFjhidglTMjqB0X+yk1K2eZXkc+R/xsXRPlnlZWRygnV+IbU8RAnZWtneM464Wccmc1scfF45GKiji5bLYO7Zx+ZF8mSLcQaC8M3z121D6VbFonhaIdkJ3Wb7nI2vEyxFjdinVk3/P0zL8nu3nHeqQviTrQIoHMsZk0yPyqu9NWxg3wGJL5pbcaQh87ROQuTsInkuzzEr0QMzjw9W5iquhMh4/Wy/OKXAgf3maQB9Jb4HoHZlc0io+KYqewFSVx2BvqXbqJbIrTkTo6XRTbK7dkwlCbMmE1wKIwjrrzZQI=\"\n
+        \         },\n          {\n            \"functionCall\": {\n              \"name\":
+        \"parallel_local_search_two\",\n              \"args\": {\n                \"query\":
+        \"latest Anthropic model release notes\"\n              }\n            }\n
+        \         },\n          {\n            \"functionCall\": {\n              \"name\":
+        \"parallel_local_search_three\",\n              \"args\": {\n                \"query\":
+        \"latest Gemini model release notes\"\n              }\n            }\n          }\n
+        \       ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
+        \"STOP\",\n      \"index\": 0,\n      \"finishMessage\": \"Model generated
+        function call(s).\"\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
+        327,\n    \"candidatesTokenCount\": 70,\n    \"totalTokenCount\": 536,\n    \"promptTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 327\n
+        \     }\n    ],\n    \"thoughtsTokenCount\": 139\n  },\n  \"modelVersion\":
+        \"gemini-2.5-flash\",\n  \"responseId\": \"ZVKWabziF7bcjMcP3r2SuAg\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Wed, 18 Feb 2026 23:59:34 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=1262
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
+      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
+      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."}], "role": "user"}, {"parts": [{"functionCall":
+      {"args": {"query": "latest OpenAI model release notes"}, "name": "parallel_local_search_one"}},
+      {"functionCall": {"args": {"query": "latest Anthropic model release notes"},
+      "name": "parallel_local_search_two"}}, {"functionCall": {"args": {"query": "latest
+      Gemini model release notes"}, "name": "parallel_local_search_three"}}], "role":
+      "model"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_one",
+      "response": {"result": "[one] latest OpenAI model release notes"}}}], "role":
+      "user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_two",
+      "response": {"result": "[two] latest Anthropic model release notes"}}}], "role":
+      "user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_three",
+      "response": {"result": "[three] latest Gemini model release notes"}}}], "role":
+      "user"}, {"parts": [{"text": "Analyze the tool result. If requirements are met,
+      provide the Final Answer. Otherwise, call the next tool. Deliver only the answer
+      without meta-commentary."}], "role": "user"}], "systemInstruction": {"parts":
+      [{"text": "You are Parallel Tool Agent. You follow tool instructions precisely.\nYour
+      personal goal is: Use both tools exactly as instructed"}], "role": "user"},
+      "tools": [{"functionDeclarations": [{"description": "Local search tool #1 for
+      concurrency testing.", "name": "parallel_local_search_one", "parameters_json_schema":
+      {"properties": {"query": {"description": "Search query", "title": "Query", "type":
+      "string"}}, "required": ["query"], "type": "object", "additionalProperties":
+      false}}, {"description": "Local search tool #2 for concurrency testing.", "name":
+      "parallel_local_search_two", "parameters_json_schema": {"properties": {"query":
+      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
+      ["query"], "type": "object", "additionalProperties": false}}, {"description":
+      "Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
+      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
+      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
+      "additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '3014'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.49.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"text\": \"The search results indicate the latest
+        model release notes for OpenAI, Anthropic, and Gemini are: [one] latest OpenAI
+        model release notes[two] latest Anthropic model release notes[three] latest
+        Gemini model release notes.\",\n            \"thoughtSignature\": \"CsUPAb4+9vs4hkuatQAakl1FSHx5DIde9nHYobJdlWs2HEzES9gHn7uwjMIlFPTzJUbnZqxpAK93hqsCofdfGANr8dwK+/IbZAiMSikpAq2ZjEbWADjfalU3ke4LcQMh6TEYFVGz1QCinjne3jZx5jOVaL8YdAtjOYnBZWA6KqdvfKjD7+Ct/BLoEqvu4LW6kxhXQgcV+D3M1QxGlr1dxpajj4wyYFI9LXchE2vCdAMPYTkPQ4WPbS3xjz0jJb6qFAwwg+BY5kGemkWWVHsvq28t09pd7FEH0bod5cEpR65qEefpJfhHsXYqmOwHDkfNePYnYC+5qmn7kvkN+fhF41SoMRZahMZGDjIo+q6vvru3eXKmZiuLsrh8AqQIks/4S3sSuxt16ogYKE+LlFxml2ygXFPww59nRAtc+xK6VW8jB2vyv9Eo5cpnG9ZBv1dOznJnmj4AWA1ddMlp+yq8AdaboTSo5dysYMwFcSXS3kuU+xi92dC+7GqZZbDr5frvnc+MnSuzYwHhNjSQqvTo5DKGit53zDwlFJT74kLBXk36BOFQp4xlfs+BpKkw11bow6qQoTvC68D023ZHami+McO1WYBDoO5CrDoosU8fAYljqaGArBoMlssF4O7VKHEaEbEZnYCr0Wxo6XP/mtPIpHQE4OyCz/GAJSJtQv1hO7DNCMzpSpkLyuemB1SOZGl3mlLQhosh3TAGP0xgqmHpKccdCSWoXGWjO48VluFuV9E1FwW1Xi++XhMRcUaljJXPZaNVjGcAG1uAxeVkUMsY8tBvQ0vaumUK2jkzbyQTWeStEWwl1yKmklI8JDXske/k6tYJOyF+8t0mF7oCEqNHSNicj7TomihpPlVjNl1Mm4l5fvwlKtAPJwiKrchCunlZB3uGN1AR0h0Hvznffutc/lV/FWFbNgFAaNJZKRs40vMk1xmRZyH2rs+Ob2fZriQ3BSwzzNeiwDLXxm0m/ytOai+K9ObFuC/IEh5fJfvQbNeo3TmiCAMCZPNXMDtlOyLqQzzKwmMFH4c53Ol+kkTiuAKECNQR1dOCufAL0U5lzEUFRxFvOq67lp6xqG8m+WzCIkbnF8QyJHfujtXVMJACaevUkM7+kAVyTwETEKQsanp0tBwzV42ieChp/h7pivcC++cFXdSG5dvR94BgkHmtpC9+jfNH32RREPLuyWfU5aBXiOkxjRs9fDexAFjrkGjM18I+jqHZNeuUR20BKe2jFsU8xJS3Fa4eXabm/YPL1t8R5jr572Ch/r4bspFp8MQ5RcFo8Nn/HiBmW8uZ2BcLEY1RPWUBvxVhfvh/hNxaRKu21x8vGz72RoiNuOjNbeADYAaBJqBGLp0MALxZ/rnXPzDLQUt6Mv07fWHAZr5p3r/skleot25lr2Tcl4qJCPM4/cfs6U0x4CY26ktBiCs4bWKqSEV1Q05nf5kpxVOIRSTgxqFOj/rWIAF3uw7mvsuRKd3YXILV5OrvEoETdQvf7BdYPbQbIQYDf7DBKhf51O8RKQgcfl6mVQswamdJ+PyqLbozTkFCjXMKI0PwJdy8tfKfCeeEe0TbOXSfeTczKQkL8WyWkBg4tS81JnWAVzfVlNjbvo/fk+wv7FyfJJS1HJGlxZ0kUlWi1369rSlldYPoSqopuekOxtYnpYpz92y/jVLNQXE1IVLqWYh9o3gTwjeyaHG7fCaWF2QRGrCUvejT8eJjevhj/sgadjPVcEP5o7Zcw5yTBCgc0+FX1j5KpCmfZ/dVvT4iIX8bOkhxjHQ8ifOx39BMM4EObgCA+g+BFN+Ra7kOf4hJ6tPNhqvJa4E4fyISlVrRiBqSt59ZkuLyWuY9SYy0nvbklP30WDUHSAvcuEwVMSuT524afHISfO/+tSgE7JAKzEPSOoVO3Z5NS9kcAqHuBSe/LL4XJbCKF9Oggm9/gwdAulnBANd4ydQ/raTPE/QUu/CGqqGhBd+wo8x0Jg/BMZWkwhz0fEzsh+OjnrEkHv4QIqZ9v/j1Rv9uc+cDeK7eGi62okGLrPFX2pNQtsZRdUM9aBSlTBUVSdCDpkvieENzLnR257EDZy1EV2HxGRfOFZVVdaW1n8XvL73pcFoQ5XABpfYuigOS8i4S8g43Qfe77GosnuXR5rcJCrL03q3hptb97K5ysKFLgumsaaWo92MBhZYKvQ6SwStgyWRlb22uQGQJYsS8OTD/uVNiQzFjOMsR/l71c9RI1Eb7SQJT6WWvL1YhA7sQw/lQf8soLKfWshoky6mMrGopjRak8xHpJe5VWbqK8PK6iXDd403JrHICyh4M3FpEja3eX2V3SN6U+EgIWKIE8lE/iQZakhLtG2KL7nNQy/cksxzIh5ElQCe5NkrQZO0fai6ek8qwbmz07RVg2FknD7F2hvmxZBqoJSXhsFVn/9+fnkcsZekEtUevFmlQQNspPc63XgO0XmpTye9uM/BbTEsNEWeHSFZTEQLLx1l+pgwsYO3NlNSIUN24/GIR7JrZFG4fAoljkDKjhrYQzr1Fiy3t5G+CmadZ0TcjRQQdDw36ETlf7cizcrQc4FNtnx5rNWEaf54vUvlsd2DD19UIkzP9omITsiuNPPcUNq0A6v1TkgnSNYfhb26nxJIg34r8MmCAhWzB2eCy54gvOHDGLFAwfFZrQdvl\"\n
+        \         }\n        ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
+        \"STOP\",\n      \"index\": 0\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
+        504,\n    \"candidatesTokenCount\": 45,\n    \"totalTokenCount\": 973,\n    \"promptTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 504\n
+        \     }\n    ],\n    \"thoughtsTokenCount\": 424\n  },\n  \"modelVersion\":
+        \"gemini-2.5-flash\",\n  \"responseId\": \"Z1KWaYbTKZvnjMcP7piEoAg\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Wed, 18 Feb 2026 23:59:37 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=2283
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_agent_with_native_tool_calling.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_agent_with_native_tool_calling.yaml
@@ -5,9 +5,9 @@ interactions:
      calculations"},{"role":"user","content":"\nCurrent Task: Calculate what is 15
      * 8\n\nThis is the expected criteria for your final answer: The result of the
      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"calculator","description":"Perform
-      mathematical calculations. Use this for any math operations.","parameters":{"properties":{"expression":{"description":"Mathematical
-      expression to evaluate","title":"Expression","type":"string"}},"required":["expression"],"type":"object"}}}]}'
+      not a summary."}],"model":"gpt-5-nano","tool_choice":"auto","tools":[{"type":"function","function":{"name":"calculator","description":"Perform
+      mathematical calculations. Use this for any math operations.","strict":true,"parameters":{"properties":{"expression":{"description":"Mathematical
+      expression to evaluate","title":"Expression","type":"string"}},"required":["expression"],"type":"object","additionalProperties":false}}}]}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
@@ -20,7 +20,7 @@ interactions:
      connection:
      - keep-alive
      content-length:
-      - '829'
+      - '813'
      content-type:
      - application/json
      host:
@@ -47,140 +47,17 @@ interactions:
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
-      string: "{\n  \"id\": \"chatcmpl-D0vm7joOuDBPcMpfmOnftOoTCPtc8\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1769114459,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
-        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
-        \           \"id\": \"call_G73UZDvL4wC9EEdvm1UcRIRM\",\n            \"type\":
-        \"function\",\n            \"function\": {\n              \"name\": \"calculator\",\n
-        \             \"arguments\": \"{\\\"expression\\\":\\\"15 * 8\\\"}\"\n            }\n
-        \         }\n        ],\n        \"refusal\": null,\n        \"annotations\":
-        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"tool_calls\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 137,\n    \"completion_tokens\":
-        17,\n    \"total_tokens\": 154,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
-        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_c4585b5b9c\"\n}\n"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 22 Jan 2026 20:40:59 GMT
-      Server:
-      - cloudflare
-      Set-Cookie:
-      - SET-COOKIE-XXX
-      Strict-Transport-Security:
-      - STS-XXX
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      access-control-expose-headers:
-      - ACCESS-CONTROL-XXX
-      alt-svc:
-      - h3=":443"; ma=86400
-      cf-cache-status:
-      - DYNAMIC
-      openai-organization:
-      - OPENAI-ORG-XXX
-      openai-processing-ms:
-      - '761'
-      openai-project:
-      - OPENAI-PROJECT-XXX
-      openai-version:
-      - '2020-10-01'
-      x-envoy-upstream-service-time:
-      - '1080'
-      x-openai-proxy-wasm:
-      - v0.1
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-ratelimit-reset-requests:
-      - X-RATELIMIT-RESET-REQUESTS-XXX
-      x-ratelimit-reset-tokens:
-      - X-RATELIMIT-RESET-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"messages":[{"role":"system","content":"You are Math Assistant. You are
-      a helpful math assistant.\nYour personal goal is: Help users with mathematical
-      calculations"},{"role":"user","content":"\nCurrent Task: Calculate what is 15
-      * 8\n\nThis is the expected criteria for your final answer: The result of the
-      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary.\n\nThis is VERY important to you, your job depends on it!"},{"role":"assistant","content":null,"tool_calls":[{"id":"call_G73UZDvL4wC9EEdvm1UcRIRM","type":"function","function":{"name":"calculator","arguments":"{\"expression\":\"15
-      * 8\"}"}}]},{"role":"tool","tool_call_id":"call_G73UZDvL4wC9EEdvm1UcRIRM","content":"The
-      result of 15 * 8 is 120"},{"role":"user","content":"Analyze the tool result.
-      If requirements are met, provide the Final Answer. Otherwise, call the next
-      tool. Deliver only the answer without meta-commentary."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"calculator","description":"Perform
-      mathematical calculations. Use this for any math operations.","parameters":{"properties":{"expression":{"description":"Mathematical
-      expression to evaluate","title":"Expression","type":"string"}},"required":["expression"],"type":"object"}}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1299'
-      content-type:
-      - application/json
-      cookie:
-      - COOKIE-XXX
-      host:
-      - api.openai.com
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 1.83.0
-      x-stainless-read-timeout:
-      - X-STAINLESS-READ-TIMEOUT-XXX
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: "{\n  \"id\": \"chatcmpl-D0vm8mUnzLxu9pf1rc7MODkrMsCmf\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1769114460,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-DAlG9W2mJYuOgpf3FwCRgbqaiHWf3\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1771457317,\n  \"model\": \"gpt-5-nano-2025-08-07\",\n
        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
        \"assistant\",\n        \"content\": \"120\",\n        \"refusal\": null,\n
-        \       \"annotations\": []\n      },\n      \"logprobs\": null,\n      \"finish_reason\":
-        \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 207,\n    \"completion_tokens\":
-        2,\n    \"total_tokens\": 209,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        \       \"annotations\": []\n      },\n      \"finish_reason\": \"stop\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 208,\n    \"completion_tokens\":
+        138,\n    \"total_tokens\": 346,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        {\n      \"reasoning_tokens\": 128,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_c4585b5b9c\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": null\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
@@ -189,7 +66,7 @@ interactions:
      Content-Type:
      - application/json
      Date:
-      - Thu, 22 Jan 2026 20:41:00 GMT
+      - Wed, 18 Feb 2026 23:28:39 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
@@ -207,13 +84,13 @@ interactions:
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
-      - '262'
+      - '1869'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
-      x-envoy-upstream-service-time:
-      - '496'
+      set-cookie:
+      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
--- a/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_test_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_test_agent_kickoff.yaml
@@ -0,0 +1,265 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
+      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
+      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
+      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
+      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
+      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
+      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
+      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1733'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DAldZHfQGVcV3FNwAJAtNooU3PAU7\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1771458769,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
+        \           \"id\": \"call_kz1qLLRsugXwWiQMeH9oFAep\",\n            \"type\":
+        \"function\",\n            \"function\": {\n              \"name\": \"parallel_local_search_one\",\n
+        \             \"arguments\": \"{\\\"query\\\": \\\"latest OpenAI model release
+        notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
+        \"call_yNouGq1Kv6P5W9fhTng6acZi\",\n            \"type\": \"function\",\n
+        \           \"function\": {\n              \"name\": \"parallel_local_search_two\",\n
+        \             \"arguments\": \"{\\\"query\\\": \\\"latest Anthropic model
+        release notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
+        \"call_O7MqnuniDmyT6a0BS31GTunB\",\n            \"type\": \"function\",\n
+        \           \"function\": {\n              \"name\": \"parallel_local_search_three\",\n
+        \             \"arguments\": \"{\\\"query\\\": \\\"latest Gemini model release
+        notes\\\"}\"\n            }\n          }\n        ],\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"tool_calls\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        259,\n    \"completion_tokens\": 78,\n    \"total_tokens\": 337,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_414ba99a04\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 18 Feb 2026 23:52:50 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '1418'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - SET-COOKIE-XXX
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
+      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
+      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
+      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
+      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary."},{"role":"assistant","content":null,"tool_calls":[{"id":"call_kz1qLLRsugXwWiQMeH9oFAep","type":"function","function":{"name":"parallel_local_search_one","arguments":"{\"query\":
+      \"latest OpenAI model release notes\"}"}},{"id":"call_yNouGq1Kv6P5W9fhTng6acZi","type":"function","function":{"name":"parallel_local_search_two","arguments":"{\"query\":
+      \"latest Anthropic model release notes\"}"}},{"id":"call_O7MqnuniDmyT6a0BS31GTunB","type":"function","function":{"name":"parallel_local_search_three","arguments":"{\"query\":
+      \"latest Gemini model release notes\"}"}}]},{"role":"tool","tool_call_id":"call_kz1qLLRsugXwWiQMeH9oFAep","name":"parallel_local_search_one","content":"[one]
+      latest OpenAI model release notes"},{"role":"tool","tool_call_id":"call_yNouGq1Kv6P5W9fhTng6acZi","name":"parallel_local_search_two","content":"[two]
+      latest Anthropic model release notes"},{"role":"tool","tool_call_id":"call_O7MqnuniDmyT6a0BS31GTunB","name":"parallel_local_search_three","content":"[three]
+      latest Gemini model release notes"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
+      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
+      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
+      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '2756'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DAldbawkFNpOeXbaJTkTlsSi7OiII\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1771458771,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"The latest release notes for OpenAI,
+        Anthropic, and Gemini models highlight significant updates and improvements
+        in each respective technology. OpenAI's notes detail new features and optimizations
+        that enhance user interaction and performance. Anthropic's release emphasizes
+        their focus on safety and alignment in AI development, showcasing advancements
+        in responsible AI practices. Gemini's notes underline their innovative approaches
+        and cutting-edge functionalities designed to push the boundaries of current
+        AI capabilities.\",\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 377,\n    \"completion_tokens\":
+        85,\n    \"total_tokens\": 462,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_414ba99a04\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 18 Feb 2026 23:52:53 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '1755'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_test_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_test_crew.yaml
@@ -0,0 +1,265 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
+      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
+      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
+      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
+      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
+      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
+      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
+      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1929'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DAlddfEozIpgleBufPaffZMQWK0Hj\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1771458773,\n  \"model\": \"gpt-5-nano-2025-08-07\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
+        \           \"id\": \"call_Putc2jV5GhiIZMwx8mDcI61Q\",\n            \"type\":
+        \"function\",\n            \"function\": {\n              \"name\": \"parallel_local_search_one\",\n
+        \             \"arguments\": \"{\\\"query\\\": \\\"latest OpenAI model release
+        notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
+        \"call_iyjwcvkL3PdoOddxsqkHCT9T\",\n            \"type\": \"function\",\n
+        \           \"function\": {\n              \"name\": \"parallel_local_search_two\",\n
+        \             \"arguments\": \"{\\\"query\\\": \\\"latest Anthropic model
+        release notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
+        \"call_G728RseEU7SbGk5YTiyyp9IH\",\n            \"type\": \"function\",\n
+        \           \"function\": {\n              \"name\": \"parallel_local_search_three\",\n
+        \             \"arguments\": \"{\\\"query\\\": \\\"latest Gemini model release
+        notes\\\"}\"\n            }\n          }\n        ],\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"finish_reason\": \"tool_calls\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 378,\n    \"completion_tokens\":
+        1497,\n    \"total_tokens\": 1875,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 1408,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": null\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 18 Feb 2026 23:53:08 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '14853'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - SET-COOKIE-XXX
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
+      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
+      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
+      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
+      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
+      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
+      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
+      model release notes''). Do not call any other tools and do not answer before
+      those 3 tool calls are emitted. After the tool results return, provide a one
+      paragraph summary.\n\nThis is the expected criteria for your final answer: A
+      one sentence summary of both tool outputs\nyou MUST return the actual complete
+      content as the final answer, not a summary."},{"role":"assistant","content":null,"tool_calls":[{"id":"call_Putc2jV5GhiIZMwx8mDcI61Q","type":"function","function":{"name":"parallel_local_search_one","arguments":"{\"query\":
+      \"latest OpenAI model release notes\"}"}},{"id":"call_iyjwcvkL3PdoOddxsqkHCT9T","type":"function","function":{"name":"parallel_local_search_two","arguments":"{\"query\":
+      \"latest Anthropic model release notes\"}"}},{"id":"call_G728RseEU7SbGk5YTiyyp9IH","type":"function","function":{"name":"parallel_local_search_three","arguments":"{\"query\":
+      \"latest Gemini model release notes\"}"}}]},{"role":"tool","tool_call_id":"call_Putc2jV5GhiIZMwx8mDcI61Q","name":"parallel_local_search_one","content":"[one]
+      latest OpenAI model release notes"},{"role":"tool","tool_call_id":"call_iyjwcvkL3PdoOddxsqkHCT9T","name":"parallel_local_search_two","content":"[two]
+      latest Anthropic model release notes"},{"role":"tool","tool_call_id":"call_G728RseEU7SbGk5YTiyyp9IH","name":"parallel_local_search_three","content":"[three]
+      latest Gemini model release notes"},{"role":"user","content":"Analyze the tool
+      result. If requirements are met, provide the Final Answer. Otherwise, call the
+      next tool. Deliver only the answer without meta-commentary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
+      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
+      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
+      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
+      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '3136'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DAldt2BXNqiYYLPgInjHCpYKfk2VK\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1771458789,\n  \"model\": \"gpt-5-nano-2025-08-07\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"The results show the latest model release
+        notes for OpenAI, Anthropic, and Gemini.\",\n        \"refusal\": null,\n
+        \       \"annotations\": []\n      },\n      \"finish_reason\": \"stop\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 537,\n    \"completion_tokens\":
+        2011,\n    \"total_tokens\": 2548,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 1984,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": null\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 18 Feb 2026 23:53:25 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '15368'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/Show More
+++ b/Show More