Compare commits


1 Commit

Author: Devin AI
SHA1: 82a17d208b
Date: 2026-02-13 08:59:52 +00:00

fix: widen OpenTelemetry version constraints to >=1.34.0,<2 (fixes #4474)

The opentelemetry-api, opentelemetry-sdk, and opentelemetry-exporter-otlp-proto-http
dependencies were pinned to ~=1.34.0 (>=1.34.0,<1.35.0), which conflicts with
google-adk and other libraries requiring >=1.36.0.

Widened to >=1.34.0,<2 to allow compatible newer versions while staying within
the stable 1.x API.

Co-Authored-By: João <joao@crewai.com>
134 changed files with 25001 additions and 32924 deletions
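The specifier semantics described in the commit message can be sanity-checked with the `packaging` library (a quick verification sketch; not part of this change set):

```python
from packaging.specifiers import SpecifierSet

# ~=1.34.0 is the "compatible release" operator: >=1.34.0, ==1.34.* (i.e. <1.35.0)
old_pin = SpecifierSet("~=1.34.0")
new_range = SpecifierSet(">=1.34.0,<2")

print(old_pin.contains("1.36.0"))    # False -- conflicts with libraries needing >=1.36.0
print(new_range.contains("1.36.0"))  # True  -- the widened range admits it
print(new_range.contains("2.0.0"))   # False -- still excludes the 2.x line
```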

.gitignore

@@ -27,6 +27,3 @@ conceptual_plan.md
build_image
chromadb-*.lock
.claude
.crewai/memory
blogs/*
secrets/*


@@ -11,11 +11,7 @@ from typing import Any
from dotenv import load_dotenv
import pytest
from vcr.request import Request # type: ignore[import-untyped]
try:
import vcr.stubs.httpx_stubs as httpx_stubs # type: ignore[import-untyped]
except ModuleNotFoundError:
import vcr.stubs.httpcore_stubs as httpx_stubs # type: ignore[import-untyped]
import vcr.stubs.httpx_stubs as httpx_stubs # type: ignore[import-untyped]
env_test_path = Path(__file__).parent / ".env.test"


@@ -975,79 +975,6 @@ result = streaming.result
Learn more about streaming in the [Streaming Flow Execution](/en/learn/streaming-flow-execution) guide.
## Memory in Flows
Every Flow automatically has access to CrewAI's unified [Memory](/concepts/memory) system. You can store, recall, and extract memories directly inside any flow method using three built-in convenience methods.
### Built-in Methods
| Method | Description |
| :--- | :--- |
| `self.remember(content, **kwargs)` | Store content in memory. Accepts optional `scope`, `categories`, `metadata`, `importance`. |
| `self.recall(query, **kwargs)` | Retrieve relevant memories. Accepts optional `scope`, `categories`, `limit`, `depth`. |
| `self.extract_memories(content)` | Break raw text into discrete, self-contained memory statements. |
A default `Memory()` instance is created automatically when the Flow initializes. You can also pass a custom one:
```python
from crewai.flow.flow import Flow
from crewai import Memory
custom_memory = Memory(
recency_weight=0.5,
recency_half_life_days=7,
embedder={"provider": "ollama", "config": {"model_name": "mxbai-embed-large"}},
)
flow = MyFlow(memory=custom_memory)
```
### Example: Research and Analyze Flow
```python
from crewai.flow.flow import Flow, listen, start
class ResearchAnalysisFlow(Flow):
@start()
def gather_data(self):
# Simulate research findings
findings = (
"PostgreSQL handles 10k concurrent connections with connection pooling. "
"MySQL caps at around 5k. MongoDB scales horizontally but adds complexity."
)
# Extract atomic facts and remember each one
memories = self.extract_memories(findings)
for mem in memories:
self.remember(mem, scope="/research/databases")
return findings
@listen(gather_data)
def analyze(self, raw_findings):
# Recall relevant past research (from this run or previous runs)
past = self.recall("database performance and scaling", limit=10, depth="shallow")
context_lines = [f"- {m.record.content}" for m in past]
context = "\n".join(context_lines) if context_lines else "No prior context."
return {
"new_findings": raw_findings,
"prior_context": context,
"total_memories": len(past),
}
flow = ResearchAnalysisFlow()
result = flow.kickoff()
print(result)
```
Because memory persists across runs (backed by LanceDB on disk), the `analyze` step will recall findings from previous executions too -- enabling flows that learn and accumulate knowledge over time.
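As a quick illustration of that persistence (assuming the default on-disk storage and that `kickoff()` returns the `analyze` step's dict, as in the example above):

```python
# Second execution, e.g. in a fresh process: recall() inside analyze() now
# also returns the facts remembered by the first run, so the count grows.
second_result = ResearchAnalysisFlow().kickoff()
print(second_result["total_memories"])
```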
See the [Memory documentation](/concepts/memory) for details on scopes, slices, composite scoring, embedder configuration, and more.
### Using the CLI
Starting from version 0.103.0, you can run flows using the `crewai run` command:

File diff suppressed because it is too large.


@@ -38,21 +38,22 @@ CrewAI Enterprise provides a comprehensive Human-in-the-Loop (HITL) management system
Configure human review checkpoints within your Flows using the `@human_feedback` decorator. When execution reaches a review point, the system pauses, notifies the assignee via email, and waits for a response.
```python
from crewai.flow.flow import Flow, start, listen, or_
from crewai.flow.flow import Flow, start, listen
from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult
class ContentApprovalFlow(Flow):
@start()
def generate_content(self):
# AI generates content
return "Generated marketing copy for Q1 campaign..."
@listen(generate_content)
@human_feedback(
message="Please review this content for brand compliance:",
emit=["approved", "rejected", "needs_revision"],
)
@listen(or_("generate_content", "needs_revision"))
def review_content(self):
return "Marketing copy for review..."
def review_content(self, content):
return content
@listen("approved")
def publish_content(self, result: HumanFeedbackResult):
@@ -61,6 +62,10 @@ class ContentApprovalFlow(Flow):
@listen("rejected")
def archive_content(self, result: HumanFeedbackResult):
print(f"Content rejected. Reason: {result.feedback}")
@listen("needs_revision")
def revise_content(self, result: HumanFeedbackResult):
print(f"Revision requested: {result.feedback}")
```
For complete implementation details, see the [Human Feedback in Flows](/en/learn/human-feedback-in-flows) guide.


@@ -73,8 +73,6 @@ When this flow runs, it will:
| `default_outcome` | `str` | No | Outcome to use if no feedback provided. Must be in `emit` |
| `metadata` | `dict` | No | Additional data for enterprise integrations |
| `provider` | `HumanFeedbackProvider` | No | Custom provider for async/non-blocking feedback. See [Async Human Feedback](#async-human-feedback-non-blocking) |
| `learn` | `bool` | No | Enable HITL learning: distill lessons from feedback and pre-review future output. Default `False`. See [Learning from Feedback](#learning-from-feedback) |
| `learn_limit` | `int` | No | Max past lessons to recall for pre-review. Default `5` |
### Basic Usage (No Routing)
@@ -98,43 +96,33 @@ def handle_feedback(self, result):
When you specify `emit`, the decorator becomes a router. The human's free-form feedback is interpreted by an LLM and collapsed into one of the specified outcomes:
```python Code
from crewai.flow.flow import Flow, start, listen, or_
from crewai.flow.human_feedback import human_feedback
@start()
@human_feedback(
message="Do you approve this content for publication?",
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="needs_revision",
)
def review_content(self):
return "Draft blog post content here..."
class ReviewFlow(Flow):
@start()
def generate_content(self):
return "Draft blog post content here..."
@listen("approved")
def publish(self, result):
print(f"Publishing! User said: {result.feedback}")
@human_feedback(
message="Do you approve this content for publication?",
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="needs_revision",
)
@listen(or_("generate_content", "needs_revision"))
def review_content(self):
return "Draft blog post content here..."
@listen("rejected")
def discard(self, result):
print(f"Discarding. Reason: {result.feedback}")
@listen("approved")
def publish(self, result):
print(f"Publishing! User said: {result.feedback}")
@listen("rejected")
def discard(self, result):
print(f"Discarding. Reason: {result.feedback}")
@listen("needs_revision")
def revise(self, result):
print(f"Revising based on: {result.feedback}")
```
When the human says something like "needs more detail", the LLM collapses that to `"needs_revision"`, which triggers `review_content` again via `or_()` — creating a revision loop. The loop continues until the outcome is `"approved"` or `"rejected"`.
<Tip>
The LLM uses structured outputs (function calling) when available to guarantee the response is one of your specified outcomes. This makes routing reliable and predictable.
</Tip>
<Warning>
A `@start()` method only runs once at the beginning of the flow. If you need a revision loop, separate the start method from the review method and use `@listen(or_("trigger", "revision_outcome"))` on the review method to enable the self-loop.
</Warning>
## HumanFeedbackResult
The `HumanFeedbackResult` dataclass contains all information about a human feedback interaction:
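The field list itself is elided by the hunk jump below; from how results are used elsewhere on this page (`result.feedback`, `result.output`, and the emitted outcome), a rough sketch of its shape might be (names beyond `feedback` and `output` are assumptions, not the real class):

```python
from dataclasses import dataclass
from typing import Any

@dataclass
class HumanFeedbackResult:
    """Sketch reconstructed from usage in the surrounding examples."""
    feedback: str               # the human's free-form response (result.feedback)
    output: Any                 # the reviewed method's output (result.output)
    outcome: str | None = None  # assumed: the collapsed emit value, e.g. "approved"
```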
@@ -198,183 +186,127 @@ Each `HumanFeedbackResult` is appended to `human_feedback_history`, so multiple
## Complete Example: Content Approval Workflow
Here's a full example implementing a content review and approval workflow with a revision loop:
Here's a full example implementing a content review and approval workflow:
<CodeGroup>
```python Code
from crewai.flow.flow import Flow, start, listen, or_
from crewai.flow.flow import Flow, start, listen
from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult
from pydantic import BaseModel
class ContentState(BaseModel):
topic: str = ""
draft: str = ""
final_content: str = ""
revision_count: int = 0
status: str = "pending"
class ContentApprovalFlow(Flow[ContentState]):
"""A flow that generates content and loops until the human approves."""
"""A flow that generates content and gets human approval."""
@start()
def generate_draft(self):
self.state.draft = "# AI Safety\n\nThis is a draft about AI Safety..."
def get_topic(self):
self.state.topic = input("What topic should I write about? ")
return self.state.topic
@listen(get_topic)
def generate_draft(self, topic):
# In real use, this would call an LLM
self.state.draft = f"# {topic}\n\nThis is a draft about {topic}..."
return self.state.draft
@listen(generate_draft)
@human_feedback(
message="Please review this draft. Approve, reject, or describe what needs changing:",
message="Please review this draft. Reply 'approved', 'rejected', or provide revision feedback:",
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="needs_revision",
)
@listen(or_("generate_draft", "needs_revision"))
def review_draft(self):
self.state.revision_count += 1
return f"{self.state.draft} (v{self.state.revision_count})"
def review_draft(self, draft):
return draft
@listen("approved")
def publish_content(self, result: HumanFeedbackResult):
self.state.status = "published"
print(f"Content approved and published! Reviewer said: {result.feedback}")
self.state.final_content = result.output
print("\n✅ Content approved and published!")
print(f"Reviewer comment: {result.feedback}")
return "published"
@listen("rejected")
def handle_rejection(self, result: HumanFeedbackResult):
self.state.status = "rejected"
print(f"Content rejected. Reason: {result.feedback}")
print("\n❌ Content rejected")
print(f"Reason: {result.feedback}")
return "rejected"
@listen("needs_revision")
def revise_content(self, result: HumanFeedbackResult):
self.state.revision_count += 1
print(f"\n📝 Revision #{self.state.revision_count} requested")
print(f"Feedback: {result.feedback}")
# In a real flow, you might loop back to generate_draft
# For this example, we just acknowledge
return "revision_requested"
# Run the flow
flow = ContentApprovalFlow()
result = flow.kickoff()
print(f"\nFlow completed. Status: {flow.state.status}, Reviews: {flow.state.revision_count}")
print(f"\nFlow completed. Revisions requested: {flow.state.revision_count}")
```
```text Output
==================================================
OUTPUT FOR REVIEW:
==================================================
# AI Safety
This is a draft about AI Safety... (v1)
==================================================
Please review this draft. Approve, reject, or describe what needs changing:
(Press Enter to skip, or type your feedback)
Your feedback: Needs more detail on alignment research
What topic should I write about? AI Safety
==================================================
OUTPUT FOR REVIEW:
==================================================
# AI Safety
This is a draft about AI Safety... (v2)
This is a draft about AI Safety...
==================================================
Please review this draft. Approve, reject, or describe what needs changing:
Please review this draft. Reply 'approved', 'rejected', or provide revision feedback:
(Press Enter to skip, or type your feedback)
Your feedback: Looks good, approved!
Content approved and published! Reviewer said: Looks good, approved!
Content approved and published!
Reviewer comment: Looks good, approved!
Flow completed. Status: published, Reviews: 2
Flow completed. Revisions requested: 0
```
</CodeGroup>
The key pattern is `@listen(or_("generate_draft", "needs_revision"))` — the review method listens to both the initial trigger and its own revision outcome, creating a self-loop that repeats until the human approves or rejects.
## Combining with Other Decorators
The `@human_feedback` decorator works with `@start()`, `@listen()`, and `or_()`. Both decorator orderings work — the framework propagates attributes in both directions — but the recommended patterns are:
The `@human_feedback` decorator works with other flow decorators. Place it as the innermost decorator (closest to the function):
```python Code
# One-shot review at the start of a flow (no self-loop)
# Correct: @human_feedback is innermost (closest to the function)
@start()
@human_feedback(message="Review this:", emit=["approved", "rejected"], llm="gpt-4o-mini")
@human_feedback(message="Review this:")
def my_start_method(self):
return "content"
# Linear review on a listener (no self-loop)
@listen(other_method)
@human_feedback(message="Review this too:", emit=["good", "bad"], llm="gpt-4o-mini")
@human_feedback(message="Review this too:")
def my_listener(self, data):
return f"processed: {data}"
# Self-loop: review that can loop back for revisions
@human_feedback(message="Approve or revise?", emit=["approved", "revise"], llm="gpt-4o-mini")
@listen(or_("upstream_method", "revise"))
def review_with_loop(self):
return "content for review"
```
### Self-loop pattern
To create a revision loop, the review method must listen to **both** an upstream trigger and its own revision outcome using `or_()`:
```python Code
@start()
def generate(self):
return "initial draft"
@human_feedback(
message="Approve or request changes?",
emit=["revise", "approved"],
llm="gpt-4o-mini",
default_outcome="approved",
)
@listen(or_("generate", "revise"))
def review(self):
return "content"
@listen("approved")
def publish(self):
return "published"
```
When the outcome is `"revise"`, the flow routes back to `review` (because it listens to `"revise"` via `or_()`). When the outcome is `"approved"`, the flow continues to `publish`. This works because the flow engine exempts routers from the "fire once" rule, allowing them to re-execute on each loop iteration.
### Chained routers
A listener triggered by one router's outcome can itself be a router:
```python Code
@start()
def generate(self):
return "draft content"
@human_feedback(message="First review:", emit=["approved", "rejected"], llm="gpt-4o-mini")
@listen("generate")
def first_review(self):
return "draft content"
@human_feedback(message="Final review:", emit=["publish", "hold"], llm="gpt-4o-mini")
@listen("approved")
def final_review(self, prev):
return "final content"
@listen("publish")
def on_publish(self, prev):
return "published"
@listen("hold")
def on_hold(self, prev):
return "held for later"
```
### Limitations
- **`@start()` methods run once**: A `@start()` method cannot self-loop. If you need a revision cycle, use a separate `@start()` method as the entry point and put the `@human_feedback` on a `@listen()` method.
- **No `@start()` + `@listen()` on the same method**: This is a Flow framework constraint. A method is either a start point or a listener, not both.
<Tip>
Place `@human_feedback` as the innermost decorator (last/closest to the function) so it wraps the method directly and can capture the return value before passing to the flow system.
</Tip>
## Best Practices
### 1. Write Clear Request Messages
The `message` parameter is what the human sees. Make it actionable:
The `request` parameter is what the human sees. Make it actionable:
```python Code
# ✅ Good - clear and actionable
@@ -582,9 +514,9 @@ class ContentPipeline(Flow):
@start()
@human_feedback(
message="Approve this content for publication?",
emit=["approved", "rejected"],
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="rejected",
default_outcome="needs_revision",
provider=SlackNotificationProvider("#content-reviews"),
)
def generate_content(self):
@@ -600,6 +532,11 @@ class ContentPipeline(Flow):
print(f"Archived. Reason: {result.feedback}")
return {"status": "archived"}
@listen("needs_revision")
def queue_revision(self, result):
print(f"Queued for revision: {result.feedback}")
return {"status": "revision_needed"}
# Starting the flow (will pause and wait for Slack response)
def start_content_pipeline():
@@ -639,64 +576,6 @@ If you're using an async web framework (FastAPI, aiohttp, Slack Bolt async mode)
5. **Automatic persistence**: State is automatically saved when `HumanFeedbackPending` is raised and uses `SQLiteFlowPersistence` by default
6. **Custom persistence**: Pass a custom persistence instance to `from_pending()` if needed (see the lifecycle sketch below)
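Putting points 5 and 6 together, the non-blocking lifecycle looks roughly like the sketch below; the exception's attributes and the resume call are assumptions here, not confirmed API:

```python
from crewai.flow.human_feedback import HumanFeedbackPending  # import path assumed

def start_review(flow_cls) -> str:
    """Kick off a flow and return immediately once feedback is pending."""
    flow = flow_cls()
    try:
        flow.kickoff()
    except HumanFeedbackPending as pending:
        # State was already auto-saved (SQLiteFlowPersistence by default),
        # so we can hand the id to a webhook/queue and return right away.
        return pending.flow_id  # hypothetical attribute
    return "completed"

def on_human_response(flow_cls, flow_id: str, feedback: str) -> None:
    """Resume a paused flow when the human's answer arrives."""
    flow = flow_cls.from_pending(flow_id)  # name from the docs; args assumed
    flow.resume_with_feedback(feedback)    # hypothetical resume entry point
```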
## Learning from Feedback
The `learn=True` parameter enables a feedback loop between human reviewers and the memory system. When enabled, the system progressively improves its outputs by learning from past human corrections.
### How It Works
1. **After feedback**: The LLM extracts generalizable lessons from the output + feedback and stores them in memory with `source="hitl"`. If the feedback is just approval (e.g. "looks good"), nothing is stored.
2. **Before next review**: Past HITL lessons are recalled from memory and applied by the LLM to improve the output before the human sees it.
Over time, the human sees progressively better pre-reviewed output because each correction informs future reviews.
### Example
```python Code
class ArticleReviewFlow(Flow):
@start()
def generate_article(self):
return self.crew.kickoff(inputs={"topic": "AI Safety"}).raw
@human_feedback(
message="Review this article draft:",
emit=["approved", "needs_revision"],
llm="gpt-4o-mini",
learn=True, # enable HITL learning
)
@listen(or_("generate_article", "needs_revision"))
def review_article(self):
return self.last_human_feedback.output if self.last_human_feedback else "article draft"
@listen("approved")
def publish(self):
print(f"Publishing: {self.last_human_feedback.output}")
```
**First run**: The human sees the raw output and says "Always include citations for factual claims." The lesson is distilled and stored in memory.
**Second run**: The system recalls the citation lesson, pre-reviews the output to add citations, then shows the improved version. The human's job shifts from "fix everything" to "catch what the system missed."
### Configuration
| Parameter | Default | Description |
|-----------|---------|-------------|
| `learn` | `False` | Enable HITL learning |
| `learn_limit` | `5` | Max past lessons to recall for pre-review |
### Key Design Decisions
- **Same LLM for everything**: The `llm` parameter on the decorator is shared by outcome collapsing, lesson distillation, and pre-review. No need to configure multiple models.
- **Structured output**: Both distillation and pre-review use function calling with Pydantic models when the LLM supports it, falling back to text parsing otherwise.
- **Non-blocking storage**: Lessons are stored via `remember_many()`, which runs in a background thread -- the flow continues immediately (see the sketch after this list).
- **Graceful degradation**: If the LLM fails during distillation, nothing is stored. If it fails during pre-review, the raw output is shown. Neither failure blocks the flow.
- **No scope/categories needed**: When storing lessons, only `source` is passed. The encoding pipeline infers scope, categories, and importance automatically.
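The non-blocking storage bullet, for instance, amounts to a fire-and-forget thread around `remember_many()`; a minimal sketch of that pattern (illustrative only, not the library's internals, and the `remember_many` signature is assumed):

```python
import threading

def store_lessons_async(memory, lessons: list[str]) -> None:
    """Fire-and-forget write: the flow thread returns immediately while the
    memory write runs in the background, mirroring the bullets above."""
    def _write() -> None:
        try:
            memory.remember_many(lessons, source="hitl")  # signature assumed
        except Exception:
            # Graceful degradation: a failed store never blocks the flow.
            pass
    threading.Thread(target=_write, daemon=True).start()
```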
<Note>
`learn=True` requires the Flow to have memory available. Flows get memory automatically by default, but if you've disabled it with `_skip_auto_memory`, HITL learning will be silently skipped.
</Note>
## Related Documentation
- [Flows Overview](/en/concepts/flows) - Learn about CrewAI Flows
@@ -704,4 +583,3 @@ class ArticleReviewFlow(Flow):
- [Flow Persistence](/en/concepts/flows#persistence) - Persisting flow state
- [Routing with @router](/en/concepts/flows#router) - More about conditional routing
- [Human Input on Execution](/en/learn/human-input-on-execution) - Task-level human input
- [Memory](/en/concepts/memory) - The unified memory system used by HITL learning

File diff suppressed because it is too large.


@@ -38,21 +38,22 @@ CrewAI Enterprise turns AI workflows into collaborative human-AI processes
Configure human review checkpoints within your Flows using the `@human_feedback` decorator. When execution reaches a review point, the system pauses, notifies the assignee via email, and waits for a response.
```python
from crewai.flow.flow import Flow, start, listen, or_
from crewai.flow.flow import Flow, start, listen
from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult
class ContentApprovalFlow(Flow):
@start()
def generate_content(self):
# AI generates content
return "Generated marketing copy for Q1 campaign..."
@listen(generate_content)
@human_feedback(
message="브랜드 준수를 위해 이 콘텐츠를 검토해 주세요:",
emit=["approved", "rejected", "needs_revision"],
)
@listen(or_("generate_content", "needs_revision"))
def review_content(self):
return "검토용 마케팅 카피..."
def review_content(self, content):
return content
@listen("approved")
def publish_content(self, result: HumanFeedbackResult):
@@ -61,6 +62,10 @@ class ContentApprovalFlow(Flow):
@listen("rejected")
def archive_content(self, result: HumanFeedbackResult):
print(f"콘텐츠 거부됨. 사유: {result.feedback}")
@listen("needs_revision")
def revise_content(self, result: HumanFeedbackResult):
print(f"수정 요청: {result.feedback}")
```
For complete implementation details, see the [Human Feedback in Flows](/ko/learn/human-feedback-in-flows) guide.


@@ -73,8 +73,6 @@ flow.kickoff()
| `default_outcome` | `str` | No | Outcome to use if no feedback is provided. Must be in `emit` |
| `metadata` | `dict` | No | Additional data for enterprise integrations |
| `provider` | `HumanFeedbackProvider` | No | Custom provider for async/non-blocking feedback. See [Async Human Feedback](#비동기-인간-피드백-논블로킹) |
| `learn` | `bool` | No | Enable HITL learning: distill lessons from feedback and pre-review future output. Default `False`. See [Learning from Feedback](#피드백에서-학습하기) |
| `learn_limit` | `int` | No | Max past lessons to recall for pre-review. Default `5` |
### Basic Usage (No Routing)
@@ -98,43 +96,33 @@ def handle_feedback(self, result):
When you specify `emit`, the decorator becomes a router. The human's free-form feedback is interpreted by an LLM and collapsed into one of the specified outcomes:
```python Code
from crewai.flow.flow import Flow, start, listen, or_
from crewai.flow.human_feedback import human_feedback
@start()
@human_feedback(
message="이 콘텐츠의 출판을 승인하시겠습니까?",
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="needs_revision",
)
def review_content(self):
return "블로그 게시물 초안 내용..."
class ReviewFlow(Flow):
@start()
def generate_content(self):
return "블로그 게시물 초안 내용..."
@listen("approved")
def publish(self, result):
print(f"출판 중! 사용자 의견: {result.feedback}")
@human_feedback(
message="이 콘텐츠의 출판을 승인하시겠습니까?",
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="needs_revision",
)
@listen(or_("generate_content", "needs_revision"))
def review_content(self):
return "블로그 게시물 초안 내용..."
@listen("rejected")
def discard(self, result):
print(f"폐기됨. 이유: {result.feedback}")
@listen("approved")
def publish(self, result):
print(f"출판 중! 사용자 의견: {result.feedback}")
@listen("rejected")
def discard(self, result):
print(f"폐기됨. 이유: {result.feedback}")
@listen("needs_revision")
def revise(self, result):
print(f"다음을 기반으로 수정 중: {result.feedback}")
```
When the human says something like "needs more detail", the LLM collapses that to `"needs_revision"`, which triggers `review_content` again via `or_()` — creating a revision loop. The loop continues until the outcome is `"approved"` or `"rejected"`.
<Tip>
The LLM uses structured outputs (function calling) when available to guarantee the response is one of your specified outcomes. This makes routing reliable and predictable.
</Tip>
<Warning>
A `@start()` method only runs once at the beginning of the flow. If you need a revision loop, separate the start method from the review method and use `@listen(or_("trigger", "revision_outcome"))` on the review method to enable the self-loop.
</Warning>
## HumanFeedbackResult
The `HumanFeedbackResult` dataclass contains all information about a human feedback interaction:
@@ -203,162 +191,116 @@ def summarize(self):
<CodeGroup>
```python Code
from crewai.flow.flow import Flow, start, listen, or_
from crewai.flow.flow import Flow, start, listen
from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult
from pydantic import BaseModel
class ContentState(BaseModel):
topic: str = ""
draft: str = ""
final_content: str = ""
revision_count: int = 0
status: str = "pending"
class ContentApprovalFlow(Flow[ContentState]):
"""콘텐츠를 생성하고 승인될 때까지 반복하는 Flow."""
"""콘텐츠를 생성하고 인간의 승인을 받는 Flow입니다."""
@start()
def generate_draft(self):
self.state.draft = "# AI 안전\n\nAI 안전에 대한 초안..."
def get_topic(self):
self.state.topic = input("어떤 주제에 대해 글을 쓸까요? ")
return self.state.topic
@listen(get_topic)
def generate_draft(self, topic):
# In real use, this would call an LLM
self.state.draft = f"# {topic}\n\nThis is a draft about {topic}..."
return self.state.draft
@listen(generate_draft)
@human_feedback(
message="이 초안을 검토해 주세요. 승인, 거부 또는 변경이 필요한 사항을 설명해 주세요:",
message="이 초안을 검토해 주세요. 'approved', 'rejected'로 답하거나 수정 피드백을 제공해 주세요:",
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="needs_revision",
)
@listen(or_("generate_draft", "needs_revision"))
def review_draft(self):
self.state.revision_count += 1
return f"{self.state.draft} (v{self.state.revision_count})"
def review_draft(self, draft):
return draft
@listen("approved")
def publish_content(self, result: HumanFeedbackResult):
self.state.status = "published"
print(f"콘텐츠 승인 및 게시! 리뷰어 의견: {result.feedback}")
self.state.final_content = result.output
print("\n✅ 콘텐츠 승인되어 출판되었습니다!")
print(f"검토자 코멘트: {result.feedback}")
return "published"
@listen("rejected")
def handle_rejection(self, result: HumanFeedbackResult):
self.state.status = "rejected"
print(f"콘텐츠 거부됨. 이유: {result.feedback}")
print("\n❌ 콘텐츠가 거부되었습니다")
print(f"이유: {result.feedback}")
return "rejected"
@listen("needs_revision")
def revise_content(self, result: HumanFeedbackResult):
self.state.revision_count += 1
print(f"\n📝 수정 #{self.state.revision_count} 요청됨")
print(f"피드백: {result.feedback}")
# 실제 Flow에서는 generate_draft로 돌아갈 수 있습니다
# 이 예제에서는 단순히 확인합니다
return "revision_requested"
# Flow 실행
flow = ContentApprovalFlow()
result = flow.kickoff()
print(f"\nFlow 완료. 상태: {flow.state.status}, 검토 횟수: {flow.state.revision_count}")
print(f"\nFlow 완료. 요청된 수정: {flow.state.revision_count}")
```
```text Output
==================================================
OUTPUT FOR REVIEW:
==================================================
# AI Safety
This is a draft about AI Safety... (v1)
==================================================
Please review this draft. Approve, reject, or describe what needs changing:
(Press Enter to skip, or type your feedback)
Your feedback: Needs more detail
What topic should I write about? AI Safety
==================================================
OUTPUT FOR REVIEW:
==================================================
# AI Safety
This is a draft about AI Safety... (v2)
This is a draft about AI Safety...
==================================================
Please review this draft. Approve, reject, or describe what needs changing:
Please review this draft. Reply 'approved', 'rejected', or provide revision feedback:
(Press Enter to skip, or type your feedback)
Your feedback: Looks good, approved!
Content approved and published! Reviewer said: Looks good, approved!
Content approved and published!
Reviewer comment: Looks good, approved!
Flow completed. Status: published, Reviews: 2
Flow completed. Revisions requested: 0
```
</CodeGroup>
## Combining with Other Decorators
The `@human_feedback` decorator works with `@start()`, `@listen()`, and `or_()`. Both decorator orderings work — the framework propagates attributes in both directions — but the recommended patterns are:
The `@human_feedback` decorator works with other flow decorators. Place it as the innermost decorator (closest to the function):
```python Code
# One-shot review at the start of a flow (no self-loop)
# Correct: @human_feedback is innermost (closest to the function)
@start()
@human_feedback(message="이것을 검토해 주세요:", emit=["approved", "rejected"], llm="gpt-4o-mini")
@human_feedback(message="이것을 검토해 주세요:")
def my_start_method(self):
return "content"
# Linear review on a listener (no self-loop)
@listen(other_method)
@human_feedback(message="이것도 검토해 주세요:", emit=["good", "bad"], llm="gpt-4o-mini")
@human_feedback(message="이것도 검토해 주세요:")
def my_listener(self, data):
return f"processed: {data}"
# Self-loop: review that can loop back for revisions
@human_feedback(message="Approve or revise?", emit=["approved", "revise"], llm="gpt-4o-mini")
@listen(or_("upstream_method", "revise"))
def review_with_loop(self):
return "content for review"
```
### Self-loop pattern
To create a revision loop, the review method must listen to **both** an upstream trigger and its own revision outcome using `or_()`:
```python Code
@start()
def generate(self):
return "initial draft"
@human_feedback(
message="승인하시겠습니까, 아니면 변경을 요청하시겠습니까?",
emit=["revise", "approved"],
llm="gpt-4o-mini",
default_outcome="approved",
)
@listen(or_("generate", "revise"))
def review(self):
return "content"
@listen("approved")
def publish(self):
return "published"
```
When the outcome is `"revise"`, the flow routes back to `review` (because it listens to `"revise"` via `or_()`). When the outcome is `"approved"`, the flow continues to `publish`. This works because the flow engine exempts routers from the "fire once" rule, allowing them to re-execute on each loop iteration.
### Chained routers
A listener triggered by one router's outcome can itself be a router:
```python Code
@start()
@human_feedback(message="첫 번째 검토:", emit=["approved", "rejected"], llm="gpt-4o-mini")
def draft(self):
return "draft content"
@listen("approved")
@human_feedback(message="최종 검토:", emit=["publish", "revise"], llm="gpt-4o-mini")
def final_review(self, prev):
return "final content"
@listen("publish")
def on_publish(self, prev):
return "published"
```
### Limitations
- **`@start()` methods run once**: A `@start()` method cannot self-loop. If you need a revision cycle, use a separate `@start()` method as the entry point and put the `@human_feedback` on a `@listen()` method.
- **No `@start()` + `@listen()` on the same method**: This is a Flow framework constraint. A method is either a start point or a listener, not both.
<Tip>
Place `@human_feedback` as the innermost decorator (last/closest to the function) so it wraps the method directly and can capture the return value before passing to the flow system.
</Tip>
## Best Practices
@@ -572,9 +514,9 @@ class ContentPipeline(Flow):
@start()
@human_feedback(
message="이 콘텐츠의 출판을 승인하시겠습니까?",
emit=["approved", "rejected"],
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="rejected",
default_outcome="needs_revision",
provider=SlackNotificationProvider("#content-reviews"),
)
def generate_content(self):
@@ -590,6 +532,11 @@ class ContentPipeline(Flow):
print(f"보관됨. 이유: {result.feedback}")
return {"status": "archived"}
@listen("needs_revision")
def queue_revision(self, result):
print(f"수정 대기열에 추가됨: {result.feedback}")
return {"status": "revision_needed"}
# Starting the flow (will pause and wait for Slack response)
def start_content_pipeline():
@@ -629,64 +576,6 @@ async def on_slack_feedback_async(flow_id: str, slack_message: str):
5. **Automatic persistence**: State is automatically saved when `HumanFeedbackPending` is raised and uses `SQLiteFlowPersistence` by default
6. **Custom persistence**: Pass a custom persistence instance to `from_pending()` if needed
## Learning from Feedback
The `learn=True` parameter enables a feedback loop between human reviewers and the memory system. When enabled, the system progressively improves its outputs by learning from past human corrections.
### How It Works
1. **After feedback**: The LLM extracts generalizable lessons from the output + feedback and stores them in memory with `source="hitl"`. If the feedback is just approval (e.g. "looks good"), nothing is stored.
2. **Before next review**: Past HITL lessons are recalled from memory and applied by the LLM to improve the output before the human sees it.
Over time, the human sees progressively better pre-reviewed output because each correction informs future reviews.
### Example
```python Code
class ArticleReviewFlow(Flow):
@start()
def generate_article(self):
return self.crew.kickoff(inputs={"topic": "AI Safety"}).raw
@human_feedback(
message="이 글 초안을 검토해 주세요:",
emit=["approved", "needs_revision"],
llm="gpt-4o-mini",
learn=True,
)
@listen(or_("generate_article", "needs_revision"))
def review_article(self):
return self.last_human_feedback.output if self.last_human_feedback else "article draft"
@listen("approved")
def publish(self):
print(f"Publishing: {self.last_human_feedback.output}")
```
**First run**: The human sees the raw output and says "Always include citations for factual claims." The lesson is distilled and stored in memory.
**Second run**: The system recalls the citation lesson, pre-reviews the output to add citations, then shows the improved version. The human's job shifts from "fix everything" to "catch what the system missed."
### Configuration
| Parameter | Default | Description |
|-----------|---------|-------------|
| `learn` | `False` | Enable HITL learning |
| `learn_limit` | `5` | Max past lessons to recall for pre-review |
### Key Design Decisions
- **Same LLM for everything**: The `llm` parameter on the decorator is shared by outcome collapsing, lesson distillation, and pre-review. No need to configure multiple models.
- **Structured output**: Both distillation and pre-review use function calling with Pydantic models when the LLM supports it, falling back to text parsing otherwise.
- **Non-blocking storage**: Lessons are stored via `remember_many()`, which runs in a background thread -- the flow continues immediately.
- **Graceful degradation**: If the LLM fails during distillation, nothing is stored. If it fails during pre-review, the raw output is shown. Neither failure blocks the flow.
- **No scope/categories needed**: When storing lessons, only `source` is passed. The encoding pipeline infers scope, categories, and importance automatically.
<Note>
`learn=True` requires the Flow to have memory available. Flows get memory automatically by default, but if you've disabled it with `_skip_auto_memory`, HITL learning will be silently skipped.
</Note>
## Related Documentation
- [Flows Overview](/ko/concepts/flows) - Learn about CrewAI Flows
@@ -694,4 +583,3 @@ class ArticleReviewFlow(Flow):
- [Flow Persistence](/ko/concepts/flows#persistence) - Persisting flow state
- [Routing with @router](/ko/concepts/flows#router) - More about conditional routing
- [Human Input on Execution](/ko/learn/human-input-on-execution) - Task-level human input
- [Memory](/ko/concepts/memory) - The unified memory system used by HITL learning

File diff suppressed because it is too large.


@@ -38,21 +38,22 @@ CrewAI Enterprise provides a comprehensive Human-in-the-Loop (HITL) management system
Configure human review checkpoints within your Flows using the `@human_feedback` decorator. When execution reaches a review point, the system pauses, notifies the assignee via email, and waits for a response.
```python
from crewai.flow.flow import Flow, start, listen, or_
from crewai.flow.flow import Flow, start, listen
from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult
class ContentApprovalFlow(Flow):
@start()
def generate_content(self):
# AI generates content
return "Generated marketing copy for Q1 campaign..."
@listen(generate_content)
@human_feedback(
message="Por favor, revise este conteúdo para conformidade com a marca:",
emit=["approved", "rejected", "needs_revision"],
)
@listen(or_("generate_content", "needs_revision"))
def review_content(self):
return "Texto de marketing para revisão..."
def review_content(self, content):
return content
@listen("approved")
def publish_content(self, result: HumanFeedbackResult):
@@ -61,6 +62,10 @@ class ContentApprovalFlow(Flow):
@listen("rejected")
def archive_content(self, result: HumanFeedbackResult):
print(f"Conteúdo rejeitado. Motivo: {result.feedback}")
@listen("needs_revision")
def revise_content(self, result: HumanFeedbackResult):
print(f"Revisão solicitada: {result.feedback}")
```
For complete implementation details, see the [Human Feedback in Flows](/pt-BR/learn/human-feedback-in-flows) guide.


@@ -73,8 +73,6 @@ When this flow runs, it will:
| `default_outcome` | `str` | No | Outcome to use if no feedback is provided. Must be in `emit` |
| `metadata` | `dict` | No | Additional data for enterprise integrations |
| `provider` | `HumanFeedbackProvider` | No | Custom provider for async/non-blocking feedback. See [Async Human Feedback](#feedback-humano-assíncrono-não-bloqueante) |
| `learn` | `bool` | No | Enable HITL learning: distill lessons from feedback and pre-review future output. Default `False`. See [Learning from Feedback](#aprendendo-com-feedback) |
| `learn_limit` | `int` | No | Max past lessons to recall for pre-review. Default `5` |
### Basic Usage (No Routing)
@@ -98,43 +96,33 @@ def handle_feedback(self, result):
When you specify `emit`, the decorator becomes a router. The human's free-form feedback is interpreted by an LLM and collapsed into one of the specified outcomes:
```python Code
from crewai.flow.flow import Flow, start, listen, or_
from crewai.flow.human_feedback import human_feedback
@start()
@human_feedback(
message="Você aprova este conteúdo para publicação?",
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="needs_revision",
)
def review_content(self):
return "Rascunho do post do blog aqui..."
class ReviewFlow(Flow):
@start()
def generate_content(self):
return "Rascunho do post do blog aqui..."
@listen("approved")
def publish(self, result):
print(f"Publicando! Usuário disse: {result.feedback}")
@human_feedback(
message="Você aprova este conteúdo para publicação?",
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="needs_revision",
)
@listen(or_("generate_content", "needs_revision"))
def review_content(self):
return "Rascunho do post do blog aqui..."
@listen("rejected")
def discard(self, result):
print(f"Descartando. Motivo: {result.feedback}")
@listen("approved")
def publish(self, result):
print(f"Publicando! Usuário disse: {result.feedback}")
@listen("rejected")
def discard(self, result):
print(f"Descartando. Motivo: {result.feedback}")
@listen("needs_revision")
def revise(self, result):
print(f"Revisando baseado em: {result.feedback}")
```
When the human says something like "needs more detail", the LLM collapses that to `"needs_revision"`, which triggers `review_content` again via `or_()` — creating a revision loop. The loop continues until the outcome is `"approved"` or `"rejected"`.
<Tip>
The LLM uses structured outputs (function calling) when available to guarantee the response is one of your specified outcomes. This makes routing reliable and predictable.
</Tip>
<Warning>
A `@start()` method only runs once at the beginning of the flow. If you need a revision loop, separate the start method from the review method and use `@listen(or_("trigger", "revision_outcome"))` on the review method to enable the self-loop.
</Warning>
## HumanFeedbackResult
The `HumanFeedbackResult` dataclass contains all information about a human feedback interaction:
@@ -203,162 +191,116 @@ Here's a full example implementing a content review and approval workflow
<CodeGroup>
```python Code
from crewai.flow.flow import Flow, start, listen, or_
from crewai.flow.flow import Flow, start, listen
from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult
from pydantic import BaseModel
class ContentState(BaseModel):
topic: str = ""
draft: str = ""
final_content: str = ""
revision_count: int = 0
status: str = "pending"
class ContentApprovalFlow(Flow[ContentState]):
"""Um flow que gera conteúdo e faz loop até o humano aprovar."""
"""Um flow que gera conteúdo e obtém aprovação humana."""
@start()
def generate_draft(self):
self.state.draft = "# IA Segura\n\nEste é um rascunho sobre IA Segura..."
def get_topic(self):
self.state.topic = input("Sobre qual tópico devo escrever? ")
return self.state.topic
@listen(get_topic)
def generate_draft(self, topic):
# In real use, this would call an LLM
self.state.draft = f"# {topic}\n\nThis is a draft about {topic}..."
return self.state.draft
@listen(generate_draft)
@human_feedback(
message="Por favor, revise este rascunho. Aprove, rejeite ou descreva o que precisa mudar:",
message="Por favor, revise este rascunho. Responda 'approved', 'rejected', ou forneça feedback de revisão:",
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="needs_revision",
)
@listen(or_("generate_draft", "needs_revision"))
def review_draft(self):
self.state.revision_count += 1
return f"{self.state.draft} (v{self.state.revision_count})"
def review_draft(self, draft):
return draft
@listen("approved")
def publish_content(self, result: HumanFeedbackResult):
self.state.status = "published"
print(f"Conteúdo aprovado e publicado! Revisor disse: {result.feedback}")
self.state.final_content = result.output
print("\n✅ Conteúdo aprovado e publicado!")
print(f"Comentário do revisor: {result.feedback}")
return "published"
@listen("rejected")
def handle_rejection(self, result: HumanFeedbackResult):
self.state.status = "rejected"
print(f"Conteúdo rejeitado. Motivo: {result.feedback}")
print("\n❌ Conteúdo rejeitado")
print(f"Motivo: {result.feedback}")
return "rejected"
@listen("needs_revision")
def revise_content(self, result: HumanFeedbackResult):
self.state.revision_count += 1
print(f"\n📝 Revisão #{self.state.revision_count} solicitada")
print(f"Feedback: {result.feedback}")
# Em um flow real, você pode voltar para generate_draft
# Para este exemplo, apenas reconhecemos
return "revision_requested"
# Executar o flow
flow = ContentApprovalFlow()
result = flow.kickoff()
print(f"\nFlow finalizado. Status: {flow.state.status}, Revisões: {flow.state.revision_count}")
print(f"\nFlow concluído. Revisões solicitadas: {flow.state.revision_count}")
```
```text Output
==================================================
OUTPUT FOR REVIEW:
==================================================
# AI Safety
This is a draft about AI Safety... (v1)
==================================================
Please review this draft. Approve, reject, or describe what needs changing:
(Press Enter to skip, or type your feedback)
Your feedback: I need more details on AI safety.
What topic should I write about? AI Safety
==================================================
OUTPUT FOR REVIEW:
==================================================
# AI Safety
# AI Safety
This is a draft about AI Safety... (v2)
This is a draft about AI Safety...
==================================================
Please review this draft. Approve, reject, or describe what needs changing:
Please review this draft. Reply 'approved', 'rejected', or provide revision feedback:
(Press Enter to skip, or type your feedback)
Your feedback: Looks good, approved!
Content approved and published! Reviewer said: Looks good, approved!
Content approved and published!
Reviewer comment: Looks good, approved!
Flow completed. Status: published, Reviews: 2
Flow completed. Revisions requested: 0
```
</CodeGroup>
## Combining with Other Decorators
The `@human_feedback` decorator works with `@start()`, `@listen()`, and `or_()`. Both decorator orderings work — the framework propagates attributes in both directions — but the recommended patterns are:
The `@human_feedback` decorator works with other flow decorators. Place it as the innermost decorator (closest to the function):
```python Code
# One-shot review at the start of a flow (no self-loop)
# Correct: @human_feedback is innermost (closest to the function)
@start()
@human_feedback(message="Revise isto:", emit=["approved", "rejected"], llm="gpt-4o-mini")
@human_feedback(message="Revise isto:")
def my_start_method(self):
return "content"
# Linear review on a listener (no self-loop)
@listen(other_method)
@human_feedback(message="Revise isto também:", emit=["good", "bad"], llm="gpt-4o-mini")
@human_feedback(message="Revise isto também:")
def my_listener(self, data):
return f"processed: {data}"
# Self-loop: review that can loop back for revisions
@human_feedback(message="Approve or revise?", emit=["approved", "revise"], llm="gpt-4o-mini")
@listen(or_("upstream_method", "revise"))
def review_with_loop(self):
return "content for review"
```
### Self-loop pattern
To create a revision loop, the review method must listen to **both** an upstream trigger and its own revision outcome using `or_()`:
```python Code
@start()
def generate(self):
return "initial draft"
@human_feedback(
message="Aprovar ou solicitar alterações?",
emit=["revise", "approved"],
llm="gpt-4o-mini",
default_outcome="approved",
)
@listen(or_("generate", "revise"))
def review(self):
return "content"
@listen("approved")
def publish(self):
return "published"
```
When the outcome is `"revise"`, the flow routes back to `review` (because it listens to `"revise"` via `or_()`). When the outcome is `"approved"`, the flow continues to `publish`. This works because the flow engine exempts routers from the "fire once" rule, allowing them to re-execute on each loop iteration.
### Chained routers
A listener triggered by one router's outcome can itself be a router:
```python Code
@start()
@human_feedback(message="Primeira revisão:", emit=["approved", "rejected"], llm="gpt-4o-mini")
def draft(self):
return "draft content"
@listen("approved")
@human_feedback(message="Revisão final:", emit=["publish", "revise"], llm="gpt-4o-mini")
def final_review(self, prev):
return "final content"
@listen("publish")
def on_publish(self, prev):
return "published"
```
### Limitations
- **`@start()` methods run once**: A `@start()` method cannot self-loop. If you need a revision cycle, use a separate `@start()` method as the entry point and put the `@human_feedback` on a `@listen()` method.
- **No `@start()` + `@listen()` on the same method**: This is a Flow framework constraint. A method is either a start point or a listener, not both.
<Tip>
Place `@human_feedback` as the innermost decorator (last/closest to the function) so it wraps the method directly and can capture the return value before passing to the flow system.
</Tip>
## Best Practices
@@ -572,9 +514,9 @@ class ContentPipeline(Flow):
@start()
@human_feedback(
message="Aprova este conteúdo para publicação?",
emit=["approved", "rejected"],
emit=["approved", "rejected", "needs_revision"],
llm="gpt-4o-mini",
default_outcome="rejected",
default_outcome="needs_revision",
provider=SlackNotificationProvider("#content-reviews"),
)
def generate_content(self):
@@ -590,6 +532,11 @@ class ContentPipeline(Flow):
print(f"Arquivado. Motivo: {result.feedback}")
return {"status": "archived"}
@listen("needs_revision")
def queue_revision(self, result):
print(f"Na fila para revisão: {result.feedback}")
return {"status": "revision_needed"}
# Starting the flow (will pause and wait for Slack response)
def start_content_pipeline():
@@ -629,64 +576,6 @@ If you're using an async web framework (FastAPI, aiohttp, Slack Bolt
5. **Automatic persistence**: State is automatically saved when `HumanFeedbackPending` is raised and uses `SQLiteFlowPersistence` by default
6. **Custom persistence**: Pass a custom persistence instance to `from_pending()` if needed
## Learning from Feedback
The `learn=True` parameter enables a feedback loop between human reviewers and the memory system. When enabled, the system progressively improves its outputs by learning from past human corrections.
### How It Works
1. **After feedback**: The LLM extracts generalizable lessons from the output + feedback and stores them in memory with `source="hitl"`. If the feedback is just approval (e.g. "looks good"), nothing is stored.
2. **Before next review**: Past HITL lessons are recalled from memory and applied by the LLM to improve the output before the human sees it.
Over time, the human sees progressively better pre-reviewed output because each correction informs future reviews.
### Example
```python Code
class ArticleReviewFlow(Flow):
@start()
def generate_article(self):
return self.crew.kickoff(inputs={"topic": "AI Safety"}).raw
@human_feedback(
message="Revise este rascunho do artigo:",
emit=["approved", "needs_revision"],
llm="gpt-4o-mini",
learn=True, # enable HITL learning
)
@listen(or_("generate_article", "needs_revision"))
def review_article(self):
return self.last_human_feedback.output if self.last_human_feedback else "article draft"
@listen("approved")
def publish(self):
print(f"Publishing: {self.last_human_feedback.output}")
```
**First run**: The human sees the raw output and says "Always include citations for factual claims." The lesson is distilled and stored in memory.
**Second run**: The system recalls the citation lesson, pre-reviews the output to add citations, then shows the improved version. The human's job shifts from "fix everything" to "catch what the system missed."
### Configuration
| Parameter | Default | Description |
|-----------|---------|-------------|
| `learn` | `False` | Enable HITL learning |
| `learn_limit` | `5` | Max past lessons to recall for pre-review |
### Key Design Decisions
- **Same LLM for everything**: The `llm` parameter on the decorator is shared by outcome collapsing, lesson distillation, and pre-review. No need to configure multiple models.
- **Structured output**: Both distillation and pre-review use function calling with Pydantic models when the LLM supports it, falling back to text parsing otherwise.
- **Non-blocking storage**: Lessons are stored via `remember_many()`, which runs in a background thread -- the flow continues immediately.
- **Graceful degradation**: If the LLM fails during distillation, nothing is stored. If it fails during pre-review, the raw output is shown. Neither failure blocks the flow.
- **No scope/categories needed**: When storing lessons, only `source` is passed. The encoding pipeline infers scope, categories, and importance automatically.
<Note>
`learn=True` requires the Flow to have memory available. Flows get memory automatically by default, but if you've disabled it with `_skip_auto_memory`, HITL learning will be silently skipped.
</Note>
## Related Documentation
- [Flows Overview](/pt-BR/concepts/flows) - Learn about CrewAI Flows
@@ -694,4 +583,3 @@ class ArticleReviewFlow(Flow):
- [Flow Persistence](/pt-BR/concepts/flows#persistence) - Persisting flow state
- [Routing with @router](/pt-BR/concepts/flows#router) - More about conditional routing
- [Human Input on Execution](/pt-BR/learn/human-input-on-execution) - Task-level human input
- [Memory](/pt-BR/concepts/memory) - The unified memory system used by HITL learning


@@ -6,10 +6,8 @@ from typing import Any
from crewai.tools import BaseTool
from crewai.utilities.pydantic_schema_utils import create_model_from_schema
from crewai.utilities.string_utils import sanitize_tool_name
from pydantic import Field, create_model, model_validator
from pydantic import Field, create_model
import requests
from typing_extensions import Self
from crewai_tools.tools.crewai_platform_tools.misc import (
get_platform_api_base_url,
@@ -22,27 +20,34 @@ class CrewAIPlatformActionTool(BaseTool):
action_schema: dict[str, Any] = Field(
default_factory=dict, description="The schema of the action"
)
integration_token: str | None = Field(
default_factory=get_platform_integration_token,
)
@model_validator(mode="after")
def _build_args_schema(self) -> Self:
parameters = self.action_schema.get("function", {}).get("parameters", {})
def __init__(
self,
description: str,
action_name: str,
action_schema: dict[str, Any],
):
parameters = action_schema.get("function", {}).get("parameters", {})
if parameters and parameters.get("properties"):
try:
if "title" not in parameters:
parameters = {**parameters, "title": f"{self.action_name}Schema"}
parameters = {**parameters, "title": f"{action_name}Schema"}
if "type" not in parameters:
parameters = {**parameters, "type": "object"}
self.args_schema = create_model_from_schema(parameters)
args_schema = create_model_from_schema(parameters)
except Exception:
self.args_schema = create_model(f"{self.action_name}Schema")
args_schema = create_model(f"{action_name}Schema")
else:
self.args_schema = create_model(f"{self.action_name}Schema")
if not self.name:
self.name = sanitize_tool_name(self.action_name)
return self
args_schema = create_model(f"{action_name}Schema")
super().__init__(
name=action_name.lower().replace(" ", "_"),
description=description,
args_schema=args_schema,
)
self.action_name = action_name
self.action_schema = action_schema
def _run(self, **kwargs: Any) -> str:
try:
@@ -53,8 +58,9 @@ class CrewAIPlatformActionTool(BaseTool):
api_url = (
f"{get_platform_api_base_url()}/actions/{self.action_name}/execute"
)
token = get_platform_integration_token()
headers = {
"Authorization": f"Bearer {self.integration_token}",
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
payload = {


@@ -6,7 +6,6 @@ from types import TracebackType
from typing import Any
from crewai.tools import BaseTool
from crewai.utilities.string_utils import sanitize_tool_name
import requests
from crewai_tools.tools.crewai_platform_tools.crewai_platform_action_tool import (
@@ -31,7 +30,6 @@ class CrewaiPlatformToolBuilder:
self._apps = apps
self._actions_schema: dict[str, dict[str, Any]] = {}
self._tools: list[BaseTool] | None = None
self._integration_token = get_platform_integration_token()
def tools(self) -> list[BaseTool]:
"""Fetch actions and return built tools."""
@@ -43,7 +41,7 @@ class CrewaiPlatformToolBuilder:
def _fetch_actions(self) -> None:
"""Fetch action schemas from the platform API."""
actions_url = f"{get_platform_api_base_url()}/actions"
headers = {"Authorization": f"Bearer {self._integration_token}"}
headers = {"Authorization": f"Bearer {get_platform_integration_token()}"}
try:
response = requests.get(
@@ -90,11 +88,9 @@ class CrewaiPlatformToolBuilder:
description = function_details.get("description", f"Execute {action_name}")
tool = CrewAIPlatformActionTool(
name=sanitize_tool_name(action_name),
description=description,
action_name=action_name,
action_schema=action_schema,
integration_token=self._integration_token,
)
tools.append(tool)


@@ -1,7 +1,5 @@
import os
from crewai.context import get_platform_integration_token as _get_context_token
def get_platform_api_base_url() -> str:
"""Get the platform API base URL from environment or use default."""
@@ -9,5 +7,11 @@ def get_platform_api_base_url() -> str:
return f"{base_url}/crewai_plus/api/v1/integrations"
def get_platform_integration_token() -> str | None:
return _get_context_token() or os.getenv("CREWAI_PLATFORM_INTEGRATION_TOKEN")
def get_platform_integration_token() -> str:
"""Get the platform API base URL from environment or use default."""
token = os.getenv("CREWAI_PLATFORM_INTEGRATION_TOKEN") or ""
if not token:
raise ValueError(
"No platform integration token found, please set the CREWAI_PLATFORM_INTEGRATION_TOKEN environment variable"
)
return token # TODO: Use context manager to get token


@@ -27,10 +27,9 @@ class TestCrewAIPlatformActionToolVerify:
def create_test_tool(self):
return CrewAIPlatformActionTool(
name="test_action",
description="Test action tool",
action_name="test_action",
action_schema=self.action_schema,
action_schema=self.action_schema
)
@patch.dict("os.environ", {"CREWAI_PLATFORM_INTEGRATION_TOKEN": "test_token"}, clear=True)


@@ -107,10 +107,12 @@ class TestCrewaiPlatformToolBuilder(unittest.TestCase):
)
def test_fetch_actions_no_token(self):
builder = CrewaiPlatformToolBuilder(apps=["github"])
with patch.dict("os.environ", {}, clear=True):
builder = CrewaiPlatformToolBuilder(apps=["github"])
assert builder._integration_token is None
assert builder.tools() == []
with self.assertRaises(ValueError) as context:
builder._fetch_actions()
assert "No platform integration token found" in str(context.exception)
@patch.dict("os.environ", {"CREWAI_PLATFORM_INTEGRATION_TOKEN": "test_token"})
@patch(


@@ -110,5 +110,6 @@ class TestCrewaiPlatformTools(unittest.TestCase):
def test_crewai_platform_tools_no_token(self):
with patch.dict("os.environ", {}, clear=True):
tools = CrewaiPlatformTools(apps=["github"])
assert tools == []
with self.assertRaises(ValueError) as context:
CrewaiPlatformTools(apps=["github"])
assert "No platform integration token found" in str(context.exception)


@@ -20117,6 +20117,18 @@
"humanized_name": "Web Automation Tool",
"init_params_schema": {
"$defs": {
"AvailableModel": {
"enum": [
"gpt-4o",
"gpt-4o-mini",
"claude-3-5-sonnet-latest",
"claude-3-7-sonnet-latest",
"computer-use-preview",
"gemini-2.0-flash"
],
"title": "AvailableModel",
"type": "string"
},
"EnvVar": {
"properties": {
"default": {
@@ -20194,6 +20206,17 @@
"default": null,
"title": "Model Api Key"
},
"model_name": {
"anyOf": [
{
"$ref": "#/$defs/AvailableModel"
},
{
"type": "null"
}
],
"default": "claude-3-7-sonnet-latest"
},
"project_id": {
"anyOf": [
{


@@ -16,9 +16,9 @@ dependencies = [
"pdfplumber~=0.11.4",
"regex~=2026.1.15",
# Telemetry and Monitoring
"opentelemetry-api~=1.34.0",
"opentelemetry-sdk~=1.34.0",
"opentelemetry-exporter-otlp-proto-http~=1.34.0",
"opentelemetry-api>=1.34.0,<2",
"opentelemetry-sdk>=1.34.0,<2",
"opentelemetry-exporter-otlp-proto-http>=1.34.0,<2",
# Data Handling
"chromadb~=1.1.0",
"tokenizers~=0.20.3",
@@ -26,8 +26,6 @@ dependencies = [
# Authentication and Security
"python-dotenv~=1.1.1",
"pyjwt>=2.9.0,<3",
# TUI
"textual>=7.5.0",
# Configuration and Utils
"click~=8.1.7",
"appdirs~=1.4.4",
@@ -41,7 +39,6 @@ dependencies = [
"mcp~=1.26.0",
"uv~=0.9.13",
"aiosqlite~=0.21.0",
"lancedb>=0.4.0",
]
[project.urls]


@@ -10,7 +10,6 @@ from crewai.flow.flow import Flow
from crewai.knowledge.knowledge import Knowledge
from crewai.llm import LLM
from crewai.llms.base_llm import BaseLLM
from crewai.memory.unified_memory import Memory
from crewai.process import Process
from crewai.task import Task
from crewai.tasks.llm_guardrail import LLMGuardrail
@@ -81,7 +80,6 @@ __all__ = [
"Flow",
"Knowledge",
"LLMGuardrail",
"Memory",
"Process",
"Task",
"TaskOutput",

View File

@@ -71,6 +71,7 @@ from crewai.mcp import (
from crewai.mcp.transports.http import HTTPTransport
from crewai.mcp.transports.sse import SSETransport
from crewai.mcp.transports.stdio import StdioTransport
from crewai.memory.contextual.contextual_memory import ContextualMemory
from crewai.rag.embeddings.types import EmbedderConfig
from crewai.security.fingerprint import Fingerprint
from crewai.tools.agent_tools.agent_tools import AgentTools
@@ -310,12 +311,19 @@ class Agent(BaseAgent):
raise ValueError(f"Invalid Knowledge Configuration: {e!s}") from e
def _is_any_available_memory(self) -> bool:
"""Check if unified memory is available (agent or crew)."""
if getattr(self, "memory", None):
return True
if self.crew and getattr(self.crew, "_memory", None):
return True
return False
"""Check if any memory is available."""
if not self.crew:
return False
memory_attributes = [
"memory",
"_short_term_memory",
"_long_term_memory",
"_entity_memory",
"_external_memory",
]
return any(getattr(self.crew, attr) for attr in memory_attributes)
def _supports_native_tool_calling(self, tools: list[BaseTool]) -> bool:
"""Check if the LLM supports native function calling with the given tools.
@@ -379,16 +387,15 @@ class Agent(BaseAgent):
memory = ""
try:
unified_memory = getattr(self, "memory", None) or (
getattr(self.crew, "_memory", None) if self.crew else None
contextual_memory = ContextualMemory(
self.crew._short_term_memory,
self.crew._long_term_memory,
self.crew._entity_memory,
self.crew._external_memory,
agent=self,
task=task,
)
if unified_memory is not None:
query = task.description
matches = unified_memory.recall(query, limit=10)
if matches:
memory = "Relevant memories:\n" + "\n".join(
f"- {m.record.content}" for m in matches
)
memory = contextual_memory.build_context_for_task(task, context or "")
if memory.strip() != "":
task_prompt += self.i18n.slice("memory").format(memory=memory)
@@ -617,16 +624,17 @@ class Agent(BaseAgent):
memory = ""
try:
unified_memory = getattr(self, "memory", None) or (
getattr(self.crew, "_memory", None) if self.crew else None
contextual_memory = ContextualMemory(
self.crew._short_term_memory,
self.crew._long_term_memory,
self.crew._entity_memory,
self.crew._external_memory,
agent=self,
task=task,
)
memory = await contextual_memory.abuild_context_for_task(
task, context or ""
)
if unified_memory is not None:
query = task.description
matches = unified_memory.recall(query, limit=10)
if matches:
memory = "Relevant memories:\n" + "\n".join(
f"- {m.record.content}" for m in matches
)
if memory.strip() != "":
task_prompt += self.i18n.slice("memory").format(memory=memory)
@@ -1704,18 +1712,6 @@ class Agent(BaseAgent):
# Prepare tools
raw_tools: list[BaseTool] = self.tools or []
# Inject memory tools for standalone kickoff (crew path handles its own)
agent_memory = getattr(self, "memory", None)
if agent_memory is not None:
from crewai.tools.memory_tools import create_memory_tools
existing_names = {sanitize_tool_name(t.name) for t in raw_tools}
raw_tools.extend(
mt for mt in create_memory_tools(agent_memory)
if sanitize_tool_name(mt.name) not in existing_names
)
parsed_tools = parse_tools(raw_tools)
# Build agent_info for backward-compatible event emission
@@ -1790,49 +1786,6 @@ class Agent(BaseAgent):
if input_files:
all_files.update(input_files)
# Inject memory context for standalone kickoff (recall before execution)
if agent_memory is not None:
try:
crewai_event_bus.emit(
self,
event=MemoryRetrievalStartedEvent(
task_id=None,
source_type="agent_kickoff",
from_agent=self,
),
)
start_time = time.time()
matches = agent_memory.recall(formatted_messages, limit=10)
memory_block = ""
if matches:
memory_block = "Relevant memories:\n" + "\n".join(
f"- {m.record.content}" for m in matches
)
if memory_block:
formatted_messages += "\n\n" + self.i18n.slice("memory").format(
memory=memory_block
)
crewai_event_bus.emit(
self,
event=MemoryRetrievalCompletedEvent(
task_id=None,
memory_content=memory_block,
retrieval_time_ms=(time.time() - start_time) * 1000,
source_type="agent_kickoff",
from_agent=self,
),
)
except Exception as e:
crewai_event_bus.emit(
self,
event=MemoryRetrievalFailedEvent(
task_id=None,
source_type="agent_kickoff",
from_agent=self,
error=str(e),
),
)
# Build the input dict for the executor
inputs: dict[str, Any] = {
"input": formatted_messages,
@@ -1903,9 +1856,6 @@ class Agent(BaseAgent):
response_format=response_format,
)
# Save to memory after execution (passive save)
self._save_kickoff_to_memory(messages, output.raw)
crewai_event_bus.emit(
self,
event=LiteAgentExecutionCompletedEvent(
@@ -1926,31 +1876,6 @@ class Agent(BaseAgent):
)
raise
def _save_kickoff_to_memory(
self, messages: str | list[LLMMessage], output_text: str
) -> None:
"""Save kickoff result to memory. No-op if agent has no memory."""
agent_memory = getattr(self, "memory", None)
if agent_memory is None:
return
try:
if isinstance(messages, str):
input_str = messages
else:
input_str = "\n".join(
str(msg.get("content", "")) for msg in messages if msg.get("content")
) or "User request"
raw = (
f"Input: {input_str}\n"
f"Agent: {self.role}\n"
f"Result: {output_text}"
)
extracted = agent_memory.extract_memories(raw)
if extracted:
agent_memory.remember_many(extracted)
except Exception as e:
self._logger.log("error", f"Failed to save kickoff result to memory: {e}")
def _execute_and_build_output(
self,
executor: AgentExecutor,
@@ -2233,9 +2158,6 @@ class Agent(BaseAgent):
response_format=response_format,
)
# Save to memory after async execution (passive save)
self._save_kickoff_to_memory(messages, output.raw)
crewai_event_bus.emit(
self,
event=LiteAgentExecutionCompletedEvent(

View File

@@ -199,14 +199,6 @@ class BaseAgent(BaseModel, ABC, metaclass=AgentMeta):
default=None,
description="List of MCP server references. Supports 'https://server.com/path' for external servers and 'crewai-amp:mcp-name' for AMP marketplace. Use '#tool_name' suffix for specific tools.",
)
memory: Any = Field(
default=None,
description=(
"Enable agent memory. Pass True for default Memory(), "
"or a Memory/MemoryScope/MemorySlice instance for custom configuration. "
"If not set, falls back to crew memory."
),
)
@model_validator(mode="before")
@classmethod
@@ -337,17 +329,6 @@ class BaseAgent(BaseModel, ABC, metaclass=AgentMeta):
self._token_process = TokenProcess()
return self
@model_validator(mode="after")
def resolve_memory(self) -> Self:
"""Resolve memory field: True creates a default Memory(), instance is used as-is."""
if self.memory is True:
from crewai.memory.unified_memory import Memory
self.memory = Memory()
elif self.memory is False:
self.memory = None
return self
@property
def key(self) -> str:
source = [

View File

@@ -1,8 +1,13 @@
from __future__ import annotations
import time
from typing import TYPE_CHECKING
from crewai.agents.parser import AgentFinish
from crewai.memory.entity.entity_memory_item import EntityMemoryItem
from crewai.memory.long_term.long_term_memory_item import LongTermMemoryItem
from crewai.utilities.converter import ConverterError
from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
from crewai.utilities.printer import Printer
from crewai.utilities.string_utils import sanitize_tool_name
@@ -25,29 +30,110 @@ class CrewAgentExecutorMixin:
_i18n: I18N
_printer: Printer = Printer()
def _save_to_memory(self, output: AgentFinish) -> None:
"""Save task result to unified memory (memory or crew._memory)."""
memory = getattr(self.agent, "memory", None) or (
getattr(self.crew, "_memory", None) if self.crew else None
)
if memory is None or not self.task:
return
def _create_short_term_memory(self, output: AgentFinish) -> None:
"""Create and save a short-term memory item if conditions are met."""
if (
f"Action: {sanitize_tool_name('Delegate work to coworker')}"
in output.text
self.crew
and self.agent
and self.task
and f"Action: {sanitize_tool_name('Delegate work to coworker')}"
not in output.text
):
return
try:
raw = (
f"Task: {self.task.description}\n"
f"Agent: {self.agent.role}\n"
f"Expected result: {self.task.expected_output}\n"
f"Result: {output.text}"
)
extracted = memory.extract_memories(raw)
if extracted:
memory.remember_many(extracted, agent_role=self.agent.role)
except Exception as e:
self.agent._logger.log(
"error", f"Failed to save to memory: {e}"
)
try:
if (
hasattr(self.crew, "_short_term_memory")
and self.crew._short_term_memory
):
self.crew._short_term_memory.save(
value=output.text,
metadata={
"observation": self.task.description,
},
)
except Exception as e:
self.agent._logger.log(
"error", f"Failed to add to short term memory: {e}"
)
def _create_external_memory(self, output: AgentFinish) -> None:
"""Create and save a external-term memory item if conditions are met."""
if (
self.crew
and self.agent
and self.task
and hasattr(self.crew, "_external_memory")
and self.crew._external_memory
):
try:
self.crew._external_memory.save(
value=output.text,
metadata={
"description": self.task.description,
"messages": self.messages,
},
)
except Exception as e:
self.agent._logger.log(
"error", f"Failed to add to external memory: {e}"
)
def _create_long_term_memory(self, output: AgentFinish) -> None:
"""Create and save long-term and entity memory items based on evaluation."""
if (
self.crew
and self.crew._long_term_memory
and self.crew._entity_memory
and self.task
and self.agent
):
try:
ltm_agent = TaskEvaluator(self.agent)
evaluation = ltm_agent.evaluate(self.task, output.text)
if isinstance(evaluation, ConverterError):
return
long_term_memory = LongTermMemoryItem(
task=self.task.description,
agent=self.agent.role,
quality=evaluation.quality,
datetime=str(time.time()),
expected_output=self.task.expected_output,
metadata={
"suggestions": evaluation.suggestions,
"quality": evaluation.quality,
},
)
self.crew._long_term_memory.save(long_term_memory)
entity_memories = [
EntityMemoryItem(
name=entity.name,
type=entity.type,
description=entity.description,
relationships="\n".join(
[f"- {r}" for r in entity.relationships]
),
)
for entity in evaluation.entities
]
if entity_memories:
self.crew._entity_memory.save(entity_memories)
except AttributeError as e:
self.agent._logger.log(
"error", f"Missing attributes for long term memory: {e}"
)
except Exception as e:
self.agent._logger.log(
"error", f"Failed to add to long term memory: {e}"
)
elif (
self.crew
and self.crew._long_term_memory
and self.crew._entity_memory is None
):
if self.agent and self.agent.verbose:
self._printer.print(
content="Long term memory is enabled, but entity memory is not enabled. Please configure entity memory or set memory=True to automatically enable it.",
color="bold_yellow",
)

View File

@@ -7,7 +7,6 @@ and memory management.
from __future__ import annotations
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
import logging
from typing import TYPE_CHECKING, Any, Literal, cast
@@ -235,7 +234,9 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if self.ask_for_human_input:
formatted_answer = self._handle_human_feedback(formatted_answer)
self._save_to_memory(formatted_answer)
self._create_short_term_memory(formatted_answer)
self._create_long_term_memory(formatted_answer)
self._create_external_memory(formatted_answer)
return {"output": formatted_answer.output}
def _inject_multimodal_files(self, inputs: dict[str, Any] | None = None) -> None:
@@ -686,138 +687,30 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
Returns:
AgentFinish if tool has result_as_answer=True, None otherwise.
"""
from datetime import datetime
import json
from crewai.events import crewai_event_bus
from crewai.events.types.tool_usage_events import (
ToolUsageErrorEvent,
ToolUsageFinishedEvent,
ToolUsageStartedEvent,
)
if not tool_calls:
return None
parsed_calls = [
parsed
for tool_call in tool_calls
if (parsed := self._parse_native_tool_call(tool_call)) is not None
]
if not parsed_calls:
return None
# Only process the FIRST tool call for sequential execution with reflection
tool_call = tool_calls[0]
original_tools_by_name: dict[str, Any] = {}
for tool in self.original_tools or []:
original_tools_by_name[sanitize_tool_name(tool.name)] = tool
if len(parsed_calls) > 1:
has_result_as_answer_in_batch = any(
bool(
original_tools_by_name.get(func_name)
and getattr(
original_tools_by_name.get(func_name), "result_as_answer", False
)
)
for _, func_name, _ in parsed_calls
)
has_max_usage_count_in_batch = any(
bool(
original_tools_by_name.get(func_name)
and getattr(
original_tools_by_name.get(func_name),
"max_usage_count",
None,
)
is not None
)
for _, func_name, _ in parsed_calls
)
# Preserve historical sequential behavior for result_as_answer batches.
# Also avoid threading around usage counters for max_usage_count tools.
if has_result_as_answer_in_batch or has_max_usage_count_in_batch:
logger.debug(
"Skipping parallel native execution because batch includes result_as_answer or max_usage_count tool"
)
else:
execution_plan: list[
tuple[str, str, str | dict[str, Any], Any | None]
] = []
for call_id, func_name, func_args in parsed_calls:
original_tool = original_tools_by_name.get(func_name)
execution_plan.append((call_id, func_name, func_args, original_tool))
self._append_assistant_tool_calls_message(
[
(call_id, func_name, func_args)
for call_id, func_name, func_args, _ in execution_plan
]
)
max_workers = min(8, len(execution_plan))
ordered_results: list[dict[str, Any] | None] = [None] * len(execution_plan)
with ThreadPoolExecutor(max_workers=max_workers) as pool:
futures = {
pool.submit(
self._execute_single_native_tool_call,
call_id=call_id,
func_name=func_name,
func_args=func_args,
available_functions=available_functions,
original_tool=original_tool,
should_execute=True,
): idx
for idx, (
call_id,
func_name,
func_args,
original_tool,
) in enumerate(execution_plan)
}
for future in as_completed(futures):
idx = futures[future]
ordered_results[idx] = future.result()
for execution_result in ordered_results:
if not execution_result:
continue
tool_finish = self._append_tool_result_and_check_finality(
execution_result
)
if tool_finish:
return tool_finish
reasoning_prompt = self._i18n.slice("post_tool_reasoning")
reasoning_message: LLMMessage = {
"role": "user",
"content": reasoning_prompt,
}
self.messages.append(reasoning_message)
return None
# Sequential behavior: process only first tool call, then force reflection.
call_id, func_name, func_args = parsed_calls[0]
self._append_assistant_tool_calls_message([(call_id, func_name, func_args)])
execution_result = self._execute_single_native_tool_call(
call_id=call_id,
func_name=func_name,
func_args=func_args,
available_functions=available_functions,
original_tool=original_tools_by_name.get(func_name),
should_execute=True,
)
tool_finish = self._append_tool_result_and_check_finality(execution_result)
if tool_finish:
return tool_finish
reasoning_prompt = self._i18n.slice("post_tool_reasoning")
reasoning_message: LLMMessage = {
"role": "user",
"content": reasoning_prompt,
}
self.messages.append(reasoning_message)
return None
def _parse_native_tool_call(
self, tool_call: Any
) -> tuple[str, str, str | dict[str, Any]] | None:
# Extract tool call info - handle OpenAI-style, Anthropic-style, and Gemini-style
if hasattr(tool_call, "function"):
# OpenAI-style: has .function.name and .function.arguments
call_id = getattr(tool_call, "id", f"call_{id(tool_call)}")
func_name = sanitize_tool_name(tool_call.function.name)
return call_id, func_name, tool_call.function.arguments
if hasattr(tool_call, "function_call") and tool_call.function_call:
func_args = tool_call.function.arguments
elif hasattr(tool_call, "function_call") and tool_call.function_call:
# Gemini-style: has .function_call.name and .function_call.args
call_id = f"call_{id(tool_call)}"
func_name = sanitize_tool_name(tool_call.function_call.name)
func_args = (
@@ -825,12 +718,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if tool_call.function_call.args
else {}
)
return call_id, func_name, func_args
if hasattr(tool_call, "name") and hasattr(tool_call, "input"):
elif hasattr(tool_call, "name") and hasattr(tool_call, "input"):
# Anthropic format: has .name and .input (ToolUseBlock)
call_id = getattr(tool_call, "id", f"call_{id(tool_call)}")
func_name = sanitize_tool_name(tool_call.name)
return call_id, func_name, tool_call.input
if isinstance(tool_call, dict):
func_args = tool_call.input # Already a dict in Anthropic
elif isinstance(tool_call, dict):
# Support OpenAI "id", Bedrock "toolUseId", or generate one
call_id = (
tool_call.get("id")
or tool_call.get("toolUseId")
@@ -841,15 +735,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
func_info.get("name", "") or tool_call.get("name", "")
)
func_args = func_info.get("arguments", "{}") or tool_call.get("input", {})
return call_id, func_name, func_args
return None
def _append_assistant_tool_calls_message(
self,
parsed_calls: list[tuple[str, str, str | dict[str, Any]]],
) -> None:
import json
else:
return None
# Append assistant message with single tool call
assistant_message: LLMMessage = {
"role": "assistant",
"content": None,
@@ -864,30 +753,12 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
else json.dumps(func_args),
},
}
for call_id, func_name, func_args in parsed_calls
],
}
self.messages.append(assistant_message)
def _execute_single_native_tool_call(
self,
*,
call_id: str,
func_name: str,
func_args: str | dict[str, Any],
available_functions: dict[str, Callable[..., Any]],
original_tool: Any | None = None,
should_execute: bool = True,
) -> dict[str, Any]:
from datetime import datetime
import json
from crewai.events.types.tool_usage_events import (
ToolUsageErrorEvent,
ToolUsageFinishedEvent,
ToolUsageStartedEvent,
)
# Parse arguments for the single tool call
if isinstance(func_args, str):
try:
args_dict = json.loads(func_args)
@@ -896,26 +767,28 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
else:
args_dict = func_args
if original_tool is None:
for tool in self.original_tools or []:
if sanitize_tool_name(tool.name) == func_name:
original_tool = tool
break
agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"
# Find original tool by matching sanitized name (needed for cache_function and result_as_answer)
original_tool = None
for tool in self.original_tools or []:
if sanitize_tool_name(tool.name) == func_name:
original_tool = tool
break
# Check if tool has reached max usage count
max_usage_reached = False
if not should_execute and original_tool:
max_usage_reached = True
elif (
should_execute
and original_tool
and getattr(original_tool, "max_usage_count", None) is not None
and getattr(original_tool, "current_usage_count", 0)
>= original_tool.max_usage_count
):
max_usage_reached = True
if original_tool:
if (
hasattr(original_tool, "max_usage_count")
and original_tool.max_usage_count is not None
and original_tool.current_usage_count >= original_tool.max_usage_count
):
max_usage_reached = True
# Check cache before executing
from_cache = False
result: str = "Tool not found"
input_str = json.dumps(args_dict) if args_dict else ""
if self.tools_handler and self.tools_handler.cache:
cached_result = self.tools_handler.cache.read(
@@ -929,7 +802,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
)
from_cache = True
agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"
# Emit tool usage started event
started_at = datetime.now()
crewai_event_bus.emit(
self,
@@ -945,12 +818,14 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
track_delegation_if_needed(func_name, args_dict, self.task)
# Find the structured tool for hook context
structured_tool: CrewStructuredTool | None = None
for structured in self.tools or []:
if sanitize_tool_name(structured.name) == func_name:
structured_tool = structured
break
# Execute before_tool_call hooks
hook_blocked = False
before_hook_context = ToolCallHookContext(
tool_name=func_name,
@@ -974,44 +849,58 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
color="red",
)
# If hook blocked execution, set result and skip tool execution
if hook_blocked:
result = f"Tool execution blocked by hook. Tool: {func_name}"
# Execute the tool (only if not cached, not at max usage, and not blocked by hook)
elif not from_cache and not max_usage_reached:
result = "Tool not found"
if func_name in available_functions:
try:
tool_func = available_functions[func_name]
raw_result = tool_func(**args_dict)
# Add to cache after successful execution (before string conversion)
if self.tools_handler and self.tools_handler.cache:
should_cache = True
if (
original_tool
and hasattr(original_tool, "cache_function")
and callable(original_tool.cache_function)
):
should_cache = original_tool.cache_function(
args_dict, raw_result
)
if should_cache:
self.tools_handler.cache.add(
tool=func_name, input=input_str, output=raw_result
)
# Convert to string for message
result = (
str(raw_result)
if not isinstance(raw_result, str)
else raw_result
)
except Exception as e:
result = f"Error executing tool: {e}"
if self.task:
self.task.increment_tools_errors()
crewai_event_bus.emit(
self,
event=ToolUsageErrorEvent(
tool_name=func_name,
tool_args=args_dict,
from_agent=self.agent,
from_task=self.task,
agent_key=agent_key,
error=e,
),
)
error_event_emitted = True
elif max_usage_reached and original_tool:
# Return error message when max usage limit is reached
result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
elif not from_cache and func_name in available_functions:
try:
raw_result = available_functions[func_name](**args_dict)
if self.tools_handler and self.tools_handler.cache:
should_cache = True
if (
original_tool
and hasattr(original_tool, "cache_function")
and callable(original_tool.cache_function)
):
should_cache = original_tool.cache_function(args_dict, raw_result)
if should_cache:
self.tools_handler.cache.add(
tool=func_name, input=input_str, output=raw_result
)
result = str(raw_result) if not isinstance(raw_result, str) else raw_result
except Exception as e:
result = f"Error executing tool: {e}"
if self.task:
self.task.increment_tools_errors()
crewai_event_bus.emit(
self,
event=ToolUsageErrorEvent(
tool_name=func_name,
tool_args=args_dict,
from_agent=self.agent,
from_task=self.task,
agent_key=agent_key,
error=e,
),
)
error_event_emitted = True
after_hook_context = ToolCallHookContext(
tool_name=func_name,
@@ -1051,23 +940,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
),
)
return {
"call_id": call_id,
"func_name": func_name,
"result": result,
"from_cache": from_cache,
"original_tool": original_tool,
}
def _append_tool_result_and_check_finality(
self, execution_result: dict[str, Any]
) -> AgentFinish | None:
call_id = cast(str, execution_result["call_id"])
func_name = cast(str, execution_result["func_name"])
result = cast(str, execution_result["result"])
from_cache = cast(bool, execution_result["from_cache"])
original_tool = execution_result["original_tool"]
# Append tool result message
tool_message: LLMMessage = {
"role": "tool",
"tool_call_id": call_id,
@@ -1076,6 +949,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
}
self.messages.append(tool_message)
# Log the tool execution
if self.agent and self.agent.verbose:
cache_info = " (from cache)" if from_cache else ""
self._printer.print(
@@ -1088,11 +962,20 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
and hasattr(original_tool, "result_as_answer")
and original_tool.result_as_answer
):
# Return immediately with tool result as final answer
return AgentFinish(
thought="Tool result is the final answer",
output=result,
text=result,
)
# Inject post-tool reasoning prompt to enforce analysis
reasoning_prompt = self._i18n.slice("post_tool_reasoning")
reasoning_message: LLMMessage = {
"role": "user",
"content": reasoning_prompt,
}
self.messages.append(reasoning_message)
return None
async def ainvoke(self, inputs: dict[str, Any]) -> dict[str, Any]:
@@ -1128,7 +1011,9 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if self.ask_for_human_input:
formatted_answer = await self._ahandle_human_feedback(formatted_answer)
self._save_to_memory(formatted_answer)
self._create_short_term_memory(formatted_answer)
self._create_long_term_memory(formatted_answer)
self._create_external_memory(formatted_answer)
return {"output": formatted_answer.output}
async def _ainvoke_loop(self) -> AgentFinish:
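
For orientation, these are roughly the tool-call shapes `_parse_native_tool_call` above distinguishes (a hedged sketch with hypothetical payloads, not the providers' real SDK types):

```python
from types import SimpleNamespace

# OpenAI-style: .function.name / .function.arguments (a JSON string)
openai_call = SimpleNamespace(
    id="call_1",
    function=SimpleNamespace(name="search", arguments='{"q": "crewai"}'),
)

# Anthropic-style ToolUseBlock: .name / .input (already a dict)
anthropic_call = SimpleNamespace(id="toolu_1", name="search", input={"q": "crewai"})

# Dict-style; Bedrock uses "toolUseId" where OpenAI uses "id"
bedrock_call = {"toolUseId": "tu_1", "name": "search", "input": {"q": "crewai"}}
```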

View File

@@ -1,7 +1,6 @@
from importlib.metadata import version as get_version
import os
import subprocess
from typing import Any
import click
@@ -180,19 +179,9 @@ def log_tasks_outputs() -> None:
@crewai.command()
@click.option("-m", "--memory", is_flag=True, help="Reset MEMORY")
@click.option(
"-l", "--long", is_flag=True, hidden=True,
help="[Deprecated: use --memory] Reset memory",
)
@click.option(
"-s", "--short", is_flag=True, hidden=True,
help="[Deprecated: use --memory] Reset memory",
)
@click.option(
"-e", "--entities", is_flag=True, hidden=True,
help="[Deprecated: use --memory] Reset memory",
)
@click.option("-l", "--long", is_flag=True, help="Reset LONG TERM memory")
@click.option("-s", "--short", is_flag=True, help="Reset SHORT TERM memory")
@click.option("-e", "--entities", is_flag=True, help="Reset ENTITIES memory")
@click.option("-kn", "--knowledge", is_flag=True, help="Reset KNOWLEDGE storage")
@click.option(
"-akn", "--agent-knowledge", is_flag=True, help="Reset AGENT KNOWLEDGE storage"
@@ -202,7 +191,6 @@ def log_tasks_outputs() -> None:
)
@click.option("-a", "--all", is_flag=True, help="Reset ALL memories")
def reset_memories(
memory: bool,
long: bool,
short: bool,
entities: bool,
@@ -212,22 +200,13 @@ def reset_memories(
all: bool,
) -> None:
"""
Reset the crew memories (memory, knowledge, agent_knowledge, kickoff_outputs). This will delete all the data saved.
Reset the crew memories (long, short, entity, latest_crew_kickoff_outputs, knowledge, agent_knowledge). This will delete all the data saved.
"""
try:
# Treat legacy flags as --memory with a deprecation warning
if long or short or entities:
legacy_used = [
f for f, v in [("--long", long), ("--short", short), ("--entities", entities)] if v
]
click.echo(
f"Warning: {', '.join(legacy_used)} {'is' if len(legacy_used) == 1 else 'are'} "
"deprecated. Use --memory (-m) instead. All memory is now unified."
)
memory = True
memory_types = [
memory,
long,
short,
entities,
knowledge,
agent_knowledge,
kickoff_outputs,
@@ -239,73 +218,12 @@ def reset_memories(
)
return
reset_memories_command(
memory, knowledge, agent_knowledge, kickoff_outputs, all
long, short, entities, knowledge, agent_knowledge, kickoff_outputs, all
)
except Exception as e:
click.echo(f"An error occurred while resetting memories: {e}", err=True)
@crewai.command()
@click.option(
"--storage-path",
type=str,
default=None,
help="Path to LanceDB memory directory. If omitted, uses ./.crewai/memory.",
)
@click.option(
"--embedder-provider",
type=str,
default=None,
help="Embedder provider for recall queries (e.g. openai, google-vertex, cohere, ollama).",
)
@click.option(
"--embedder-model",
type=str,
default=None,
help="Embedder model name (e.g. text-embedding-3-small, gemini-embedding-001).",
)
@click.option(
"--embedder-config",
type=str,
default=None,
help='Full embedder config as JSON (e.g. \'{"provider": "cohere", "config": {"model_name": "embed-v4.0"}}\').',
)
def memory(
storage_path: str | None,
embedder_provider: str | None,
embedder_model: str | None,
embedder_config: str | None,
) -> None:
"""Open the Memory TUI to browse scopes and recall memories."""
try:
from crewai.cli.memory_tui import MemoryTUI
except ImportError as exc:
click.echo(
"Textual is required for the memory TUI but could not be imported. "
"Try reinstalling crewai or: pip install textual"
)
raise SystemExit(1) from exc
# Build embedder spec from CLI flags.
embedder_spec: dict[str, Any] | None = None
if embedder_config:
import json as _json
try:
embedder_spec = _json.loads(embedder_config)
except _json.JSONDecodeError as exc:
click.echo(f"Invalid --embedder-config JSON: {exc}")
raise SystemExit(1) from exc
elif embedder_provider:
cfg: dict[str, str] = {}
if embedder_model:
cfg["model_name"] = embedder_model
embedder_spec = {"provider": embedder_provider, "config": cfg}
app = MemoryTUI(storage_path=storage_path, embedder_config=embedder_spec)
app.run()
@crewai.command()
@click.option(
"-n",

View File

@@ -1,398 +0,0 @@
"""Textual TUI for browsing and recalling unified memory."""
from __future__ import annotations
import asyncio
from typing import Any
from textual.app import App, ComposeResult
from textual.containers import Horizontal, Vertical
from textual.widgets import Footer, Header, Input, OptionList, Static, Tree
# -- CrewAI brand palette --
_PRIMARY = "#eb6658" # coral
_SECONDARY = "#1F7982" # teal
_TERTIARY = "#ffffff" # white
def _format_scope_info(info: Any) -> str:
"""Format ScopeInfo with Rich markup."""
return (
f"[bold {_PRIMARY}]{info.path}[/]\n\n"
f"[dim]Records:[/] [bold]{info.record_count}[/]\n"
f"[dim]Categories:[/] {', '.join(info.categories) or 'none'}\n"
f"[dim]Oldest:[/] {info.oldest_record or '-'}\n"
f"[dim]Newest:[/] {info.newest_record or '-'}\n"
f"[dim]Children:[/] {', '.join(info.child_scopes) or 'none'}"
)
class MemoryTUI(App[None]):
"""TUI to browse memory scopes and run recall queries."""
TITLE = "CrewAI Memory"
SUB_TITLE = "Browse scopes and recall memories"
CSS = f"""
Header {{
background: {_PRIMARY};
color: {_TERTIARY};
}}
Footer {{
background: {_SECONDARY};
color: {_TERTIARY};
}}
Footer > .footer-key--key {{
background: {_PRIMARY};
color: {_TERTIARY};
}}
Horizontal {{
height: 1fr;
}}
#scope-tree {{
width: 30%;
padding: 1 2;
background: {_SECONDARY} 8%;
border-right: solid {_SECONDARY};
}}
#scope-tree:focus > .tree--cursor {{
background: {_SECONDARY};
color: {_TERTIARY};
}}
#scope-tree > .tree--guides {{
color: {_SECONDARY} 50%;
}}
#scope-tree > .tree--guides-hover {{
color: {_PRIMARY};
}}
#scope-tree > .tree--guides-selected {{
color: {_SECONDARY};
}}
#right-panel {{
width: 70%;
padding: 0 1;
}}
#info-panel {{
height: 2fr;
padding: 1 2;
overflow-y: auto;
border: round {_SECONDARY};
}}
#info-panel:focus {{
border: round {_PRIMARY};
}}
#info-panel LoadingIndicator {{
color: {_PRIMARY};
}}
#entry-list {{
height: 1fr;
border: round {_SECONDARY};
padding: 0 1;
scrollbar-color: {_PRIMARY};
}}
#entry-list:focus {{
border: round {_PRIMARY};
}}
#entry-list > .option-list--option-highlighted {{
background: {_SECONDARY};
color: {_TERTIARY};
}}
#recall-input {{
margin: 0 1 1 1;
border: tall {_SECONDARY};
}}
#recall-input:focus {{
border: tall {_PRIMARY};
}}
"""
def __init__(
self,
storage_path: str | None = None,
embedder_config: dict[str, Any] | None = None,
) -> None:
super().__init__()
self._memory: Any = None
self._init_error: str | None = None
self._selected_scope: str = "/"
self._entries: list[Any] = []
self._view_mode: str = "list" # "list" | "recall"
self._recall_matches: list[Any] = []
self._last_scope_info: Any = None
self._custom_embedder = embedder_config is not None
try:
from crewai.memory.storage.lancedb_storage import LanceDBStorage
from crewai.memory.unified_memory import Memory
storage = LanceDBStorage(path=storage_path) if storage_path else LanceDBStorage()
embedder = None
if embedder_config is not None:
from crewai.rag.embeddings.factory import build_embedder
embedder = build_embedder(embedder_config)
self._memory = Memory(storage=storage, embedder=embedder) if embedder else Memory(storage=storage)
except Exception as e:
self._init_error = str(e)
def compose(self) -> ComposeResult:
yield Header(show_clock=False)
with Horizontal():
yield self._build_scope_tree()
initial = (
self._init_error
if self._init_error
else "Select a scope or type a recall query."
)
with Vertical(id="right-panel"):
yield Static(initial, id="info-panel")
yield OptionList(id="entry-list")
yield Input(
placeholder="Type a query and press Enter to recall...",
id="recall-input",
)
yield Footer()
def on_mount(self) -> None:
"""Set initial border titles on mounted widgets."""
self.query_one("#info-panel", Static).border_title = "Detail"
self.query_one("#entry-list", OptionList).border_title = "Entries"
def _build_scope_tree(self) -> Tree[str]:
tree: Tree[str] = Tree("/", id="scope-tree")
if self._memory is None:
tree.root.data = "/"
tree.root.label = "/ (0 records)"
return tree
info = self._memory.info("/")
tree.root.label = f"/ ({info.record_count} records)"
tree.root.data = "/"
self._add_children(tree.root, "/", depth=0, max_depth=3)
tree.root.expand()
return tree
def _add_children(
self,
parent_node: Tree.Node[str],
path: str,
depth: int,
max_depth: int,
) -> None:
if depth >= max_depth or self._memory is None:
return
info = self._memory.info(path)
for child in info.child_scopes:
child_info = self._memory.info(child)
label = f"{child} ({child_info.record_count})"
node = parent_node.add(label, data=child)
self._add_children(node, child, depth + 1, max_depth)
# -- Populating the OptionList -------------------------------------------
def _populate_entry_list(self) -> None:
"""Clear the OptionList and fill it with the current scope's entries."""
option_list = self.query_one("#entry-list", OptionList)
option_list.clear_options()
for record in self._entries:
date_str = record.created_at.strftime("%Y-%m-%d")
preview = (
(record.content[:80] + "")
if len(record.content) > 80
else record.content
)
label = (
f"{date_str} "
f"[bold]{record.importance:.1f}[/] "
f"{preview}"
)
option_list.add_option(label)
def _populate_recall_list(self) -> None:
"""Clear the OptionList and fill it with the current recall matches."""
option_list = self.query_one("#entry-list", OptionList)
option_list.clear_options()
if not self._recall_matches:
return
for m in self._recall_matches:
preview = (
(m.record.content[:80] + "")
if len(m.record.content) > 80
else m.record.content
)
label = (
f"[bold]\\[{m.score:.2f}][/] "
f"{preview} "
f"[dim]scope={m.record.scope}[/]"
)
option_list.add_option(label)
# -- Detail rendering ----------------------------------------------------
def _format_record_detail(self, record: Any, context_line: str = "") -> str:
"""Format a full MemoryRecord as Rich markup for the detail view.
Args:
record: A MemoryRecord instance.
context_line: Optional header line shown above the fields
(e.g. "Entry 3 of 47").
Returns:
A Rich-markup string with all meaningful record fields.
"""
sep = f"[bold {_PRIMARY}]{'' * 44}[/]"
lines: list[str] = []
if context_line:
lines.append(context_line)
lines.append("")
# -- Fields block --
lines.append(f"[dim]ID:[/] {record.id}")
lines.append(f"[dim]Scope:[/] [bold]{record.scope}[/]")
lines.append(f"[dim]Importance:[/] [bold]{record.importance:.2f}[/]")
lines.append(
f"[dim]Created:[/] "
f"{record.created_at.strftime('%Y-%m-%d %H:%M:%S')}"
)
lines.append(
f"[dim]Last accessed:[/] "
f"{record.last_accessed.strftime('%Y-%m-%d %H:%M:%S')}"
)
lines.append(
f"[dim]Categories:[/] "
f"{', '.join(record.categories) if record.categories else 'none'}"
)
lines.append(f"[dim]Source:[/] {record.source or '-'}")
lines.append(f"[dim]Private:[/] {'Yes' if record.private else 'No'}")
# -- Content block --
lines.append(f"\n{sep}")
lines.append("[bold]Content[/]\n")
lines.append(record.content)
# -- Metadata block --
if record.metadata:
lines.append(f"\n{sep}")
lines.append("[bold]Metadata[/]\n")
for k, v in record.metadata.items():
lines.append(f"[dim]{k}:[/] {v}")
return "\n".join(lines)
# -- Event handlers ------------------------------------------------------
def on_tree_node_selected(self, event: Tree.NodeSelected[str]) -> None:
"""Load entries for the selected scope and populate the OptionList."""
path = event.node.data if event.node.data is not None else "/"
self._selected_scope = path
self._view_mode = "list"
panel = self.query_one("#info-panel", Static)
if self._memory is None:
panel.update(self._init_error or "No memory loaded.")
return
info = self._memory.info(path)
self._last_scope_info = info
self._entries = self._memory.list_records(scope=path, limit=200)
panel.update(_format_scope_info(info))
panel.border_title = "Detail"
entry_list = self.query_one("#entry-list", OptionList)
entry_list.border_title = f"Entries ({len(self._entries)})"
self._populate_entry_list()
def on_option_list_option_highlighted(
self, event: OptionList.OptionHighlighted
) -> None:
"""Live-update the info panel with the detail of the highlighted entry."""
panel = self.query_one("#info-panel", Static)
idx = event.option_index
if self._view_mode == "list":
if idx < len(self._entries):
record = self._entries[idx]
total = len(self._entries)
context = (
f"[bold {_PRIMARY}]Entry {idx + 1} of {total}[/] "
f"[dim]in[/] [bold]{self._selected_scope}[/]"
)
panel.border_title = f"Entry {idx + 1} of {total}"
panel.update(self._format_record_detail(record, context_line=context))
elif self._view_mode == "recall":
if idx < len(self._recall_matches):
match = self._recall_matches[idx]
total = len(self._recall_matches)
panel.border_title = f"Match {idx + 1} of {total}"
score_color = _PRIMARY if match.score >= 0.5 else "dim"
header_lines: list[str] = [
f"[bold {_PRIMARY}]Recall Match {idx + 1} of {total}[/]\n",
f"[dim]Score:[/] [{score_color}][bold]{match.score:.2f}[/][/]",
(
f"[dim]Match reasons:[/] "
f"{', '.join(match.match_reasons) if match.match_reasons else '-'}"
),
(
f"[dim]Evidence gaps:[/] "
f"{', '.join(match.evidence_gaps) if match.evidence_gaps else 'none'}"
),
f"\n[bold {_PRIMARY}]{'' * 44}[/]",
]
record_detail = self._format_record_detail(match.record)
header_lines.append(record_detail)
panel.update("\n".join(header_lines))
def on_input_submitted(self, event: Input.Submitted) -> None:
query = event.value.strip()
if not query:
return
if self._memory is None:
panel = self.query_one("#info-panel", Static)
panel.update(self._init_error or "No memory loaded. Cannot recall.")
return
self.run_worker(self._do_recall(query), exclusive=True)
async def _do_recall(self, query: str) -> None:
"""Execute a recall query and display results in the OptionList."""
panel = self.query_one("#info-panel", Static)
panel.loading = True
try:
scope = (
self._selected_scope
if self._selected_scope != "/"
else None
)
loop = asyncio.get_event_loop()
matches = await loop.run_in_executor(
None,
lambda: self._memory.recall(
query, scope=scope, limit=10, depth="deep"
),
)
self._recall_matches = matches or []
self._view_mode = "recall"
if not self._recall_matches:
panel.update("[dim]No memories found.[/]")
self.query_one("#entry-list", OptionList).clear_options()
return
info_lines: list[str] = []
if not self._custom_embedder:
info_lines.append(
"[dim italic]Note: Using default OpenAI embedder. "
"If memories were created with a different embedder, "
"pass --embedder-provider to match.[/]\n"
)
info_lines.append(
f"[bold]Recall Results[/] [dim]"
f"({len(self._recall_matches)} matches)[/]\n"
f"[dim]Navigate the list below to view details.[/]"
)
panel.update("\n".join(info_lines))
panel.border_title = "Recall Detail"
entry_list = self.query_one("#entry-list", OptionList)
entry_list.border_title = f"Recall Results ({len(self._recall_matches)})"
self._populate_recall_list()
except Exception as e:
panel.update(f"[bold red]Error:[/] {e}")
finally:
panel.loading = False

View File

@@ -2,61 +2,43 @@ import subprocess
import click
from crewai.cli.utils import get_crews, get_flows
from crewai.flow import Flow
def _reset_flow_memory(flow: Flow) -> None:
"""Reset memory for a single flow instance.
Handles Memory, MemoryScope (both have .reset()), and MemorySlice
(delegates to the underlying ._memory). Silently succeeds when the
storage directory does not exist yet (nothing to reset).
Args:
flow: The flow instance whose memory should be reset.
"""
mem = flow.memory
if mem is None:
return
try:
if hasattr(mem, "reset"):
mem.reset()
elif hasattr(mem, "_memory") and hasattr(mem._memory, "reset"):
mem._memory.reset()
except (FileNotFoundError, OSError):
pass
from crewai.cli.utils import get_crews
def reset_memories_command(
memory: bool,
knowledge: bool,
agent_knowledge: bool,
kickoff_outputs: bool,
all: bool,
long,
short,
entity,
knowledge,
agent_knowledge,
kickoff_outputs,
all,
) -> None:
"""Reset the crew and flow memories.
"""
Reset the crew memories.
Args:
memory: Whether to reset the unified memory.
knowledge: Whether to reset the knowledge.
agent_knowledge: Whether to reset the agents' knowledge.
kickoff_outputs: Whether to reset the latest kickoff task outputs.
all: Whether to reset all memories.
long (bool): Whether to reset the long-term memory.
short (bool): Whether to reset the short-term memory.
entity (bool): Whether to reset the entity memory.
kickoff_outputs (bool): Whether to reset the latest kickoff task outputs.
all (bool): Whether to reset all memories.
knowledge (bool): Whether to reset the knowledge.
agent_knowledge (bool): Whether to reset the agents' knowledge.
"""
try:
if not any([memory, kickoff_outputs, knowledge, agent_knowledge, all]):
if not any(
[long, short, entity, kickoff_outputs, knowledge, agent_knowledge, all]
):
click.echo(
"No memory type specified. Please specify at least one type to reset."
)
return
crews = get_crews()
flows = get_flows()
if not crews and not flows:
raise ValueError("No crew or flow found.")
if not crews:
raise ValueError("No crew found.")
for crew in crews:
if all:
crew.reset_memories(command_type="all")
@@ -64,10 +46,20 @@ def reset_memories_command(
f"[Crew ({crew.name if crew.name else crew.id})] Reset memories command has been completed."
)
continue
if memory:
crew.reset_memories(command_type="memory")
if long:
crew.reset_memories(command_type="long")
click.echo(
f"[Crew ({crew.name if crew.name else crew.id})] Memory has been reset."
f"[Crew ({crew.name if crew.name else crew.id})] Long term memory has been reset."
)
if short:
crew.reset_memories(command_type="short")
click.echo(
f"[Crew ({crew.name if crew.name else crew.id})] Short term memory has been reset."
)
if entity:
crew.reset_memories(command_type="entity")
click.echo(
f"[Crew ({crew.name if crew.name else crew.id})] Entity memory has been reset."
)
if kickoff_outputs:
crew.reset_memories(command_type="kickoff_outputs")
@@ -85,20 +77,6 @@ def reset_memories_command(
f"[Crew ({crew.name if crew.name else crew.id})] Agents knowledge has been reset."
)
for flow in flows:
flow_name = flow.name or flow.__class__.__name__
if all:
_reset_flow_memory(flow)
click.echo(
f"[Flow ({flow_name})] Reset memories command has been completed."
)
continue
if memory:
_reset_flow_memory(flow)
click.echo(
f"[Flow ({flow_name})] Memory has been reset."
)
except subprocess.CalledProcessError as e:
click.echo(f"An error occurred while resetting the memories: {e}", err=True)
click.echo(e.output, err=True)
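
A usage sketch of the reverted signature, assuming `reset_memories_command` is invoked programmatically (the CLI normally supplies these flags):

```python
# Hypothetical programmatic call: reset only short-term and entity memory.
reset_memories_command(
    long=False,
    short=True,
    entity=True,
    knowledge=False,
    agent_knowledge=False,
    kickoff_outputs=False,
    all=False,
)
```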

View File

@@ -1,6 +1,7 @@
from crewai import Agent, Crew, Process, Task
from crewai.project import CrewBase, agent, crew, task
from crewai.agents.agent_builder.base_agent import BaseAgent
from typing import List
# If you want to run a snippet of code before or after the crew starts,
# you can use the @before_kickoff and @after_kickoff decorators
# https://docs.crewai.com/concepts/crews#example-crew-class-with-decorators
@@ -9,8 +10,8 @@ from crewai.agents.agent_builder.base_agent import BaseAgent
class {{crew_name}}():
"""{{crew_name}} crew"""
agents: list[BaseAgent]
tasks: list[Task]
agents: List[BaseAgent]
tasks: List[Task]
# Learn more about YAML configuration files here:
# Agents: https://docs.crewai.com/concepts/agents#yaml-configuration-recommended

View File

@@ -1,3 +1,5 @@
from typing import List
from crewai import Agent, Crew, Process, Task
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.project import CrewBase, agent, crew, task
@@ -11,8 +13,8 @@ from crewai.project import CrewBase, agent, crew, task
class PoemCrew:
"""Poem Crew"""
agents: list[BaseAgent]
tasks: list[Task]
agents: List[BaseAgent]
tasks: List[Task]
# Learn more about YAML configuration files here:
# Agents: https://docs.crewai.com/concepts/agents#yaml-configuration-recommended

View File

@@ -386,109 +386,6 @@ def fetch_crews(module_attr: Any) -> list[Crew]:
return crew_instances
def get_flow_instance(module_attr: Any) -> Flow | None:
"""Check if a module attribute is a user-defined Flow subclass and return an instance.
Args:
module_attr: An attribute from a loaded module.
Returns:
A Flow instance if the attribute is a valid user-defined Flow subclass,
None otherwise.
"""
if (
isinstance(module_attr, type)
and issubclass(module_attr, Flow)
and module_attr is not Flow
):
try:
return module_attr()
except Exception:
return None
return None
_SKIP_DIRS = frozenset(
{".venv", "venv", ".git", "__pycache__", "node_modules", ".tox", ".nox"}
)
def get_flows(flow_path: str = "main.py") -> list[Flow]:
"""Get the flow instances from project files.
Walks the project directory looking for files matching ``flow_path``
(default ``main.py``), loads each module, and extracts Flow subclass
instances. Directories that are clearly not user source code (virtual
environments, ``.git``, etc.) are pruned to avoid noisy import errors.
Args:
flow_path: Filename to search for (default ``main.py``).
Returns:
A list of discovered Flow instances.
"""
flow_instances: list[Flow] = []
try:
current_dir = os.getcwd()
if current_dir not in sys.path:
sys.path.insert(0, current_dir)
src_dir = os.path.join(current_dir, "src")
if os.path.isdir(src_dir) and src_dir not in sys.path:
sys.path.insert(0, src_dir)
search_paths = [".", "src"] if os.path.isdir("src") else ["."]
for search_path in search_paths:
for root, dirs, files in os.walk(search_path):
dirs[:] = [
d
for d in dirs
if d not in _SKIP_DIRS and not d.startswith(".")
]
if flow_path in files and "cli/templates" not in root:
file_os_path = os.path.join(root, flow_path)
try:
spec = importlib.util.spec_from_file_location(
"flow_module", file_os_path
)
if not spec or not spec.loader:
continue
module = importlib.util.module_from_spec(spec)
sys.modules[spec.name] = module
try:
spec.loader.exec_module(module)
for attr_name in dir(module):
module_attr = getattr(module, attr_name)
try:
if flow_instance := get_flow_instance(
module_attr
):
flow_instances.append(flow_instance)
except Exception: # noqa: S112
continue
if flow_instances:
break
except Exception: # noqa: S112
continue
except (ImportError, AttributeError):
continue
if flow_instances:
break
except Exception: # noqa: S110
pass
return flow_instances
def is_valid_tool(obj: Any) -> bool:
from crewai.tools.base_tool import Tool

View File

@@ -1,4 +1,8 @@
from collections.abc import Generator
from contextlib import contextmanager
import contextvars
import os
from typing import Any
_platform_integration_token: contextvars.ContextVar[str | None] = (
@@ -6,9 +10,39 @@ _platform_integration_token: contextvars.ContextVar[str | None] = (
)
def set_platform_integration_token(integration_token: str) -> None:
"""Set the platform integration token in the current context.
Args:
integration_token: The integration token to set.
"""
_platform_integration_token.set(integration_token)
def get_platform_integration_token() -> str | None:
"""Get the platform integration token from the current context."""
return _platform_integration_token.get()
"""Get the platform integration token from the current context or environment.
Returns:
The integration token if set, otherwise None.
"""
token = _platform_integration_token.get()
if token is None:
token = os.getenv("CREWAI_PLATFORM_INTEGRATION_TOKEN")
return token
@contextmanager
def platform_context(integration_token: str) -> Generator[None, Any, None]:
"""Context manager to temporarily set the platform integration token.
Args:
integration_token: The integration token to set within the context.
"""
token = _platform_integration_token.set(integration_token)
try:
yield
finally:
_platform_integration_token.reset(token)
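
A short sketch of how the context manager above composes with the setter and getter defined in this module; the token strings are illustrative:

```python
# The context manager restores whatever token was previously in the context.
set_platform_integration_token("base-token")

with platform_context("scoped-token"):
    assert get_platform_integration_token() == "scoped-token"

assert get_platform_integration_token() == "base-token"
```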
_current_task_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(

View File

@@ -83,6 +83,10 @@ from crewai.knowledge.knowledge import Knowledge
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.llm import LLM
from crewai.llms.base_llm import BaseLLM
from crewai.memory.entity.entity_memory import EntityMemory
from crewai.memory.external.external_memory import ExternalMemory
from crewai.memory.long_term.long_term_memory import LongTermMemory
from crewai.memory.short_term.short_term_memory import ShortTermMemory
from crewai.process import Process
from crewai.rag.embeddings.types import EmbedderConfig
from crewai.rag.types import SearchResult
@@ -170,7 +174,10 @@ class Crew(FlowTrackable, BaseModel):
_logger: Logger = PrivateAttr()
_file_handler: FileHandler = PrivateAttr()
_cache_handler: InstanceOf[CacheHandler] = PrivateAttr(default_factory=CacheHandler)
_memory: Any = PrivateAttr(default=None) # Unified Memory | MemoryScope
_short_term_memory: InstanceOf[ShortTermMemory] | None = PrivateAttr()
_long_term_memory: InstanceOf[LongTermMemory] | None = PrivateAttr()
_entity_memory: InstanceOf[EntityMemory] | None = PrivateAttr()
_external_memory: InstanceOf[ExternalMemory] | None = PrivateAttr()
_train: bool | None = PrivateAttr(default=False)
_train_iteration: int | None = PrivateAttr()
_inputs: dict[str, Any] | None = PrivateAttr(default=None)
@@ -188,12 +195,25 @@ class Crew(FlowTrackable, BaseModel):
agents: list[BaseAgent] = Field(default_factory=list)
process: Process = Field(default=Process.sequential)
verbose: bool = Field(default=False)
memory: bool | Any = Field(
memory: bool = Field(
default=False,
description=(
"Enable crew memory. Pass True for default Memory(), "
"or a Memory/MemoryScope/MemorySlice instance for custom configuration."
),
description="If crew should use memory to store memories of it's execution",
)
short_term_memory: InstanceOf[ShortTermMemory] | None = Field(
default=None,
description="An Instance of the ShortTermMemory to be used by the Crew",
)
long_term_memory: InstanceOf[LongTermMemory] | None = Field(
default=None,
description="An Instance of the LongTermMemory to be used by the Crew",
)
entity_memory: InstanceOf[EntityMemory] | None = Field(
default=None,
description="An Instance of the EntityMemory to be used by the Crew",
)
external_memory: InstanceOf[ExternalMemory] | None = Field(
default=None,
description="An Instance of the ExternalMemory to be used by the Crew",
)
embedder: EmbedderConfig | None = Field(
default=None,
@@ -352,23 +372,31 @@ class Crew(FlowTrackable, BaseModel):
return self
def _initialize_default_memories(self) -> None:
self._long_term_memory = self._long_term_memory or LongTermMemory()
self._short_term_memory = self._short_term_memory or ShortTermMemory(
crew=self,
embedder_config=self.embedder,
)
self._entity_memory = self.entity_memory or EntityMemory(
crew=self, embedder_config=self.embedder
)
@model_validator(mode="after")
def create_crew_memory(self) -> Crew:
"""Initialize unified memory, respecting crew embedder config."""
if self.memory is True:
from crewai.memory.unified_memory import Memory
"""Initialize private memory attributes."""
self._external_memory = (
# External memory does not support a default value since it was
# designed to be managed entirely externally
self.external_memory.set_crew(self) if self.external_memory else None
)
embedder = None
if self.embedder is not None:
from crewai.rag.embeddings.factory import build_embedder
self._long_term_memory = self.long_term_memory
self._short_term_memory = self.short_term_memory
self._entity_memory = self.entity_memory
embedder = build_embedder(self.embedder)
self._memory = Memory(embedder=embedder)
elif self.memory:
# User passed a Memory / MemoryScope / MemorySlice instance
self._memory = self.memory
else:
self._memory = None
if self.memory:
self._initialize_default_memories()
return self
@@ -740,9 +768,6 @@ class Crew(FlowTrackable, BaseModel):
)
raise
finally:
# Ensure all background memory saves complete before returning
if self._memory is not None and hasattr(self._memory, "drain_writes"):
self._memory.drain_writes()
clear_files(self.id)
detach(token)
@@ -1298,11 +1323,6 @@ class Crew(FlowTrackable, BaseModel):
if agent and (hasattr(agent, "mcps") and getattr(agent, "mcps", None)):
tools = self._add_mcp_tools(task, tools)
# Add memory tools if memory is available (agent or crew level)
resolved_memory = getattr(agent, "memory", None) or self._memory
if resolved_memory is not None:
tools = self._add_memory_tools(tools, resolved_memory)
files = get_all_files(self.id, task.id)
if files:
supported_types: list[str] = []
@@ -1410,22 +1430,6 @@ class Crew(FlowTrackable, BaseModel):
return self._merge_tools(tools, cast(list[BaseTool], code_tools))
return tools
def _add_memory_tools(
self, tools: list[BaseTool], memory: Any
) -> list[BaseTool]:
"""Add recall and remember tools when memory is available.
Args:
tools: Current list of tools.
memory: The resolved Memory, MemoryScope, or MemorySlice instance.
Returns:
Updated list with memory tools added.
"""
from crewai.tools.memory_tools import create_memory_tools
return self._merge_tools(tools, create_memory_tools(memory))
def _add_file_tools(
self, tools: list[BaseTool], files: dict[str, Any]
) -> list[BaseTool]:
@@ -1670,7 +1674,10 @@ class Crew(FlowTrackable, BaseModel):
"_execution_span",
"_file_handler",
"_cache_handler",
"_memory",
"_short_term_memory",
"_long_term_memory",
"_entity_memory",
"_external_memory",
"agents",
"tasks",
"knowledge_sources",
@@ -1704,8 +1711,18 @@ class Crew(FlowTrackable, BaseModel):
copied_data = self.model_dump(exclude=exclude)
copied_data = {k: v for k, v in copied_data.items() if v is not None}
if getattr(self, "_memory", None):
copied_data["memory"] = self._memory
if self.short_term_memory:
copied_data["short_term_memory"] = self.short_term_memory.model_copy(
deep=True
)
if self.long_term_memory:
copied_data["long_term_memory"] = self.long_term_memory.model_copy(
deep=True
)
if self.entity_memory:
copied_data["entity_memory"] = self.entity_memory.model_copy(deep=True)
if self.external_memory:
copied_data["external_memory"] = self.external_memory.model_copy(deep=True)
copied_data.pop("agents", None)
copied_data.pop("tasks", None)
@@ -1836,24 +1853,23 @@ class Crew(FlowTrackable, BaseModel):
Args:
command_type: Type of memory to reset.
Valid options: 'memory', 'knowledge', 'agent_knowledge',
'kickoff_outputs', or 'all'. Legacy names 'long', 'short',
'entity', 'external' are treated as 'memory'.
Valid options: 'long', 'short', 'entity', 'knowledge', 'agent_knowledge'
'kickoff_outputs', or 'all'
Raises:
ValueError: If an invalid command type is provided.
RuntimeError: If memory reset operation fails.
"""
legacy_memory = frozenset(["long", "short", "entity", "external"])
if command_type in legacy_memory:
command_type = "memory"
valid_types = frozenset(
[
"memory",
"long",
"short",
"entity",
"knowledge",
"agent_knowledge",
"kickoff_outputs",
"all",
"external",
]
)
@@ -1959,10 +1975,25 @@ class Crew(FlowTrackable, BaseModel):
) + agent_knowledges
return {
"memory": {
"system": getattr(self, "_memory", None),
"short": {
"system": getattr(self, "_short_term_memory", None),
"reset": default_reset,
"name": "Memory",
"name": "Short Term",
},
"entity": {
"system": getattr(self, "_entity_memory", None),
"reset": default_reset,
"name": "Entity",
},
"external": {
"system": getattr(self, "_external_memory", None),
"reset": default_reset,
"name": "External",
},
"long": {
"system": getattr(self, "_long_term_memory", None),
"reset": default_reset,
"name": "Long Term",
},
"kickoff_outputs": {
"system": getattr(self, "_task_output_handler", None),

View File

@@ -120,52 +120,6 @@ class FlowPlotEvent(FlowEvent):
type: str = "flow_plot"
class FlowInputRequestedEvent(FlowEvent):
"""Event emitted when a flow requests user input via ``Flow.ask()``.
This event is emitted before the flow suspends waiting for user input,
allowing UI frameworks and observability tools to know when a flow
needs user interaction.
Attributes:
flow_name: Name of the flow requesting input.
method_name: Name of the flow method that called ``ask()``.
message: The question or prompt being shown to the user.
metadata: Optional metadata sent with the question (e.g., user ID,
channel, session context).
"""
method_name: str
message: str
metadata: dict[str, Any] | None = None
type: str = "flow_input_requested"
class FlowInputReceivedEvent(FlowEvent):
"""Event emitted when user input is received after ``Flow.ask()``.
This event is emitted after the user provides input (or the request
times out), allowing UI frameworks and observability tools to track
input collection.
Attributes:
flow_name: Name of the flow that received input.
method_name: Name of the flow method that called ``ask()``.
message: The original question or prompt.
response: The user's response, or None if timed out / unavailable.
metadata: Optional metadata sent with the question.
response_metadata: Optional metadata from the provider about the
response (e.g., who responded, thread ID, timestamps).
"""
method_name: str
message: str
response: str | None = None
metadata: dict[str, Any] | None = None
response_metadata: dict[str, Any] | None = None
type: str = "flow_input_received"
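
For context, a minimal observer of these input events might look like the sketch below. The event class import is taken from this diff; the bus import path and the `on()` decorator pattern follow CrewAI's event-listener convention but should be treated as assumptions here:

```python
# Hedged sketch: observing Flow.ask() via the event bus.
from crewai.events import crewai_event_bus  # assumed import path
from crewai.events.types.flow_events import FlowInputRequestedEvent

@crewai_event_bus.on(FlowInputRequestedEvent)
def on_input_requested(source, event):
    # Fires just before the flow suspends to wait for the user.
    print(f"{event.flow_name}.{event.method_name} asked: {event.message!r}")
```
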
class HumanFeedbackRequestedEvent(FlowEvent):
"""Event emitted when human feedback is requested.

View File

@@ -170,16 +170,16 @@ To enable tracing, do any one of these:
"""Create standardized status content with consistent formatting."""
content = Text()
content.append(f"{title}\n", style=f"{status_style} bold")
content.append("Name: ", style="white")
content.append("Name: \n", style="white")
content.append(f"{name}\n", style=status_style)
for label, value in fields.items():
content.append(f"{label}: ", style="white")
content.append(f"{label}: \n", style="white")
content.append(
f"{value}\n", style=fields.get(f"{label}_style", status_style)
)
if tool_args:
content.append("Tool Args: ", style="white")
content.append("Tool Args: \n", style="white")
content.append(f"{tool_args}\n", style=status_style)
return content
@@ -737,27 +737,6 @@ To enable tracing, do any one of these:
self.print_panel(content, title, style)
@staticmethod
def _simplify_tools_field(fields: dict[str, Any]) -> dict[str, Any]:
"""Simplify the tools field to show only tool names instead of full definitions.
Args:
fields: Dictionary of fields that may contain a 'tools' key with
full tool objects.
Returns:
The fields dictionary with 'tools' replaced by a comma-separated
string of tool names.
"""
if "tools" in fields:
tools = fields["tools"]
if tools:
tool_names = [getattr(t, "name", str(t)) for t in tools]
fields["tools"] = ", ".join(tool_names) if tool_names else "None"
else:
fields["tools"] = "None"
return fields
def handle_lite_agent_execution(
self,
lite_agent_role: str,
@@ -769,8 +748,6 @@ To enable tracing, do any one of these:
if not self.verbose:
return
fields = self._simplify_tools_field(fields)
if status == "started":
self.create_lite_agent_branch(lite_agent_role)
if fields:

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
from collections.abc import Callable, Coroutine
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
import json
import threading
@@ -669,12 +668,9 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
if not self.state.pending_tool_calls:
return "native_tool_completed"
pending_tool_calls = list(self.state.pending_tool_calls)
self.state.pending_tool_calls.clear()
# Group all tool calls into a single assistant message
tool_calls_to_report = []
for tool_call in pending_tool_calls:
for tool_call in self.state.pending_tool_calls:
info = extract_tool_call_info(tool_call)
if not info:
continue
@@ -699,85 +695,201 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
"content": None,
"tool_calls": tool_calls_to_report,
}
if all(type(tc).__qualname__ == "Part" for tc in pending_tool_calls):
assistant_message["raw_tool_call_parts"] = list(pending_tool_calls)
if all(
type(tc).__qualname__ == "Part" for tc in self.state.pending_tool_calls
):
assistant_message["raw_tool_call_parts"] = list(
self.state.pending_tool_calls
)
self.state.messages.append(assistant_message)
runnable_tool_calls = [
tool_call
for tool_call in pending_tool_calls
if extract_tool_call_info(tool_call) is not None
]
should_parallelize = self._should_parallelize_native_tool_calls(
runnable_tool_calls
)
# Now execute each tool
while self.state.pending_tool_calls:
tool_call = self.state.pending_tool_calls.pop(0)
info = extract_tool_call_info(tool_call)
if not info:
continue
execution_results: list[dict[str, Any]] = []
if should_parallelize:
max_workers = min(8, len(runnable_tool_calls))
with ThreadPoolExecutor(max_workers=max_workers) as pool:
future_to_idx = {
pool.submit(self._execute_single_native_tool_call, tool_call): idx
for idx, tool_call in enumerate(runnable_tool_calls)
}
ordered_results: list[dict[str, Any] | None] = [None] * len(
runnable_tool_calls
call_id, func_name, func_args = info
# Parse arguments
if isinstance(func_args, str):
try:
args_dict = json.loads(func_args)
except json.JSONDecodeError:
args_dict = {}
else:
args_dict = func_args
# Get agent_key for event tracking
agent_key = (
getattr(self.agent, "key", "unknown") if self.agent else "unknown"
)
# Find original tool by matching sanitized name (needed for cache_function and result_as_answer)
original_tool = None
for tool in self.original_tools or []:
if sanitize_tool_name(tool.name) == func_name:
original_tool = tool
break
# Check if tool has reached max usage count
max_usage_reached = False
if (
original_tool
and original_tool.max_usage_count is not None
and original_tool.current_usage_count >= original_tool.max_usage_count
):
max_usage_reached = True
# Check cache before executing
from_cache = False
input_str = json.dumps(args_dict) if args_dict else ""
if self.tools_handler and self.tools_handler.cache:
cached_result = self.tools_handler.cache.read(
tool=func_name, input=input_str
)
for future in as_completed(future_to_idx):
idx = future_to_idx[future]
ordered_results[idx] = future.result()
execution_results = [
result for result in ordered_results if result is not None
]
else:
# Execute sequentially so result_as_answer tools can short-circuit
# immediately without running remaining calls.
for tool_call in runnable_tool_calls:
execution_result = self._execute_single_native_tool_call(tool_call)
call_id = cast(str, execution_result["call_id"])
func_name = cast(str, execution_result["func_name"])
result = cast(str, execution_result["result"])
from_cache = cast(bool, execution_result["from_cache"])
original_tool = execution_result["original_tool"]
if cached_result is not None:
result = (
str(cached_result)
if not isinstance(cached_result, str)
else cached_result
)
from_cache = True
tool_message: LLMMessage = {
"role": "tool",
"tool_call_id": call_id,
"name": func_name,
"content": result,
}
self.state.messages.append(tool_message)
# Emit tool usage started event
started_at = datetime.now()
crewai_event_bus.emit(
self,
event=ToolUsageStartedEvent(
tool_name=func_name,
tool_args=args_dict,
from_agent=self.agent,
from_task=self.task,
agent_key=agent_key,
),
)
error_event_emitted = False
# Log the tool execution
if self.agent and self.agent.verbose:
cache_info = " (from cache)" if from_cache else ""
track_delegation_if_needed(func_name, args_dict, self.task)
structured_tool: CrewStructuredTool | None = None
for structured in self.tools or []:
if sanitize_tool_name(structured.name) == func_name:
structured_tool = structured
break
hook_blocked = False
before_hook_context = ToolCallHookContext(
tool_name=func_name,
tool_input=args_dict,
tool=structured_tool, # type: ignore[arg-type]
agent=self.agent,
task=self.task,
crew=self.crew,
)
before_hooks = get_before_tool_call_hooks()
try:
for hook in before_hooks:
hook_result = hook(before_hook_context)
if hook_result is False:
hook_blocked = True
break
except Exception as hook_error:
if self.agent.verbose:
self._printer.print(
content=f"Tool {func_name} executed with result{cache_info}: {result[:200]}...",
color="green",
content=f"Error in before_tool_call hook: {hook_error}",
color="red",
)
if (
original_tool
and hasattr(original_tool, "result_as_answer")
and original_tool.result_as_answer
):
self.state.current_answer = AgentFinish(
thought="Tool result is the final answer",
if hook_blocked:
result = f"Tool execution blocked by hook. Tool: {func_name}"
elif not from_cache and not max_usage_reached:
result = "Tool not found"
if func_name in self._available_functions:
try:
tool_func = self._available_functions[func_name]
raw_result = tool_func(**args_dict)
# Add to cache after successful execution (before string conversion)
if self.tools_handler and self.tools_handler.cache:
should_cache = True
if original_tool:
should_cache = original_tool.cache_function(
args_dict, raw_result
)
if should_cache:
self.tools_handler.cache.add(
tool=func_name, input=input_str, output=raw_result
)
# Convert to string for message
result = (
str(raw_result)
if not isinstance(raw_result, str)
else raw_result
)
except Exception as e:
result = f"Error executing tool: {e}"
if self.task:
self.task.increment_tools_errors()
# Emit tool usage error event
crewai_event_bus.emit(
self,
event=ToolUsageErrorEvent(
tool_name=func_name,
tool_args=args_dict,
from_agent=self.agent,
from_task=self.task,
agent_key=agent_key,
error=e,
),
)
error_event_emitted = True
elif max_usage_reached and original_tool:
# Return error message when max usage limit is reached
result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
# Execute after_tool_call hooks (even if blocked, to allow logging/monitoring)
after_hook_context = ToolCallHookContext(
tool_name=func_name,
tool_input=args_dict,
tool=structured_tool, # type: ignore[arg-type]
agent=self.agent,
task=self.task,
crew=self.crew,
tool_result=result,
)
after_hooks = get_after_tool_call_hooks()
try:
for after_hook in after_hooks:
after_hook_result = after_hook(after_hook_context)
if after_hook_result is not None:
result = after_hook_result
after_hook_context.tool_result = result
except Exception as hook_error:
if self.agent.verbose:
self._printer.print(
content=f"Error in after_tool_call hook: {hook_error}",
color="red",
)
if not error_event_emitted:
crewai_event_bus.emit(
self,
event=ToolUsageFinishedEvent(
output=result,
text=result,
)
self.state.is_finished = True
return "tool_result_is_final"
return "native_tool_completed"
for execution_result in execution_results:
call_id = cast(str, execution_result["call_id"])
func_name = cast(str, execution_result["func_name"])
result = cast(str, execution_result["result"])
from_cache = cast(bool, execution_result["from_cache"])
original_tool = execution_result["original_tool"]
tool_name=func_name,
tool_args=args_dict,
from_agent=self.agent,
from_task=self.task,
agent_key=agent_key,
started_at=started_at,
finished_at=datetime.now(),
),
)
# Append tool result message
tool_message: LLMMessage = {
"role": "tool",
"tool_call_id": call_id,
@@ -810,224 +922,6 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
return "native_tool_completed"
def _should_parallelize_native_tool_calls(self, tool_calls: list[Any]) -> bool:
"""Determine if native tool calls are safe to run in parallel."""
if len(tool_calls) <= 1:
return False
for tool_call in tool_calls:
info = extract_tool_call_info(tool_call)
if not info:
continue
_, func_name, _ = info
original_tool = None
for tool in self.original_tools or []:
if sanitize_tool_name(tool.name) == func_name:
original_tool = tool
break
if not original_tool:
continue
if getattr(original_tool, "result_as_answer", False):
return False
if getattr(original_tool, "max_usage_count", None) is not None:
return False
return True
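
The parallel branch above fans calls out to a thread pool but reassembles results in submission order. A self-contained sketch of that pattern, with plain callables standing in for tool calls:

```python
# Ordered fan-out: results come back in submission order even though
# futures complete in arbitrary order.
from concurrent.futures import ThreadPoolExecutor, as_completed

def run_ordered(calls: list) -> list:
    results: list = [None] * len(calls)
    with ThreadPoolExecutor(max_workers=min(8, len(calls))) as pool:
        future_to_idx = {pool.submit(call): idx for idx, call in enumerate(calls)}
        for future in as_completed(future_to_idx):
            results[future_to_idx[future]] = future.result()
    return results

print(run_ordered([lambda: 1, lambda: 2, lambda: 3]))  # [1, 2, 3]
```
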
def _execute_single_native_tool_call(self, tool_call: Any) -> dict[str, Any]:
"""Execute a single native tool call and return metadata/result."""
info = extract_tool_call_info(tool_call)
if not info:
raise ValueError("Invalid native tool call format")
call_id, func_name, func_args = info
# Parse arguments
if isinstance(func_args, str):
try:
args_dict = json.loads(func_args)
except json.JSONDecodeError:
args_dict = {}
else:
args_dict = func_args
# Get agent_key for event tracking
agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"
# Find original tool by matching sanitized name (needed for cache_function and result_as_answer)
original_tool = None
for tool in self.original_tools or []:
if sanitize_tool_name(tool.name) == func_name:
original_tool = tool
break
# Check if tool has reached max usage count
max_usage_reached = False
if (
original_tool
and original_tool.max_usage_count is not None
and original_tool.current_usage_count >= original_tool.max_usage_count
):
max_usage_reached = True
# Check cache before executing
from_cache = False
input_str = json.dumps(args_dict) if args_dict else ""
if self.tools_handler and self.tools_handler.cache:
cached_result = self.tools_handler.cache.read(
tool=func_name, input=input_str
)
if cached_result is not None:
result = (
str(cached_result)
if not isinstance(cached_result, str)
else cached_result
)
from_cache = True
# Emit tool usage started event
started_at = datetime.now()
crewai_event_bus.emit(
self,
event=ToolUsageStartedEvent(
tool_name=func_name,
tool_args=args_dict,
from_agent=self.agent,
from_task=self.task,
agent_key=agent_key,
),
)
error_event_emitted = False
track_delegation_if_needed(func_name, args_dict, self.task)
structured_tool: CrewStructuredTool | None = None
for structured in self.tools or []:
if sanitize_tool_name(structured.name) == func_name:
structured_tool = structured
break
hook_blocked = False
before_hook_context = ToolCallHookContext(
tool_name=func_name,
tool_input=args_dict,
tool=structured_tool, # type: ignore[arg-type]
agent=self.agent,
task=self.task,
crew=self.crew,
)
before_hooks = get_before_tool_call_hooks()
try:
for hook in before_hooks:
hook_result = hook(before_hook_context)
if hook_result is False:
hook_blocked = True
break
except Exception as hook_error:
if self.agent.verbose:
self._printer.print(
content=f"Error in before_tool_call hook: {hook_error}",
color="red",
)
if hook_blocked:
result = f"Tool execution blocked by hook. Tool: {func_name}"
elif not from_cache and not max_usage_reached:
result = "Tool not found"
if func_name in self._available_functions:
try:
tool_func = self._available_functions[func_name]
raw_result = tool_func(**args_dict)
# Add to cache after successful execution (before string conversion)
if self.tools_handler and self.tools_handler.cache:
should_cache = True
if original_tool:
should_cache = original_tool.cache_function(
args_dict, raw_result
)
if should_cache:
self.tools_handler.cache.add(
tool=func_name, input=input_str, output=raw_result
)
# Convert to string for message
result = (
str(raw_result)
if not isinstance(raw_result, str)
else raw_result
)
except Exception as e:
result = f"Error executing tool: {e}"
if self.task:
self.task.increment_tools_errors()
# Emit tool usage error event
crewai_event_bus.emit(
self,
event=ToolUsageErrorEvent(
tool_name=func_name,
tool_args=args_dict,
from_agent=self.agent,
from_task=self.task,
agent_key=agent_key,
error=e,
),
)
error_event_emitted = True
elif max_usage_reached and original_tool:
# Return error message when max usage limit is reached
result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
# Execute after_tool_call hooks (even if blocked, to allow logging/monitoring)
after_hook_context = ToolCallHookContext(
tool_name=func_name,
tool_input=args_dict,
tool=structured_tool, # type: ignore[arg-type]
agent=self.agent,
task=self.task,
crew=self.crew,
tool_result=result,
)
after_hooks = get_after_tool_call_hooks()
try:
for after_hook in after_hooks:
after_hook_result = after_hook(after_hook_context)
if after_hook_result is not None:
result = after_hook_result
after_hook_context.tool_result = result
except Exception as hook_error:
if self.agent.verbose:
self._printer.print(
content=f"Error in after_tool_call hook: {hook_error}",
color="red",
)
if not error_event_emitted:
crewai_event_bus.emit(
self,
event=ToolUsageFinishedEvent(
output=result,
tool_name=func_name,
tool_args=args_dict,
from_agent=self.agent,
from_task=self.task,
agent_key=agent_key,
started_at=started_at,
finished_at=datetime.now(),
),
)
return {
"call_id": call_id,
"func_name": func_name,
"result": result,
"from_cache": from_cache,
"original_tool": original_tool,
}
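
The hook contract this method enforces (a before-hook returning `False` blocks the call; an after-hook returning a non-None value rewrites the result) can be sketched as plain functions. The context fields are taken from `ToolCallHookContext` above; hook registration machinery is not part of this hunk, so everything below is illustrative:

```python
# Hedged sketch of the before/after tool-call hook contract.

def block_risky_tools(ctx) -> bool | None:
    # Returning False sets hook_blocked above and skips execution.
    if ctx.tool_name == "shell_exec":       # hypothetical tool name
        return False
    return None                             # proceed normally

def redact_result(ctx) -> str | None:
    # A non-None return value replaces the tool result.
    if ctx.tool_result and "secret" in ctx.tool_result:
        return "[redacted]"
    return None
```
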
def _extract_tool_name(self, tool_call: Any) -> str:
"""Extract tool name from various tool call formats."""
if hasattr(tool_call, "function"):
@@ -1212,7 +1106,9 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
if self.state.ask_for_human_input:
formatted_answer = self._handle_human_feedback(formatted_answer)
self._save_to_memory(formatted_answer)
self._create_short_term_memory(formatted_answer)
self._create_long_term_memory(formatted_answer)
self._create_external_memory(formatted_answer)
return {"output": formatted_answer.output}
@@ -1295,7 +1191,9 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
if self.state.ask_for_human_input:
formatted_answer = await self._ahandle_human_feedback(formatted_answer)
self._save_to_memory(formatted_answer)
self._create_short_term_memory(formatted_answer)
self._create_long_term_memory(formatted_answer)
self._create_external_memory(formatted_answer)
return {"output": formatted_answer.output}

View File

@@ -7,7 +7,6 @@ from crewai.flow.async_feedback import (
from crewai.flow.flow import Flow, and_, listen, or_, router, start
from crewai.flow.flow_config import flow_config
from crewai.flow.human_feedback import HumanFeedbackResult, human_feedback
from crewai.flow.input_provider import InputProvider, InputResponse
from crewai.flow.persistence import persist
from crewai.flow.visualization import (
FlowStructure,
@@ -23,8 +22,6 @@ __all__ = [
"HumanFeedbackPending",
"HumanFeedbackProvider",
"HumanFeedbackResult",
"InputProvider",
"InputResponse",
"PendingFeedbackContext",
"and_",
"build_flow_structure",

View File

@@ -1,8 +1,7 @@
"""Default provider implementations for human feedback and user input.
"""Default provider implementations for human feedback.
This module provides the ConsoleProvider, which is the default synchronous
provider that collects both feedback (for ``@human_feedback``) and user input
(for ``Flow.ask()``) via console.
provider that collects feedback via console input.
"""
from __future__ import annotations
@@ -17,23 +16,20 @@ if TYPE_CHECKING:
class ConsoleProvider:
"""Default synchronous console-based provider for feedback and input.
"""Default synchronous console-based feedback provider.
This provider blocks execution and waits for console input from the user.
It serves two purposes:
- **Feedback** (``request_feedback``): Used by ``@human_feedback`` to
display method output and collect review feedback.
- **Input** (``request_input``): Used by ``Flow.ask()`` to prompt the
user with a question and collect a response.
It displays the method output with formatting and prompts for feedback.
This is the default provider used when no custom provider is specified
in the ``@human_feedback`` decorator or on the Flow's ``input_provider``.
in the @human_feedback decorator.
Example (feedback):
Example:
```python
from crewai.flow.async_feedback import ConsoleProvider
# Explicitly use console provider
@human_feedback(
message="Review this:",
provider=ConsoleProvider(),
@@ -41,20 +37,9 @@ class ConsoleProvider:
def my_method(self):
return "Content to review"
```
Example (input):
```python
from crewai.flow import Flow, start
class MyFlow(Flow):
@start()
def gather_info(self):
topic = self.ask("What topic should we research?")
return topic
```
"""
def __init__(self, verbose: bool = True) -> None:
def __init__(self, verbose: bool = True):
"""Initialize the console provider.
Args:
@@ -139,55 +124,3 @@ class ConsoleProvider:
finally:
# Resume live updates
formatter.resume_live_updates()
def request_input(
self,
message: str,
flow: Flow[Any],
metadata: dict[str, Any] | None = None,
) -> str | None:
"""Request user input via console (blocking).
Displays the prompt message with formatting and waits for the user
to type their response. Used by ``Flow.ask()``.
Unlike ``request_feedback``, this method does not display an
"OUTPUT FOR REVIEW" panel or emit feedback-specific events (those
are handled by ``ask()`` itself).
Args:
message: The question or prompt to display to the user.
flow: The Flow instance requesting input.
metadata: Optional metadata from the caller. Ignored by the
console provider (console has no concept of user routing).
Returns:
The user's input as a stripped string. Returns empty string
if user presses Enter without input. Never returns None
(console input is always available).
"""
from crewai.events.event_listener import event_listener
# Pause live updates during human input
formatter = event_listener.formatter
formatter.pause_live_updates()
try:
console = formatter.console
if self.verbose:
console.print()
console.print(message, style="yellow")
console.print()
response = input(">>> \n").strip()
else:
response = input(f"{message} ").strip()
# Add line break after input so formatter output starts clean
console.print()
return response
finally:
# Resume live updates
formatter.resume_live_updates()

View File

@@ -10,7 +10,6 @@ import asyncio
from collections.abc import (
Callable,
ItemsView,
Iterable,
Iterator,
KeysView,
Sequence,
@@ -18,7 +17,6 @@ from collections.abc import (
)
from concurrent.futures import Future
import copy
import enum
import inspect
import logging
import threading
@@ -29,10 +27,8 @@ from typing import (
Generic,
Literal,
ParamSpec,
SupportsIndex,
TypeVar,
cast,
overload,
)
from uuid import uuid4
@@ -81,12 +77,7 @@ from crewai.flow.flow_wrappers import (
StartMethod,
)
from crewai.flow.persistence.base import FlowPersistence
from crewai.flow.types import (
FlowExecutionData,
FlowMethodName,
InputHistoryEntry,
PendingListenerKey,
)
from crewai.flow.types import FlowExecutionData, FlowMethodName, PendingListenerKey
from crewai.flow.utils import (
_extract_all_methods,
_extract_all_methods_recursive,
@@ -425,17 +416,13 @@ def and_(*conditions: str | FlowCondition | Callable[..., Any]) -> FlowCondition
return {"type": AND_CONDITION, "conditions": processed_conditions}
class LockedListProxy(list, Generic[T]): # type: ignore[type-arg]
class LockedListProxy(Generic[T]):
"""Thread-safe proxy for list operations.
Subclasses ``list`` so that ``isinstance(proxy, list)`` returns True,
which is required by libraries like LanceDB and Pydantic that do strict
type checks. All mutations go through the lock; reads delegate to the
underlying list.
Wraps a list and uses a lock for all mutating operations.
"""
def __init__(self, lst: list[T], lock: threading.Lock) -> None:
super().__init__() # empty builtin list; all access goes through self._list
self._list = lst
self._lock = lock
@@ -443,11 +430,11 @@ class LockedListProxy(list, Generic[T]): # type: ignore[type-arg]
with self._lock:
self._list.append(item)
def extend(self, items: Iterable[T]) -> None:
def extend(self, items: list[T]) -> None:
with self._lock:
self._list.extend(items)
def insert(self, index: SupportsIndex, item: T) -> None:
def insert(self, index: int, item: T) -> None:
with self._lock:
self._list.insert(index, item)
@@ -455,7 +442,7 @@ class LockedListProxy(list, Generic[T]): # type: ignore[type-arg]
with self._lock:
self._list.remove(item)
def pop(self, index: SupportsIndex = -1) -> T:
def pop(self, index: int = -1) -> T:
with self._lock:
return self._list.pop(index)
@@ -463,23 +450,15 @@ class LockedListProxy(list, Generic[T]): # type: ignore[type-arg]
with self._lock:
self._list.clear()
@overload
def __setitem__(self, index: SupportsIndex, value: T) -> None: ...
@overload
def __setitem__(self, index: slice, value: Iterable[T]) -> None: ...
def __setitem__(self, index: Any, value: Any) -> None:
def __setitem__(self, index: int, value: T) -> None:
with self._lock:
self._list[index] = value
def __delitem__(self, index: SupportsIndex | slice) -> None:
def __delitem__(self, index: int) -> None:
with self._lock:
del self._list[index]
@overload
def __getitem__(self, index: SupportsIndex) -> T: ...
@overload
def __getitem__(self, index: slice) -> list[T]: ...
def __getitem__(self, index: Any) -> Any:
def __getitem__(self, index: int) -> T:
return self._list[index]
def __len__(self) -> int:
@@ -497,31 +476,14 @@ class LockedListProxy(list, Generic[T]): # type: ignore[type-arg]
def __bool__(self) -> bool:
return bool(self._list)
def __eq__(self, other: object) -> bool:
"""Compare based on the underlying list contents."""
if isinstance(other, LockedListProxy):
# Avoid deadlocks by acquiring locks in a consistent order.
first, second = (self, other) if id(self) <= id(other) else (other, self)
with first._lock:
with second._lock:
return first._list == second._list
with self._lock:
return self._list == other
def __ne__(self, other: object) -> bool:
return not self.__eq__(other)
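
A standalone sketch of the subclass trick used here: inheriting from `list` makes strict `isinstance()` checks (Pydantic, LanceDB) pass, while mutations still funnel through the lock and reads delegate to the wrapped list:

```python
import threading

class LockedList(list):
    def __init__(self, data: list, lock: threading.Lock) -> None:
        super().__init__()              # builtin storage stays empty
        self._data, self._lock = data, lock

    def append(self, item) -> None:
        with self._lock:
            self._data.append(item)

    def __getitem__(self, index):
        return self._data[index]

    def __len__(self) -> int:
        return len(self._data)

proxy = LockedList([1, 2], threading.Lock())
proxy.append(3)
assert isinstance(proxy, list) and proxy[2] == 3 and len(proxy) == 3
```
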
class LockedDictProxy(dict, Generic[T]): # type: ignore[type-arg]
class LockedDictProxy(Generic[T]):
"""Thread-safe proxy for dict operations.
Subclasses ``dict`` so that ``isinstance(proxy, dict)`` returns True,
which is required by libraries like Pydantic that do strict type checks.
All mutations go through the lock; reads delegate to the underlying dict.
Wraps a dict and uses a lock for all mutating operations.
"""
def __init__(self, d: dict[str, T], lock: threading.Lock) -> None:
super().__init__() # empty builtin dict; all access goes through self._dict
self._dict = d
self._lock = lock
@@ -533,11 +495,11 @@ class LockedDictProxy(dict, Generic[T]): # type: ignore[type-arg]
with self._lock:
del self._dict[key]
def pop(self, key: str, *default: T) -> T: # type: ignore[override]
def pop(self, key: str, *default: T) -> T:
with self._lock:
return self._dict.pop(key, *default)
def update(self, other: dict[str, T]) -> None: # type: ignore[override]
def update(self, other: dict[str, T]) -> None:
with self._lock:
self._dict.update(other)
@@ -545,7 +507,7 @@ class LockedDictProxy(dict, Generic[T]): # type: ignore[type-arg]
with self._lock:
self._dict.clear()
def setdefault(self, key: str, default: T) -> T: # type: ignore[override]
def setdefault(self, key: str, default: T) -> T:
with self._lock:
return self._dict.setdefault(key, default)
@@ -561,16 +523,16 @@ class LockedDictProxy(dict, Generic[T]): # type: ignore[type-arg]
def __contains__(self, key: object) -> bool:
return key in self._dict
def keys(self) -> KeysView[str]: # type: ignore[override]
def keys(self) -> KeysView[str]:
return self._dict.keys()
def values(self) -> ValuesView[T]: # type: ignore[override]
def values(self) -> ValuesView[T]:
return self._dict.values()
def items(self) -> ItemsView[str, T]: # type: ignore[override]
def items(self) -> ItemsView[str, T]:
return self._dict.items()
def get(self, key: str, default: T | None = None) -> T | None: # type: ignore[override]
def get(self, key: str, default: T | None = None) -> T | None:
return self._dict.get(key, default)
def __repr__(self) -> str:
@@ -579,20 +541,6 @@ class LockedDictProxy(dict, Generic[T]): # type: ignore[type-arg]
def __bool__(self) -> bool:
return bool(self._dict)
def __eq__(self, other: object) -> bool:
"""Compare based on the underlying dict contents."""
if isinstance(other, LockedDictProxy):
# Avoid deadlocks by acquiring locks in a consistent order.
first, second = (self, other) if id(self) <= id(other) else (other, self)
with first._lock:
with second._lock:
return first._dict == second._dict
with self._lock:
return self._dict == other
def __ne__(self, other: object) -> bool:
return not self.__eq__(other)
class StateProxy(Generic[T]):
"""Proxy that provides thread-safe access to flow state.
@@ -752,10 +700,6 @@ class Flow(Generic[T], metaclass=FlowMeta):
name: str | None = None
tracing: bool | None = None
stream: bool = False
memory: Any = (
None # Memory | MemoryScope | MemorySlice | None; auto-created if not set
)
input_provider: Any = None # InputProvider | None; per-flow override for self.ask()
def __class_getitem__(cls: type[Flow[T]], item: type[T]) -> type[Flow[T]]:
class _FlowGeneric(cls): # type: ignore
@@ -802,9 +746,6 @@ class Flow(Generic[T], metaclass=FlowMeta):
self._pending_feedback_context: PendingFeedbackContext | None = None
self.suppress_flow_events: bool = suppress_flow_events
# User input history (for self.ask())
self._input_history: list[InputHistoryEntry] = []
# Initialize state with initial values
self._state = self._create_initial_state()
self.tracing = tracing
@@ -826,14 +767,6 @@ class Flow(Generic[T], metaclass=FlowMeta):
),
)
# Auto-create memory if not provided at class or instance level.
# Internal flows (RecallFlow, EncodingFlow) set _skip_auto_memory
# to avoid creating a wasteful standalone Memory instance.
if self.memory is None and not getattr(self, "_skip_auto_memory", False):
from crewai.memory.unified_memory import Memory
self.memory = Memory()
# Register all flow-related methods
for method_name in dir(self):
if not method_name.startswith("_"):
@@ -844,63 +777,6 @@ class Flow(Generic[T], metaclass=FlowMeta):
method = method.__get__(self, self.__class__)
self._methods[method.__name__] = method
def recall(self, query: str, **kwargs: Any) -> Any:
"""Recall relevant memories. Delegates to this flow's memory.
Args:
query: Natural language query.
**kwargs: Passed to memory.recall (e.g. scope, categories, limit, depth).
Returns:
Result of memory.recall(query, **kwargs).
Raises:
ValueError: If no memory is configured for this flow.
"""
if self.memory is None:
raise ValueError("No memory configured for this flow")
return self.memory.recall(query, **kwargs)
def remember(self, content: str | list[str], **kwargs: Any) -> Any:
"""Store one or more items in memory.
Pass a single string for synchronous save (returns the MemoryRecord).
Pass a list of strings for non-blocking batch save (returns immediately).
Args:
content: Text or list of texts to remember.
**kwargs: Passed to memory.remember / remember_many
(e.g. scope, categories, metadata, importance).
Returns:
MemoryRecord for single item, empty list for batch (background save).
Raises:
ValueError: If no memory is configured for this flow.
"""
if self.memory is None:
raise ValueError("No memory configured for this flow")
if isinstance(content, list):
return self.memory.remember_many(content, **kwargs)
return self.memory.remember(content, **kwargs)
def extract_memories(self, content: str) -> list[str]:
"""Extract discrete memories from content. Delegates to this flow's memory.
Args:
content: Raw text (e.g. task + result dump).
Returns:
List of short, self-contained memory statements.
Raises:
ValueError: If no memory is configured for this flow.
"""
if self.memory is None:
raise ValueError("No memory configured for this flow")
result: list[str] = self.memory.extract_memories(content)
return result
def _mark_or_listener_fired(self, listener_name: FlowMethodName) -> bool:
"""Mark an OR listener as fired atomically.
@@ -1370,10 +1246,8 @@ class Flow(Generic[T], metaclass=FlowMeta):
ValueError: If structured state model lacks 'id' field
TypeError: If state is neither BaseModel nor dictionary
"""
init_state = self.initial_state
# Handle case where initial_state is None but we have a type parameter
if init_state is None and hasattr(self, "_initial_state_t"):
if self.initial_state is None and hasattr(self, "_initial_state_t"):
state_type = self._initial_state_t
if isinstance(state_type, type):
if issubclass(state_type, FlowState):
@@ -1397,12 +1271,12 @@ class Flow(Generic[T], metaclass=FlowMeta):
return cast(T, {"id": str(uuid4())})
# Handle case where no initial state is provided
if init_state is None:
if self.initial_state is None:
return cast(T, {"id": str(uuid4())})
# Handle case where initial_state is a type (class)
if isinstance(init_state, type):
state_class = init_state
if isinstance(self.initial_state, type):
state_class: type[T] = self.initial_state
if issubclass(state_class, FlowState):
return state_class()
if issubclass(state_class, BaseModel):
@@ -1413,19 +1287,19 @@ class Flow(Generic[T], metaclass=FlowMeta):
if not getattr(model_instance, "id", None):
object.__setattr__(model_instance, "id", str(uuid4()))
return model_instance
if init_state is dict:
if self.initial_state is dict:
return cast(T, {"id": str(uuid4())})
# Handle dictionary instance case
if isinstance(init_state, dict):
new_state = dict(init_state) # Copy to avoid mutations
if isinstance(self.initial_state, dict):
new_state = dict(self.initial_state) # Copy to avoid mutations
if "id" not in new_state:
new_state["id"] = str(uuid4())
return cast(T, new_state)
# Handle BaseModel instance case
if isinstance(init_state, BaseModel):
model = cast(BaseModel, init_state)
if isinstance(self.initial_state, BaseModel):
model = cast(BaseModel, self.initial_state)
if not hasattr(model, "id"):
raise ValueError("Flow state model must have an 'id' field")
@@ -1824,13 +1698,8 @@ class Flow(Generic[T], metaclass=FlowMeta):
self._pending_and_listeners.clear()
self._clear_or_listeners()
else:
# Only enter resumption mode if there are completed methods to
# replay. When _completed_methods is empty (e.g. a pure
# state-reload via kickoff(inputs={"id": ...})), the flow
# executes from scratch and the flag would incorrectly
# suppress cyclic re-execution on the second iteration.
if self._completed_methods:
self._is_execution_resuming = True
# We're restoring from persistence, set the flag
self._is_execution_resuming = True
if inputs:
# Override the id in the state if it exists in inputs
@@ -2003,9 +1872,6 @@ class Flow(Generic[T], metaclass=FlowMeta):
return final_output
finally:
# Ensure all background memory saves complete before returning
if self.memory is not None and hasattr(self.memory, "drain_writes"):
self.memory.drain_writes()
if request_id_token is not None:
current_flow_request_id.reset(request_id_token)
if flow_id_token is not None:
@@ -2148,24 +2014,15 @@ class Flow(Generic[T], metaclass=FlowMeta):
if future:
self._event_futures.append(future)
# Set method name in context so ask() can read it without
# stack inspection. Must happen before copy_context() so the
# value propagates into the thread pool for sync methods.
from crewai.flow.flow_context import current_flow_method_name
if asyncio.iscoroutinefunction(method):
result = await method(*args, **kwargs)
else:
# Run sync methods in thread pool for isolation
# This allows Agent.kickoff() to work synchronously inside Flow methods
import contextvars
method_name_token = current_flow_method_name.set(method_name)
try:
if asyncio.iscoroutinefunction(method):
result = await method(*args, **kwargs)
else:
# Run sync methods in thread pool for isolation
# This allows Agent.kickoff() to work synchronously inside Flow methods
import contextvars
ctx = contextvars.copy_context()
result = await asyncio.to_thread(ctx.run, method, *args, **kwargs)
finally:
current_flow_method_name.reset(method_name_token)
ctx = contextvars.copy_context()
result = await asyncio.to_thread(ctx.run, method, *args, **kwargs)
# Auto-await coroutines returned from sync methods (enables AgentExecutor pattern)
if asyncio.iscoroutine(result):
@@ -2198,8 +2055,6 @@ class Flow(Generic[T], metaclass=FlowMeta):
from crewai.flow.async_feedback.types import HumanFeedbackPending
if isinstance(e, HumanFeedbackPending):
e.context.method_name = method_name
# Auto-save pending feedback (create default persistence if needed)
if self._persistence is None:
from crewai.flow.persistence import SQLiteFlowPersistence
@@ -2299,23 +2154,14 @@ class Flow(Generic[T], metaclass=FlowMeta):
router_name, router_input, current_triggering_event_id
)
if router_result: # Only add non-None results
router_result_str = (
router_result.value
if isinstance(router_result, enum.Enum)
else str(router_result)
)
router_results.append(FlowMethodName(router_result_str))
router_results.append(FlowMethodName(str(router_result)))
# If this was a human_feedback router, map the outcome to the feedback
if self.last_human_feedback is not None:
router_result_to_feedback[router_result_str] = (
router_result_to_feedback[str(router_result)] = (
self.last_human_feedback
)
current_trigger = (
FlowMethodName(
router_result.value
if isinstance(router_result, enum.Enum)
else str(router_result)
)
FlowMethodName(str(router_result))
if router_result is not None
else FlowMethodName("") # Update for next iteration of router chain
)
@@ -2582,12 +2428,8 @@ class Flow(Generic[T], metaclass=FlowMeta):
return (None, None)
# For cyclic flows, clear from completed to allow re-execution
self._completed_methods.discard(listener_name)
# Clear ALL fired OR listeners so they can fire again in the new cycle.
# This mirrors what _execute_start_method does for start-method cycles.
# Only discarding the individual listener is insufficient because
# downstream or_() listeners (e.g., method_a listening to
# or_(handler_a, handler_b)) would remain suppressed across iterations.
self._clear_or_listeners()
# Also clear from fired OR listeners for cyclic flows
self._discard_or_listener(listener_name)
try:
method = self._methods[listener_name]
@@ -2631,206 +2473,6 @@ class Flow(Generic[T], metaclass=FlowMeta):
logger.error(f"Error executing listener {listener_name}: {e}")
raise
# ── User Input (self.ask) ────────────────────────────────────────
def _resolve_input_provider(self) -> Any:
"""Resolve the input provider using the priority chain.
Resolution order:
1. ``self.input_provider`` (per-flow override)
2. ``flow_config.input_provider`` (global default)
3. ``ConsoleProvider()`` (built-in fallback)
Returns:
An object implementing the ``InputProvider`` protocol.
"""
from crewai.flow.async_feedback.providers import ConsoleProvider
from crewai.flow.flow_config import flow_config
if self.input_provider is not None:
return self.input_provider
if flow_config.input_provider is not None:
return flow_config.input_provider
return ConsoleProvider()
def _checkpoint_state_for_ask(self) -> None:
"""Auto-checkpoint flow state before waiting for user input.
If persistence is configured, saves the current state so that
``self.state`` is recoverable even if the process crashes while
waiting for input.
This is best-effort: if persistence is not configured, this is a no-op.
"""
if self._persistence is None:
return
try:
state_data = (
self._state
if isinstance(self._state, dict)
else self._state.model_dump()
)
self._persistence.save_state(
flow_uuid=self.flow_id,
method_name="_ask_checkpoint",
state_data=state_data,
)
except Exception:
logger.debug("Failed to checkpoint state before ask()", exc_info=True)
def ask(
self,
message: str,
timeout: float | None = None,
metadata: dict[str, Any] | None = None,
) -> str | None:
"""Request input from the user during flow execution.
Blocks the current thread until the user provides input or the
timeout expires. Works in both sync and async flow methods (the
flow framework runs sync methods in a thread pool via
``asyncio.to_thread``, so the event loop stays free).
Timeout ensures flows always terminate. When timeout expires,
``None`` is returned, enabling the pattern::
while (msg := self.ask("You: ", timeout=300)) is not None:
process(msg)
Before waiting for input, the current ``self.state`` is automatically
checkpointed to persistence (if configured) for durability.
Args:
message: The question or prompt to display to the user.
timeout: Maximum seconds to wait for input. ``None`` means
wait indefinitely. When timeout expires, returns ``None``.
Note: timeout is best-effort for the provider call --
``ask()`` returns ``None`` promptly, but the underlying
``request_input()`` may continue running in a background
thread until it completes naturally. Network providers
should implement their own internal timeouts.
metadata: Optional metadata to send to the input provider,
such as user ID, channel, session context. The provider
can use this to route the question to the right recipient.
Returns:
The user's input as a string, or ``None`` on timeout, disconnect,
or provider error. Empty string ``""`` means the user pressed
Enter without typing (intentional empty input).
Example:
```python
class MyFlow(Flow):
@start()
def gather_info(self):
topic = self.ask(
"What topic should we research?",
metadata={"user_id": "u123", "channel": "#research"},
)
if topic is None:
return "No input received"
return topic
```
"""
from concurrent.futures import (
ThreadPoolExecutor,
TimeoutError as FuturesTimeoutError,
)
from datetime import datetime
from crewai.events.types.flow_events import (
FlowInputReceivedEvent,
FlowInputRequestedEvent,
)
from crewai.flow.flow_context import current_flow_method_name
from crewai.flow.input_provider import InputResponse
method_name = current_flow_method_name.get("unknown")
# Emit input requested event
crewai_event_bus.emit(
self,
FlowInputRequestedEvent(
type="flow_input_requested",
flow_name=self.name or self.__class__.__name__,
method_name=method_name,
message=message,
metadata=metadata,
),
)
# Auto-checkpoint state before waiting
self._checkpoint_state_for_ask()
provider = self._resolve_input_provider()
raw: str | InputResponse | None = None
try:
if timeout is not None:
# Manual executor management to avoid shutdown(wait=True)
# deadlock when the provider call outlives the timeout.
executor = ThreadPoolExecutor(max_workers=1)
future = executor.submit(
provider.request_input, message, self, metadata
)
try:
raw = future.result(timeout=timeout)
except FuturesTimeoutError:
future.cancel()
raw = None
finally:
# wait=False so we don't block if the provider is still
# running (e.g. input() stuck waiting for user).
# cancel_futures=True cleans up any queued-but-not-started tasks.
executor.shutdown(wait=False, cancel_futures=True)
else:
raw = provider.request_input(message, self, metadata=metadata)
except KeyboardInterrupt:
raise
except Exception:
logger.debug("Input provider error in ask()", exc_info=True)
raw = None
# Normalize provider response: str, InputResponse, or None
response: str | None = None
response_metadata: dict[str, Any] | None = None
if isinstance(raw, InputResponse):
response = raw.text
response_metadata = raw.metadata
elif isinstance(raw, str):
response = raw
else:
response = None
# Record in history
self._input_history.append(
{
"message": message,
"response": response,
"method_name": method_name,
"timestamp": datetime.now(),
"metadata": metadata,
"response_metadata": response_metadata,
}
)
# Emit input received event
crewai_event_bus.emit(
self,
FlowInputReceivedEvent(
type="flow_input_received",
flow_name=self.name or self.__class__.__name__,
method_name=method_name,
message=message,
response=response,
metadata=metadata,
response_metadata=response_metadata,
),
)
return response
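
An expanded form of the docstring's timeout pattern: collect messages until the user stays quiet for five minutes, then finish cleanly:

```python
from crewai.flow.flow import Flow, start

class ChatFlow(Flow):
    @start()
    def chat(self) -> list[str]:
        transcript: list[str] = []
        while (msg := self.ask("You: ", timeout=300)) is not None:
            transcript.append(msg)   # empty string still counts as input
        return transcript
```
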
def _request_human_feedback(
self,
message: str,

View File

@@ -11,7 +11,6 @@ from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from crewai.flow.async_feedback.types import HumanFeedbackProvider
from crewai.flow.input_provider import InputProvider
class FlowConfig:
@@ -21,15 +20,10 @@ class FlowConfig:
hitl_provider: The human-in-the-loop feedback provider.
Defaults to None (uses console input).
Can be overridden by deployments at startup.
input_provider: The input provider used by ``Flow.ask()``.
Defaults to None (uses ``ConsoleProvider``).
Can be overridden by
deployments at startup.
"""
def __init__(self) -> None:
self._hitl_provider: HumanFeedbackProvider | None = None
self._input_provider: InputProvider | None = None
@property
def hitl_provider(self) -> Any:
@@ -41,32 +35,6 @@ class FlowConfig:
"""Set the HITL provider."""
self._hitl_provider = provider
@property
def input_provider(self) -> Any:
"""Get the configured input provider for ``Flow.ask()``.
Returns:
The configured InputProvider instance, or None if not set
(in which case ``ConsoleProvider`` is used as default).
"""
return self._input_provider
@input_provider.setter
def input_provider(self, provider: Any) -> None:
"""Set the input provider for ``Flow.ask()``.
Args:
provider: An object implementing the ``InputProvider`` protocol.
Example:
```python
from crewai.flow import flow_config
flow_config.input_provider = WebSocketInputProvider(...)
```
"""
self._input_provider = provider
# Singleton instance
flow_config = FlowConfig()

View File

@@ -14,7 +14,3 @@ current_flow_request_id: contextvars.ContextVar[str | None] = contextvars.Contex
current_flow_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
"flow_id", default=None
)
current_flow_method_name: contextvars.ContextVar[str] = contextvars.ContextVar(
"flow_method_name", default="unknown"
)
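
A minimal sketch of how this contextvar is used elsewhere in the diff: set a token before running a flow method, read it from nested calls, then reset:

```python
import contextvars

method_name: contextvars.ContextVar[str] = contextvars.ContextVar(
    "flow_method_name", default="unknown"
)

token = method_name.set("gather_info")
try:
    assert method_name.get() == "gather_info"   # visible to nested calls like ask()
finally:
    method_name.reset(token)
```
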

View File

@@ -62,8 +62,6 @@ from datetime import datetime
from functools import wraps
from typing import TYPE_CHECKING, Any, TypeVar
from pydantic import BaseModel, Field
from crewai.flow.flow_wrappers import FlowMethod
@@ -134,12 +132,10 @@ class HumanFeedbackConfig:
message: str
emit: Sequence[str] | None = None
llm: str | BaseLLM | None = "gpt-4o-mini"
llm: str | BaseLLM | None = None
default_outcome: str | None = None
metadata: dict[str, Any] | None = None
provider: HumanFeedbackProvider | None = None
learn: bool = False
learn_source: str = "hitl"
class HumanFeedbackMethod(FlowMethod[Any, Any]):
@@ -159,36 +155,13 @@ class HumanFeedbackMethod(FlowMethod[Any, Any]):
__human_feedback_config__: HumanFeedbackConfig | None = None
class PreReviewResult(BaseModel):
"""Structured output from the HITL pre-review LLM call."""
improved_output: str = Field(
description="The improved version of the output with past human feedback lessons applied.",
)
class DistilledLessons(BaseModel):
"""Structured output from the HITL lesson distillation LLM call."""
lessons: list[str] = Field(
default_factory=list,
description=(
"Generalizable lessons extracted from the human feedback. "
"Each lesson should be a reusable rule or preference. "
"Return an empty list if the feedback contains no generalizable guidance."
),
)
def human_feedback(
message: str,
emit: Sequence[str] | None = None,
llm: str | BaseLLM | None = "gpt-4o-mini",
llm: str | BaseLLM | None = None,
default_outcome: str | None = None,
metadata: dict[str, Any] | None = None,
provider: HumanFeedbackProvider | None = None,
learn: bool = False,
learn_source: str = "hitl"
) -> Callable[[F], F]:
"""Decorator for Flow methods that require human feedback.
@@ -283,9 +256,7 @@ def human_feedback(
if not llm:
raise ValueError(
"llm is required when emit is specified. "
"Provide an LLM model string (e.g., 'gpt-4o-mini') or a BaseLLM instance. "
"See the CrewAI Human-in-the-Loop (HITL) documentation for more information: "
"https://docs.crewai.com/en/learn/human-feedback-in-flows"
"Provide an LLM model string (e.g., 'gpt-4o-mini') or a BaseLLM instance."
)
if default_outcome is not None and default_outcome not in emit:
raise ValueError(
@@ -298,101 +269,6 @@ def human_feedback(
def decorator(func: F) -> F:
"""Inner decorator that wraps the function."""
# -- HITL learning helpers (only used when learn=True) --------
def _get_hitl_prompt(key: str) -> str:
"""Read a HITL prompt from the i18n translations."""
from crewai.utilities.i18n import get_i18n
return get_i18n().slice(key)
def _resolve_llm_instance() -> Any:
"""Resolve the ``llm`` parameter to a BaseLLM instance.
Uses the SAME model specified in the decorator so pre-review,
distillation, and outcome collapsing all share one model.
"""
if llm is None:
from crewai.llm import LLM
return LLM(model="gpt-4o-mini")
if isinstance(llm, str):
from crewai.llm import LLM
return LLM(model=llm)
return llm # already a BaseLLM instance
def _pre_review_with_lessons(
flow_instance: Flow[Any], method_output: Any
) -> Any:
"""Recall past HITL lessons and use LLM to pre-review the output."""
try:
query = f"human feedback lessons for {func.__name__}: {method_output!s}"
matches = flow_instance.memory.recall(
query, source=learn_source
)
if not matches:
return method_output
lessons = "\n".join(f"- {m.record.content}" for m in matches)
llm_inst = _resolve_llm_instance()
prompt = _get_hitl_prompt("hitl_pre_review_user").format(
output=str(method_output),
lessons=lessons,
)
messages = [
{"role": "system", "content": _get_hitl_prompt("hitl_pre_review_system")},
{"role": "user", "content": prompt},
]
if getattr(llm_inst, "supports_function_calling", lambda: False)():
response = llm_inst.call(messages, response_model=PreReviewResult)
if isinstance(response, PreReviewResult):
return response.improved_output
return PreReviewResult.model_validate(response).improved_output
reviewed = llm_inst.call(messages)
return reviewed if isinstance(reviewed, str) else str(reviewed)
except Exception:
return method_output # fallback to raw output on any failure
def _distill_and_store_lessons(
flow_instance: Flow[Any], method_output: Any, raw_feedback: str
) -> None:
"""Extract generalizable lessons from output + feedback, store in memory."""
try:
llm_inst = _resolve_llm_instance()
prompt = _get_hitl_prompt("hitl_distill_user").format(
method_name=func.__name__,
output=str(method_output),
feedback=raw_feedback,
)
messages = [
{"role": "system", "content": _get_hitl_prompt("hitl_distill_system")},
{"role": "user", "content": prompt},
]
lessons: list[str] = []
if getattr(llm_inst, "supports_function_calling", lambda: False)():
response = llm_inst.call(messages, response_model=DistilledLessons)
if isinstance(response, DistilledLessons):
lessons = response.lessons
else:
lessons = DistilledLessons.model_validate(response).lessons
else:
response = llm_inst.call(messages)
if isinstance(response, str):
lessons = [
line.strip("- ").strip()
for line in response.strip().split("\n")
if line.strip() and line.strip() != "NONE"
]
if lessons:
flow_instance.memory.remember_many(lessons, source=learn_source)
except Exception: # noqa: S110
pass # non-critical: don't fail the flow because lesson storage failed
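
Both helpers above rely on the same structured-output pattern: request a Pydantic model when the LLM supports function calling, otherwise fall back to plain text. A hedged sketch (the model name is only an example; `response_model` usage mirrors this diff):

```python
from pydantic import BaseModel, Field
from crewai.llm import LLM

class Lessons(BaseModel):
    lessons: list[str] = Field(default_factory=list)

llm = LLM(model="gpt-4o-mini")
messages = [{"role": "user", "content": "Extract reusable lessons: prefer bullet lists."}]
if llm.supports_function_calling():
    result = llm.call(messages, response_model=Lessons)   # structured result
else:
    result = llm.call(messages)                           # plain-string fallback
```
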
# -- Core feedback helpers ------------------------------------
def _request_feedback(flow_instance: Flow[Any], method_output: Any) -> str:
"""Request feedback using provider or default console."""
from crewai.flow.async_feedback.types import PendingFeedbackContext
@@ -477,40 +353,28 @@ def human_feedback(
# Async wrapper
@wraps(func)
async def async_wrapper(self: Flow[Any], *args: Any, **kwargs: Any) -> Any:
# Execute the original method
method_output = await func(self, *args, **kwargs)
# Pre-review: apply past HITL lessons before human sees it
if learn and getattr(self, "memory", None) is not None:
method_output = _pre_review_with_lessons(self, method_output)
# Request human feedback (may raise HumanFeedbackPending)
raw_feedback = _request_feedback(self, method_output)
result = _process_feedback(self, method_output, raw_feedback)
# Distill: extract lessons from output + feedback, store in memory
if learn and getattr(self, "memory", None) is not None and raw_feedback.strip():
_distill_and_store_lessons(self, method_output, raw_feedback)
return result
# Process and return
return _process_feedback(self, method_output, raw_feedback)
wrapper: Any = async_wrapper
else:
# Sync wrapper
@wraps(func)
def sync_wrapper(self: Flow[Any], *args: Any, **kwargs: Any) -> Any:
# Execute the original method
method_output = func(self, *args, **kwargs)
# Pre-review: apply past HITL lessons before human sees it
if learn and getattr(self, "memory", None) is not None:
method_output = _pre_review_with_lessons(self, method_output)
# Request human feedback (may raise HumanFeedbackPending)
raw_feedback = _request_feedback(self, method_output)
result = _process_feedback(self, method_output, raw_feedback)
# Distill: extract lessons from output + feedback, store in memory
if learn and getattr(self, "memory", None) is not None and raw_feedback.strip():
_distill_and_store_lessons(self, method_output, raw_feedback)
return result
# Process and return
return _process_feedback(self, method_output, raw_feedback)
wrapper = sync_wrapper
@@ -533,8 +397,6 @@ def human_feedback(
default_outcome=default_outcome,
metadata=metadata,
provider=provider,
learn=learn,
learn_source=learn_source
)
wrapper.__is_flow_method__ = True
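
A hedged sketch of the learning decorator wired up above; `learn=True` enables the pre-review and lesson-distillation paths added in this diff, and the decorator stacking shown is one plausible arrangement:

```python
from crewai.flow.flow import Flow, start
from crewai.flow.human_feedback import human_feedback

class ReviewFlow(Flow):
    @start()
    @human_feedback(
        message="Review this draft:",
        llm="gpt-4o-mini",
        learn=True,              # recall past lessons, distill new ones
    )
    def draft(self) -> str:
        return "Draft content for review..."
```
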

View File

@@ -1,151 +0,0 @@
"""Input provider protocol for Flow.ask().
This module provides the InputProvider protocol and InputResponse dataclass
used by Flow.ask() to request input from users during flow execution.
The default implementation is ``ConsoleProvider`` (from
``crewai.flow.async_feedback.providers``), which serves both feedback
and input collection via console.
Example (default console input):
```python
from crewai.flow import Flow, start
class MyFlow(Flow):
@start()
def gather_info(self):
topic = self.ask("What topic should we research?")
return topic
```
Example (custom provider with metadata):
```python
from crewai.flow import Flow, start
from crewai.flow.input_provider import InputProvider, InputResponse
class SlackProvider:
def request_input(self, message, flow, metadata=None):
channel = metadata.get("channel", "#general") if metadata else "#general"
thread = self.post_question(channel, message)
reply = self.wait_for_reply(thread)
return InputResponse(
text=reply.text,
metadata={"responded_by": reply.user_id, "thread_id": thread.id},
)
class MyFlow(Flow):
input_provider = SlackProvider()
@start()
def gather_info(self):
topic = self.ask("What topic?", metadata={"channel": "#research"})
return topic
```
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
if TYPE_CHECKING:
from crewai.flow.flow import Flow
@dataclass
class InputResponse:
"""Response from an InputProvider, optionally carrying metadata.
Simple providers can just return a string from ``request_input()``.
Providers that need to send metadata back (e.g., who responded,
thread ID, external timestamps) return an ``InputResponse`` instead.
``ask()`` normalizes both cases -- callers always get ``str | None``.
The response metadata is stored in ``_input_history`` and emitted
in ``FlowInputReceivedEvent``.
Attributes:
text: The user's input text, or None if unavailable.
metadata: Optional metadata from the provider about the response
(e.g., who responded, thread ID, timestamps).
Example:
```python
class MyProvider:
def request_input(self, message, flow, metadata=None):
response = get_response_from_external_system(message)
return InputResponse(
text=response.text,
metadata={"responded_by": response.user_id},
)
```
"""
text: str | None
metadata: dict[str, Any] | None = field(default=None)
@runtime_checkable
class InputProvider(Protocol):
"""Protocol for user input collection strategies.
Implement this protocol to create custom input providers that integrate
with external systems like websockets, web UIs, Slack, or custom APIs.
The default provider is ``ConsoleProvider``, which blocks waiting for
console input via Python's built-in ``input()`` function.
Providers are always synchronous. The flow framework runs sync methods
in a thread pool (via ``asyncio.to_thread``), so ``ask()`` never blocks
the event loop even inside async flow methods.
Providers can return either:
- ``str | None`` for simple cases (no response metadata)
- ``InputResponse`` when they need to send metadata back with the answer
Example (simple):
```python
class SimpleProvider:
def request_input(self, message: str, flow: Flow) -> str | None:
return input(message)
```
Example (with metadata):
```python
class SlackProvider:
def request_input(self, message, flow, metadata=None):
channel = metadata.get("channel") if metadata else "#general"
reply = self.post_and_wait(channel, message)
return InputResponse(
text=reply.text,
metadata={"responded_by": reply.user_id},
)
```
"""
def request_input(
self,
message: str,
flow: Flow[Any],
metadata: dict[str, Any] | None = None,
) -> str | InputResponse | None:
"""Request input from the user.
Args:
message: The question or prompt to display to the user.
flow: The Flow instance requesting input. Can be used to
access flow state, name, or other context.
metadata: Optional metadata from the caller, such as user ID,
channel, session context, etc. Providers can use this to
route the question to the right recipient.
Returns:
The user's input as a string, an ``InputResponse`` with text
and optional response metadata, or None if input is unavailable
(e.g., user cancelled, connection dropped).
"""
...

View File

@@ -4,7 +4,6 @@ This module contains TypedDict definitions and type aliases used throughout
the Flow system.
"""
from datetime import datetime
from typing import (
Annotated,
Any,
@@ -102,30 +101,6 @@ class FlowData(TypedDict):
flow_methods_attributes: list[FlowMethodData]
class InputHistoryEntry(TypedDict):
"""A single entry in the flow's input history from ``self.ask()``.
Each call to ``Flow.ask()`` appends one entry recording the question,
the user's response, which method asked, and any metadata exchanged
between the caller and the input provider.
Attributes:
message: The question or prompt that was displayed to the user.
response: The user's response, or None on timeout/error.
method_name: The flow method that called ``ask()``.
timestamp: When the input was received.
metadata: Metadata sent with the question (caller to provider).
response_metadata: Metadata received with the answer (provider to caller).
"""
message: str
response: str | None
method_name: str
timestamp: datetime
metadata: dict[str, Any] | None
response_metadata: dict[str, Any] | None
class FlowExecutionData(TypedDict):
"""Flow execution data.

View File

@@ -2,7 +2,6 @@ from __future__ import annotations
import asyncio
from collections.abc import Callable
import time
from functools import wraps
import inspect
import json
@@ -49,11 +48,6 @@ from crewai.events.types.agent_events import (
LiteAgentExecutionErrorEvent,
LiteAgentExecutionStartedEvent,
)
from crewai.events.types.memory_events import (
MemoryRetrievalCompletedEvent,
MemoryRetrievalFailedEvent,
MemoryRetrievalStartedEvent,
)
from crewai.events.types.logging_events import AgentLogsExecutionEvent
from crewai.flow.flow_trackable import FlowTrackable
from crewai.hooks.llm_hooks import get_after_llm_call_hooks, get_before_llm_call_hooks
@@ -250,10 +244,6 @@ class LiteAgent(FlowTrackable, BaseModel):
description="A2A (Agent-to-Agent) configuration for delegating tasks to remote agents. "
"Can be a single A2AConfig/A2AClientConfig/A2AServerConfig, or a list of configurations.",
)
memory: bool | Any | None = Field(
default=None,
description="If True, use default Memory(). If Memory/MemoryScope/MemorySlice, use it for recall and remember.",
)
tools_results: list[dict[str, Any]] = Field(
default_factory=list, description="Results of the tools used by the agent."
)
@@ -276,7 +266,6 @@ class LiteAgent(FlowTrackable, BaseModel):
_after_llm_call_hooks: list[AfterLLMCallHookType] = PrivateAttr(
default_factory=get_after_llm_call_hooks
)
_memory: Any = PrivateAttr(default=None)
@model_validator(mode="after")
def emit_deprecation_warning(self) -> Self:
@@ -374,19 +363,6 @@ class LiteAgent(FlowTrackable, BaseModel):
return self
@model_validator(mode="after")
def resolve_memory(self) -> Self:
"""Resolve memory field to _memory: default Memory() when True, else user instance or None."""
if self.memory is True:
from crewai.memory.unified_memory import Memory
object.__setattr__(self, "_memory", Memory())
elif self.memory is not None and self.memory is not False:
object.__setattr__(self, "_memory", self.memory)
else:
object.__setattr__(self, "_memory", None)
return self
@field_validator("guardrail", mode="before")
@classmethod
def validate_guardrail_function(
@@ -479,19 +455,6 @@ class LiteAgent(FlowTrackable, BaseModel):
Returns:
LiteAgentOutput: The result of the agent execution.
"""
# Inject memory tools once if memory is configured (mirrors Agent._prepare_kickoff)
if self._memory is not None:
from crewai.tools.memory_tools import create_memory_tools
from crewai.utilities.agent_utils import sanitize_tool_name
existing_names = {sanitize_tool_name(t.name) for t in self._parsed_tools}
memory_tools = [
mt for mt in create_memory_tools(self._memory)
if sanitize_tool_name(mt.name) not in existing_names
]
if memory_tools:
self._parsed_tools = self._parsed_tools + parse_tools(memory_tools)
# Create agent info for event emission
agent_info = {
"id": self.id,
@@ -511,7 +474,6 @@ class LiteAgent(FlowTrackable, BaseModel):
self._messages = self._format_messages(
messages, response_format=response_format, input_files=input_files
)
self._inject_memory_context()
return self._execute_core(
agent_info=agent_info, response_format=response_format
@@ -534,80 +496,6 @@ class LiteAgent(FlowTrackable, BaseModel):
)
raise e
def _get_last_user_content(self) -> str:
"""Get the last user message content from _messages for recall/input."""
for msg in reversed(self._messages):
if msg.get("role") == "user":
content = msg.get("content")
return content if isinstance(content, str) else ""
return ""
def _inject_memory_context(self) -> None:
"""Recall relevant memories and append to the system message. No-op if _memory is None."""
if self._memory is None:
return
query = self._get_last_user_content()
crewai_event_bus.emit(
self,
event=MemoryRetrievalStartedEvent(
task_id=None,
source_type="lite_agent",
),
)
start_time = time.time()
memory_block = ""
try:
matches = self._memory.recall(query, limit=10)
if matches:
memory_block = "Relevant memories:\n" + "\n".join(
f"- {m.record.content}" for m in matches
)
if memory_block:
formatted = self.i18n.slice("memory").format(memory=memory_block)
if self._messages and self._messages[0].get("role") == "system":
self._messages[0]["content"] = (
self._messages[0].get("content", "") + "\n\n" + formatted
)
crewai_event_bus.emit(
self,
event=MemoryRetrievalCompletedEvent(
task_id=None,
memory_content=memory_block,
retrieval_time_ms=(time.time() - start_time) * 1000,
source_type="lite_agent",
),
)
except Exception as e:
crewai_event_bus.emit(
self,
event=MemoryRetrievalFailedEvent(
task_id=None,
source_type="lite_agent",
error=str(e),
),
)
def _save_to_memory(self, output_text: str) -> None:
"""Extract discrete memories from the run and remember each. No-op if _memory is None."""
if self._memory is None:
return
input_str = self._get_last_user_content() or "User request"
try:
raw = (
f"Input: {input_str}\n"
f"Agent: {self.role}\n"
f"Result: {output_text}"
)
extracted = self._memory.extract_memories(raw)
if extracted:
self._memory.remember_many(extracted, agent_role=self.role)
except Exception as e:
if self.verbose:
self._printer.print(
content=f"Failed to save to memory: {e}",
color="yellow",
)
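
Taken together, the save path concatenates the input, role, and result into one raw string and lets the memory layer split it into discrete statements. A hedged sketch of that round trip; ``memory`` stands in for any object exposing the ``extract_memories``/``remember_many`` interface used above:

```python
raw = (
    "Input: Summarize Q3 revenue\n"
    "Agent: Data Analyst\n"
    "Result: Revenue grew 12% quarter over quarter."
)
statements = memory.extract_memories(raw)  # e.g. ["Q3 revenue grew 12% QoQ."]
memory.remember_many(statements, agent_role="Data Analyst")
```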
def _execute_core(
self, agent_info: dict[str, Any], response_format: type[BaseModel] | None = None
) -> LiteAgentOutput:
@@ -623,8 +511,6 @@ class LiteAgent(FlowTrackable, BaseModel):
# Execute the agent using invoke loop
agent_finish = self._invoke_loop()
if self._memory is not None:
self._save_to_memory(agent_finish.output)
formatted_result: BaseModel | None = None
active_response_format = response_format or self.response_format

View File

@@ -419,22 +419,8 @@ class LLM(BaseLLM):
# FALLBACK to LiteLLM
if not LITELLM_AVAILABLE:
native_list = ", ".join(SUPPORTED_NATIVE_PROVIDERS)
error_msg = (
f"Unable to initialize LLM with model '{model}'. "
f"The model did not match any supported native provider "
f"({native_list}), and the LiteLLM fallback package is not "
f"installed.\n\n"
f"To fix this, either:\n"
f" 1. Install LiteLLM for broad model support: "
f"uv add litellm\n"
f"or\n"
f"pip install litellm\n\n"
f"For more details, see: "
f"https://docs.crewai.com/en/learn/llm-connections"
)
logger.error(error_msg)
raise ImportError(error_msg) from None
logger.error("LiteLLM is not available, falling back to LiteLLM")
raise ImportError("Fallback to LiteLLM is not available") from None
instance = object.__new__(cls)
super(LLM, instance).__init__(model=model, is_litellm=True, **kwargs)

View File

@@ -1,27 +1,13 @@
"""Memory module: unified Memory with LLM analysis and pluggable storage."""
from crewai.memory.entity.entity_memory import EntityMemory
from crewai.memory.external.external_memory import ExternalMemory
from crewai.memory.long_term.long_term_memory import LongTermMemory
from crewai.memory.short_term.short_term_memory import ShortTermMemory
from crewai.memory.encoding_flow import EncodingFlow
from crewai.memory.memory_scope import MemoryScope, MemorySlice
from crewai.memory.types import (
MemoryMatch,
MemoryRecord,
ScopeInfo,
compute_composite_score,
embed_text,
embed_texts,
)
from crewai.memory.unified_memory import Memory
__all__ = [
"EncodingFlow",
"Memory",
"MemoryMatch",
"MemoryRecord",
"MemoryScope",
"MemorySlice",
"ScopeInfo",
"compute_composite_score",
"embed_text",
"embed_texts",
"EntityMemory",
"ExternalMemory",
"LongTermMemory",
"ShortTermMemory",
]

View File

@@ -1,371 +0,0 @@
"""LLM-powered analysis for memory save and recall."""
from __future__ import annotations
import json
import logging
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from crewai.memory.types import MemoryRecord, ScopeInfo
from crewai.utilities.i18n import get_i18n
_logger = logging.getLogger(__name__)
class ExtractedMetadata(BaseModel):
"""Fixed schema for LLM-extracted metadata (OpenAI requires additionalProperties: false)."""
model_config = ConfigDict(extra="forbid")
entities: list[str] = Field(
default_factory=list,
description="Entities (people, orgs, places) mentioned in the content.",
)
dates: list[str] = Field(
default_factory=list,
description="Dates or time references in the content.",
)
topics: list[str] = Field(
default_factory=list,
description="Topics or themes in the content.",
)
class MemoryAnalysis(BaseModel):
"""LLM output for analyzing content before saving to memory."""
suggested_scope: str = Field(
description="Best matching existing scope or new path (e.g. /company/decisions).",
)
categories: list[str] = Field(
default_factory=list,
description="Categories for the memory (prefer existing, add new if needed).",
)
importance: float = Field(
default=0.5,
ge=0.0,
le=1.0,
description="Importance score from 0.0 to 1.0.",
)
extracted_metadata: ExtractedMetadata = Field(
default_factory=ExtractedMetadata,
description="Entities, dates, topics extracted from the content.",
)
class QueryAnalysis(BaseModel):
"""LLM output for analyzing a recall query."""
keywords: list[str] = Field(
default_factory=list,
description="Key entities or keywords for filtering.",
)
suggested_scopes: list[str] = Field(
default_factory=list,
description="Scope paths to search (subset of available scopes).",
)
complexity: str = Field(
default="simple",
description="One of 'simple' (single fact) or 'complex' (aggregation/reasoning).",
)
recall_queries: list[str] = Field(
default_factory=list,
description=(
"1-3 short, targeted search phrases distilled from the query. "
"Each should be a concise question or keyword phrase optimized "
"for semantic vector search. If the query is already short and "
"focused, return it as a single item."
),
)
time_filter: str | None = Field(
default=None,
description=(
"If the query references a specific time period (e.g. 'last week', "
"'yesterday', 'in January'), return an ISO 8601 date string representing "
"the earliest date that results should match (e.g. '2026-02-01'). "
"Return null if no time constraint is implied."
),
)
class ExtractedMemories(BaseModel):
"""LLM output for extracting discrete memories from raw content."""
memories: list[str] = Field(
default_factory=list,
description="List of discrete, self-contained memory statements extracted from the content.",
)
class ConsolidationAction(BaseModel):
"""A single action in a consolidation plan."""
model_config = ConfigDict(extra="forbid")
action: str = Field(
description="One of 'keep', 'update', or 'delete'.",
)
record_id: str = Field(
description="ID of the existing record this action applies to.",
)
new_content: str | None = Field(
default=None,
description="Updated content text. Required when action is 'update'.",
)
reason: str = Field(
default="",
description="Brief reason for this action.",
)
class ConsolidationPlan(BaseModel):
"""LLM output for consolidating new content with existing memories."""
model_config = ConfigDict(extra="forbid")
actions: list[ConsolidationAction] = Field(
default_factory=list,
description="Actions to take on existing records (keep/update/delete).",
)
insert_new: bool = Field(
default=True,
description="Whether to also insert the new content as a separate record.",
)
insert_reason: str = Field(
default="",
description="Why the new content should or should not be inserted.",
)
def _get_prompt(key: str) -> str:
"""Retrieve a memory prompt from the i18n translations.
Args:
key: The prompt key under the "memory" section.
Returns:
The prompt string.
"""
return get_i18n().memory(key)
def extract_memories_from_content(content: str, llm: Any) -> list[str]:
"""Use the LLM to extract discrete memory statements from raw content.
This is a pure helper: it does NOT store anything. Callers should call
memory.remember() on each returned string to persist them.
On LLM failure, returns the full content as a single memory so callers
still persist something rather than dropping the output.
Args:
content: Raw text (e.g. task description + result dump).
llm: The LLM instance to use.
Returns:
List of short, self-contained memory statements (or [content] on failure).
"""
if not (content or "").strip():
return []
user = _get_prompt("extract_memories_user").format(content=content)
messages = [
{"role": "system", "content": _get_prompt("extract_memories_system")},
{"role": "user", "content": user},
]
try:
if getattr(llm, "supports_function_calling", lambda: False)():
response = llm.call(messages, response_model=ExtractedMemories)
if isinstance(response, ExtractedMemories):
return response.memories
return ExtractedMemories.model_validate(response).memories
response = llm.call(messages)
if isinstance(response, ExtractedMemories):
return response.memories
if isinstance(response, str):
data = json.loads(response)
return ExtractedMemories.model_validate(data).memories
return ExtractedMemories.model_validate(response).memories
except Exception as e:
_logger.warning(
"Memory extraction failed, storing full content as single memory: %s",
e,
exc_info=False,
)
return [content]
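
A hedged usage sketch; ``my_llm`` and ``memory`` are placeholders for any objects matching the LLM and memory interfaces used above:

```python
memories = extract_memories_from_content(
    "Met Dana from Acme on Feb 3; she prefers weekly email updates.",
    llm=my_llm,
)
# On success: ["Dana works at Acme.", "Dana prefers weekly email updates.", ...]
# On LLM failure: the original string as a single-item list.
for statement in memories:
    memory.remember(statement)  # persistence is the caller's job
```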
def analyze_query(
query: str,
available_scopes: list[str],
scope_info: ScopeInfo | None,
llm: Any,
) -> QueryAnalysis:
"""Use the LLM to analyze a recall query.
On LLM failure, returns safe defaults so recall degrades to plain vector search.
Args:
query: The user's recall query.
available_scopes: Scope paths that exist in the store.
scope_info: Optional info about the current scope.
llm: The LLM instance to use.
Returns:
QueryAnalysis with keywords, suggested_scopes, complexity, recall_queries, time_filter.
"""
scope_desc = ""
if scope_info:
scope_desc = f"Current scope has {scope_info.record_count} records, categories: {scope_info.categories}"
user = _get_prompt("query_user").format(
query=query,
available_scopes=available_scopes or ["/"],
scope_desc=scope_desc,
)
messages = [
{"role": "system", "content": _get_prompt("query_system")},
{"role": "user", "content": user},
]
try:
if getattr(llm, "supports_function_calling", lambda: False)():
response = llm.call(messages, response_model=QueryAnalysis)
if isinstance(response, QueryAnalysis):
return response
return QueryAnalysis.model_validate(response)
response = llm.call(messages)
if isinstance(response, QueryAnalysis):
return response
if isinstance(response, str):
data = json.loads(response)
return QueryAnalysis.model_validate(data)
return QueryAnalysis.model_validate(response)
except Exception as e:
_logger.warning(
"Query analysis failed, using defaults (complexity=simple): %s",
e,
exc_info=False,
)
scopes = (available_scopes or ["/"])[:5]
return QueryAnalysis(
keywords=[],
suggested_scopes=scopes,
complexity="simple",
recall_queries=[query],
)
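
For example (a hypothetical call; ``my_llm`` is a placeholder):

```python
analysis = analyze_query(
    "what did we decide about pricing last week?",
    available_scopes=["/company/decisions", "/research"],
    scope_info=None,
    llm=my_llm,
)
print(analysis.recall_queries)  # e.g. ["pricing decision"]
print(analysis.time_filter)     # e.g. "2026-02-06" (one week back), or None
```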
_SAVE_DEFAULTS = MemoryAnalysis(
suggested_scope="/",
categories=[],
importance=0.5,
extracted_metadata=ExtractedMetadata(),
)
def analyze_for_save(
content: str,
existing_scopes: list[str],
existing_categories: list[str],
llm: Any,
) -> MemoryAnalysis:
"""Infer scope, categories, importance, and metadata for a single memory.
Uses the small ``MemoryAnalysis`` schema (4 fields) for fast LLM response.
On failure, returns safe defaults so the memory still gets persisted.
Args:
content: The memory content to analyze.
existing_scopes: Current scope paths in the memory store.
existing_categories: Current categories in use.
llm: The LLM instance to use.
Returns:
MemoryAnalysis with suggested_scope, categories, importance, extracted_metadata.
"""
user = _get_prompt("save_user").format(
content=content,
existing_scopes=existing_scopes or ["/"],
existing_categories=existing_categories or [],
)
messages = [
{"role": "system", "content": _get_prompt("save_system")},
{"role": "user", "content": user},
]
try:
if getattr(llm, "supports_function_calling", lambda: False)():
response = llm.call(messages, response_model=MemoryAnalysis)
if isinstance(response, MemoryAnalysis):
return response
return MemoryAnalysis.model_validate(response)
response = llm.call(messages)
if isinstance(response, MemoryAnalysis):
return response
if isinstance(response, str):
data = json.loads(response)
return MemoryAnalysis.model_validate(data)
return MemoryAnalysis.model_validate(response)
except Exception as e:
_logger.warning(
"Memory save analysis failed, using defaults: %s", e, exc_info=False,
)
return _SAVE_DEFAULTS
_CONSOLIDATION_DEFAULT = ConsolidationPlan(actions=[], insert_new=True)
def analyze_for_consolidation(
new_content: str,
existing_records: list[MemoryRecord],
llm: Any,
) -> ConsolidationPlan:
"""Decide insert/update/delete for a single memory against similar existing records.
Uses the small ``ConsolidationPlan`` schema (3 fields) for fast LLM response.
On failure, returns a safe default (insert_new=True) so the memory still gets persisted.
Args:
new_content: The new content to store.
existing_records: Existing records that are semantically similar.
llm: The LLM instance to use.
Returns:
ConsolidationPlan with actions per record and whether to insert the new content.
"""
if not existing_records:
return ConsolidationPlan(actions=[], insert_new=True)
records_lines: list[str] = []
for r in existing_records:
created = r.created_at.isoformat() if r.created_at else ""
records_lines.append(
f"- id={r.id} | scope={r.scope} | importance={r.importance:.2f} | created={created}\n"
f" content: {r.content[:200]}{'...' if len(r.content) > 200 else ''}"
)
user = _get_prompt("consolidation_user").format(
new_content=new_content,
records_summary="\n\n".join(records_lines),
)
messages = [
{"role": "system", "content": _get_prompt("consolidation_system")},
{"role": "user", "content": user},
]
try:
if getattr(llm, "supports_function_calling", lambda: False)():
response = llm.call(messages, response_model=ConsolidationPlan)
if isinstance(response, ConsolidationPlan):
return response
return ConsolidationPlan.model_validate(response)
response = llm.call(messages)
if isinstance(response, ConsolidationPlan):
return response
if isinstance(response, str):
data = json.loads(response)
return ConsolidationPlan.model_validate(data)
return ConsolidationPlan.model_validate(response)
except Exception as e:
_logger.warning(
"Consolidation analysis failed, defaulting to insert: %s", e, exc_info=False,
)
return _CONSOLIDATION_DEFAULT
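
For reference, a plan the LLM might return for a superseded fact, constructed directly against the schema above (contents illustrative):

```python
plan = ConsolidationPlan(
    actions=[
        ConsolidationAction(
            action="update",
            record_id="rec-1",
            new_content="Dana now prefers daily email updates.",
            reason="Newer preference supersedes the weekly one.",
        ),
    ],
    insert_new=False,
    insert_reason="Fully covered by the update above.",
)
```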

View File

@@ -0,0 +1,254 @@
from __future__ import annotations
import asyncio
from typing import TYPE_CHECKING
from crewai.memory import (
EntityMemory,
ExternalMemory,
LongTermMemory,
ShortTermMemory,
)
if TYPE_CHECKING:
from crewai.agent import Agent
from crewai.task import Task
class ContextualMemory:
"""Aggregates and retrieves context from multiple memory sources."""
def __init__(
self,
stm: ShortTermMemory,
ltm: LongTermMemory,
em: EntityMemory,
exm: ExternalMemory,
agent: Agent | None = None,
task: Task | None = None,
) -> None:
self.stm = stm
self.ltm = ltm
self.em = em
self.exm = exm
self.agent = agent
self.task = task
if self.stm is not None:
self.stm.agent = self.agent
self.stm.task = self.task
if self.ltm is not None:
self.ltm.agent = self.agent
self.ltm.task = self.task
if self.em is not None:
self.em.agent = self.agent
self.em.task = self.task
if self.exm is not None:
self.exm.agent = self.agent
self.exm.task = self.task
def build_context_for_task(self, task: Task, context: str) -> str:
"""Build contextual information for a task synchronously.
Args:
task: The task to build context for.
context: Additional context string.
Returns:
Formatted context string from all memory sources.
"""
query = f"{task.description} {context}".strip()
if query == "":
return ""
context_parts = [
self._fetch_ltm_context(task.description),
self._fetch_stm_context(query),
self._fetch_entity_context(query),
self._fetch_external_context(query),
]
return "\n".join(filter(None, context_parts))
async def abuild_context_for_task(self, task: Task, context: str) -> str:
"""Build contextual information for a task asynchronously.
Args:
task: The task to build context for.
context: Additional context string.
Returns:
Formatted context string from all memory sources.
"""
query = f"{task.description} {context}".strip()
if query == "":
return ""
# Fetch all contexts concurrently
results = await asyncio.gather(
self._afetch_ltm_context(task.description),
self._afetch_stm_context(query),
self._afetch_entity_context(query),
self._afetch_external_context(query),
)
return "\n".join(filter(None, results))
def _fetch_stm_context(self, query: str) -> str:
"""
Fetches recent relevant insights from STM related to the task's description and expected_output,
formatted as bullet points.
"""
if self.stm is None:
return ""
stm_results = self.stm.search(query)
formatted_results = "\n".join(
[f"- {result['content']}" for result in stm_results]
)
return f"Recent Insights:\n{formatted_results}" if stm_results else ""
def _fetch_ltm_context(self, task: str) -> str | None:
"""
Fetches historical data or insights from LTM that are relevant to the task's description and expected_output,
formatted as bullet points.
"""
if self.ltm is None:
return ""
ltm_results = self.ltm.search(task, latest_n=2)
if not ltm_results:
return None
suggestions = [
suggestion
for result in ltm_results
for suggestion in result["metadata"]["suggestions"]
]
unique_suggestions = list(dict.fromkeys(suggestions))  # dedup, preserve order
formatted = "\n".join(f"- {s}" for s in unique_suggestions)
return f"Historical Data:\n{formatted}"
def _fetch_entity_context(self, query: str) -> str:
"""
Fetches relevant entity information from Entity Memory related to the task's description and expected_output,
formatted as bullet points.
"""
if self.em is None:
return ""
em_results = self.em.search(query)
formatted_results = "\n".join(
[f"- {result['content']}" for result in em_results]
)
return f"Entities:\n{formatted_results}" if em_results else ""
def _fetch_external_context(self, query: str) -> str:
"""
Fetches and formats relevant information from External Memory.
Args:
query (str): The search query to find relevant information.
Returns:
str: Formatted information as bullet points, or an empty string if none found.
"""
if self.exm is None:
return ""
external_memories = self.exm.search(query)
if not external_memories:
return ""
formatted_memories = "\n".join(
f"- {result['content']}" for result in external_memories
)
return f"External memories:\n{formatted_memories}"
async def _afetch_stm_context(self, query: str) -> str:
"""Fetch recent relevant insights from STM asynchronously.
Args:
query: The search query.
Returns:
Formatted insights as bullet points, or empty string if none found.
"""
if self.stm is None:
return ""
stm_results = await self.stm.asearch(query)
formatted_results = "\n".join(
[f"- {result['content']}" for result in stm_results]
)
return f"Recent Insights:\n{formatted_results}" if stm_results else ""
async def _afetch_ltm_context(self, task: str) -> str | None:
"""Fetch historical data from LTM asynchronously.
Args:
task: The task description to search for.
Returns:
Formatted historical data as bullet points, or None if none found.
"""
if self.ltm is None:
return ""
ltm_results = await self.ltm.asearch(task, latest_n=2)
if not ltm_results:
return None
suggestions = [
suggestion
for result in ltm_results
for suggestion in result["metadata"]["suggestions"]
]
unique_suggestions = list(dict.fromkeys(suggestions))  # dedup, preserve order
formatted = "\n".join(f"- {s}" for s in unique_suggestions)
return f"Historical Data:\n{formatted}"
async def _afetch_entity_context(self, query: str) -> str:
"""Fetch relevant entity information asynchronously.
Args:
query: The search query.
Returns:
Formatted entity information as bullet points, or empty string if none found.
"""
if self.em is None:
return ""
em_results = await self.em.asearch(query)
formatted_results = "\n".join(
[f"- {result['content']}" for result in em_results]
)
return f"Entities:\n{formatted_results}" if em_results else ""
async def _afetch_external_context(self, query: str) -> str:
"""Fetch relevant information from External Memory asynchronously.
Args:
query: The search query.
Returns:
Formatted information as bullet points, or empty string if none found.
"""
if self.exm is None:
return ""
external_memories = await self.exm.asearch(query)
if not external_memories:
return ""
formatted_memories = "\n".join(
f"- {result['content']}" for result in external_memories
)
return f"External memories:\n{formatted_memories}"

View File

@@ -1,444 +0,0 @@
"""Batch-native encoding flow: full save pipeline for one or more memories.
Orchestrates the encoding side of memory in a single Flow with 5 steps:
1. Batch embed (ONE embedder call for all items)
2. Intra-batch dedup (cosine matrix, drop near-exact duplicates)
3. Parallel find similar (concurrent storage searches)
4. Parallel analyze (N concurrent LLM calls -- field resolution + consolidation)
5. Execute plans (batch re-embed updates + bulk insert)
"""
from __future__ import annotations
from concurrent.futures import Future, ThreadPoolExecutor
from datetime import datetime
import math
from typing import Any
from uuid import uuid4
from pydantic import BaseModel, Field
from crewai.flow.flow import Flow, listen, start
from crewai.memory.analyze import (
ConsolidationPlan,
MemoryAnalysis,
analyze_for_consolidation,
analyze_for_save,
)
from crewai.memory.types import MemoryConfig, MemoryRecord, embed_texts
# ---------------------------------------------------------------------------
# State models
# ---------------------------------------------------------------------------
class ItemState(BaseModel):
"""Per-item tracking within a batch."""
content: str = ""
# Caller-provided (None = infer via LLM)
scope: str | None = None
categories: list[str] | None = None
metadata: dict[str, Any] | None = None
importance: float | None = None
source: str | None = None
private: bool = False
# Resolved values
resolved_scope: str = "/"
resolved_categories: list[str] = Field(default_factory=list)
resolved_metadata: dict[str, Any] = Field(default_factory=dict)
resolved_importance: float = 0.5
resolved_source: str | None = None
resolved_private: bool = False
# Embedding
embedding: list[float] = Field(default_factory=list)
# Intra-batch dedup
dropped: bool = False
# Consolidation
similar_records: list[MemoryRecord] = Field(default_factory=list)
top_similarity: float = 0.0
plan: ConsolidationPlan | None = None
result_record: MemoryRecord | None = None
class EncodingState(BaseModel):
"""Batch-level state for the encoding flow."""
id: str = Field(default_factory=lambda: str(uuid4()))
items: list[ItemState] = Field(default_factory=list)
# Aggregate stats
records_inserted: int = 0
records_updated: int = 0
records_deleted: int = 0
items_dropped_dedup: int = 0
# ---------------------------------------------------------------------------
# Flow
# ---------------------------------------------------------------------------
class EncodingFlow(Flow[EncodingState]):
"""Batch-native encoding pipeline for memory.remember() / remember_many().
Processes N items through 5 sequential steps, maximising parallelism:
- ONE embedder call for all items
- N concurrent storage searches
- N concurrent individual LLM calls (field resolution + consolidation)
- ONE batch re-embed for updates + ONE bulk storage write
"""
_skip_auto_memory: bool = True
initial_state = EncodingState
def __init__(
self,
storage: Any,
llm: Any,
embedder: Any,
config: MemoryConfig | None = None,
) -> None:
super().__init__(suppress_flow_events=True)
self._storage = storage
self._llm = llm
self._embedder = embedder
self._config = config or MemoryConfig()
# ------------------------------------------------------------------
# Step 1: Batch embed (ONE embedder call)
# ------------------------------------------------------------------
@start()
def batch_embed(self) -> None:
"""Embed all items in a single embedder call."""
items = list(self.state.items)
texts = [item.content for item in items]
embeddings = embed_texts(self._embedder, texts)
for item, emb in zip(items, embeddings, strict=False):
item.embedding = emb
# ------------------------------------------------------------------
# Step 2: Intra-batch dedup (cosine similarity matrix)
# ------------------------------------------------------------------
@listen(batch_embed)
def intra_batch_dedup(self) -> None:
"""Drop near-exact duplicates within the batch."""
items = list(self.state.items)
if len(items) <= 1:
return
threshold = self._config.batch_dedup_threshold
n = len(items)
for j in range(1, n):
if items[j].dropped or not items[j].embedding:
continue
for i in range(j):
if items[i].dropped or not items[i].embedding:
continue
sim = self._cosine_similarity(items[i].embedding, items[j].embedding)
if sim >= threshold:
items[j].dropped = True
self.state.items_dropped_dedup += 1
break
@staticmethod
def _cosine_similarity(a: list[float], b: list[float]) -> float:
"""Compute cosine similarity between two vectors."""
if len(a) != len(b) or not a:
return 0.0
dot = sum(x * y for x, y in zip(a, b, strict=False))
norm_a = math.sqrt(sum(x * x for x in a))
norm_b = math.sqrt(sum(x * x for x in b))
if norm_a == 0.0 or norm_b == 0.0:
return 0.0
return dot / (norm_a * norm_b)
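
A quick sanity check of the helper on toy vectors: identical vectors score 1.0, orthogonal ones 0.0, and mismatched lengths fall back to 0.0:

```python
assert abs(EncodingFlow._cosine_similarity([1.0, 0.0], [1.0, 0.0]) - 1.0) < 1e-9
assert EncodingFlow._cosine_similarity([1.0, 0.0], [0.0, 1.0]) == 0.0
assert EncodingFlow._cosine_similarity([1.0], [1.0, 0.0]) == 0.0
```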
# ------------------------------------------------------------------
# Step 3: Parallel find similar (concurrent storage searches)
# ------------------------------------------------------------------
@listen(intra_batch_dedup)
def parallel_find_similar(self) -> None:
"""Search storage for similar records, concurrently for all active items."""
items = list(self.state.items)
active = [(i, item) for i, item in enumerate(items) if not item.dropped and item.embedding]
if not active:
return
def _search_one(item: ItemState) -> list[tuple[MemoryRecord, float]]:
scope_prefix = item.scope if item.scope and item.scope.strip("/") else None
return self._storage.search(
item.embedding,
scope_prefix=scope_prefix,
categories=None,
limit=self._config.consolidation_limit,
min_score=0.0,
)
if len(active) == 1:
_, item = active[0]
raw = _search_one(item)
item.similar_records = [r for r, _ in raw]
item.top_similarity = float(raw[0][1]) if raw else 0.0
else:
with ThreadPoolExecutor(max_workers=min(len(active), 8)) as pool:
futures = [(i, item, pool.submit(_search_one, item)) for i, item in active]
for _, item, future in futures:
raw = future.result()
item.similar_records = [r for r, _ in raw]
item.top_similarity = float(raw[0][1]) if raw else 0.0
# ------------------------------------------------------------------
# Step 4: Parallel analyze (N concurrent LLM calls)
# ------------------------------------------------------------------
@listen(parallel_find_similar)
def parallel_analyze(self) -> None:
"""Field resolution + consolidation via parallel individual LLM calls.
Classifies each active item into one of four groups:
- Group A: fields provided + no similar records -> fast insert, 0 LLM calls.
- Group B: fields provided + similar records above threshold -> 1 consolidation call.
- Group C: fields missing + no similar records -> 1 field-resolution call.
- Group D: fields missing + similar records above threshold -> 2 concurrent calls.
All LLM calls across all items run in parallel via ThreadPoolExecutor.
"""
items = list(self.state.items)
threshold = self._config.consolidation_threshold
# Pre-fetch scope/category info (shared across all field-resolution calls)
any_needs_fields = any(
not it.dropped
and (it.scope is None or it.categories is None or it.importance is None)
for it in items
)
existing_scopes: list[str] = []
existing_categories: list[str] = []
if any_needs_fields:
existing_scopes = self._storage.list_scopes("/") or ["/"]
existing_categories = list(
self._storage.list_categories(scope_prefix=None).keys()
)
# Classify items and submit LLM calls
save_futures: dict[int, Future[MemoryAnalysis]] = {}
consol_futures: dict[int, Future[ConsolidationPlan]] = {}
pool = ThreadPoolExecutor(max_workers=10)
try:
for i, item in enumerate(items):
if item.dropped:
continue
fields_provided = (
item.scope is not None
and item.categories is not None
and item.importance is not None
)
has_similar = item.top_similarity >= threshold
if fields_provided and not has_similar:
# Group A: fast path
self._apply_defaults(item)
item.plan = ConsolidationPlan(actions=[], insert_new=True)
elif fields_provided and has_similar:
# Group B: consolidation only
self._apply_defaults(item)
consol_futures[i] = pool.submit(
analyze_for_consolidation,
item.content, list(item.similar_records), self._llm,
)
elif not fields_provided and not has_similar:
# Group C: field resolution only
save_futures[i] = pool.submit(
analyze_for_save,
item.content, existing_scopes, existing_categories, self._llm,
)
else:
# Group D: both in parallel
save_futures[i] = pool.submit(
analyze_for_save,
item.content, existing_scopes, existing_categories, self._llm,
)
consol_futures[i] = pool.submit(
analyze_for_consolidation,
item.content, list(item.similar_records), self._llm,
)
# Collect field-resolution results
for i, future in save_futures.items():
analysis = future.result()
item = items[i]
item.resolved_scope = item.scope or analysis.suggested_scope or "/"
item.resolved_categories = (
item.categories
if item.categories is not None
else analysis.categories
)
item.resolved_importance = (
item.importance
if item.importance is not None
else analysis.importance
)
item.resolved_metadata = dict(
item.metadata or {},
**(
analysis.extracted_metadata.model_dump()
if analysis.extracted_metadata
else {}
),
)
item.resolved_source = item.source
item.resolved_private = item.private
# If no consolidation future, it's Group C -> insert
if i not in consol_futures:
item.plan = ConsolidationPlan(actions=[], insert_new=True)
# Collect consolidation results
for i, future in consol_futures.items():
items[i].plan = future.result()
finally:
pool.shutdown(wait=False)
def _apply_defaults(self, item: ItemState) -> None:
"""Apply caller values with config defaults (fast path)."""
item.resolved_scope = item.scope or "/"
item.resolved_categories = item.categories or []
item.resolved_metadata = item.metadata or {}
item.resolved_importance = (
item.importance
if item.importance is not None
else self._config.default_importance
)
item.resolved_source = item.source
item.resolved_private = item.private
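
The four-way routing above reduces to a two-bit decision. Restated as a standalone sketch for clarity:

```python
def llm_calls_needed(fields_provided: bool, has_similar: bool) -> str:
    if fields_provided and not has_similar:
        return "Group A: 0 calls (fast insert)"
    if fields_provided and has_similar:
        return "Group B: 1 call (consolidation)"
    if not fields_provided and not has_similar:
        return "Group C: 1 call (field resolution)"
    return "Group D: 2 concurrent calls (both)"
```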
# ------------------------------------------------------------------
# Step 5: Execute plans (batch re-embed + bulk insert)
# ------------------------------------------------------------------
@listen(parallel_analyze)
def execute_plans(self) -> None:
"""Apply all consolidation plans with batch re-embedding and bulk insert.
Actions are deduplicated across items before applying: when multiple
items reference the same existing record (e.g. both want to delete it),
only the first action is applied. This prevents LanceDB commit
conflicts from two operations targeting the same record.
"""
items = list(self.state.items)
now = datetime.utcnow()
# --- Deduplicate actions across all items ---
# Multiple items may reference the same existing record (because their
# similar_records overlap). Collect one action per record_id, first wins.
# Also build a map from record_id to the original MemoryRecord for updates.
dedup_deletes: set[str] = set() # record_ids to delete
dedup_updates: dict[str, tuple[int, str]] = {} # record_id -> (item_idx, new_content)
all_similar: dict[str, MemoryRecord] = {} # record_id -> MemoryRecord
for i, item in enumerate(items):
if item.dropped or item.plan is None:
continue
for r in item.similar_records:
if r.id not in all_similar:
all_similar[r.id] = r
for action in item.plan.actions:
rid = action.record_id
if action.action == "delete" and rid not in dedup_deletes and rid not in dedup_updates:
dedup_deletes.add(rid)
elif action.action == "update" and action.new_content and rid not in dedup_deletes and rid not in dedup_updates:
dedup_updates[rid] = (i, action.new_content)
# --- Batch re-embed all update contents in ONE call ---
update_list = list(dedup_updates.items()) # [(record_id, (item_idx, new_content)), ...]
update_embeddings: list[list[float]] = []
if update_list:
update_contents = [content for _, (_, content) in update_list]
update_embeddings = embed_texts(self._embedder, update_contents)
update_emb_map: dict[str, list[float]] = {}
for (rid, _), emb in zip(update_list, update_embeddings, strict=False):
update_emb_map[rid] = emb
# Collect records to insert (outside lock -- pure data assembly)
to_insert: list[tuple[int, MemoryRecord]] = []
for i, item in enumerate(items):
if item.dropped or item.plan is None:
continue
if item.plan.insert_new:
to_insert.append((i, MemoryRecord(
content=item.content,
scope=item.resolved_scope,
categories=item.resolved_categories,
metadata=item.resolved_metadata,
importance=item.resolved_importance,
embedding=item.embedding if item.embedding else None,
source=item.resolved_source,
private=item.resolved_private,
)))
# All storage mutations under one lock so no other pipeline can
# interleave and cause version conflicts. The lock is reentrant
# (RLock) so the individual storage methods re-acquire it safely.
updated_records: dict[str, MemoryRecord] = {}
with self._storage.write_lock:
if dedup_deletes:
self._storage.delete(record_ids=list(dedup_deletes))
self.state.records_deleted += len(dedup_deletes)
for rid, (_item_idx, new_content) in dedup_updates.items():
existing = all_similar.get(rid)
if existing is not None:
new_emb = update_emb_map.get(rid, [])
updated = MemoryRecord(
id=existing.id,
content=new_content,
scope=existing.scope,
categories=existing.categories,
metadata=existing.metadata,
importance=existing.importance,
created_at=existing.created_at,
last_accessed=now,
embedding=new_emb if new_emb else existing.embedding,
)
self._storage.update(updated)
self.state.records_updated += 1
updated_records[rid] = updated
if to_insert:
records = [r for _, r in to_insert]
self._storage.save(records)
self.state.records_inserted += len(records)
for idx, record in to_insert:
items[idx].result_record = record
# Set result_record for non-insert items (after lock, using updated_records)
for _i, item in enumerate(items):
if item.dropped or item.plan is None or item.plan.insert_new:
continue
if item.result_record is not None:
continue
first_updated = next(
(
updated_records[a.record_id]
for a in item.plan.actions
if a.action == "update" and a.record_id in updated_records
),
None,
)
item.result_record = (
first_updated
if first_updated is not None
else (item.similar_records[0] if item.similar_records else None)
)

View File

@@ -0,0 +1,404 @@
import time
from typing import Any
from pydantic import PrivateAttr
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.memory_events import (
MemoryQueryCompletedEvent,
MemoryQueryFailedEvent,
MemoryQueryStartedEvent,
MemorySaveCompletedEvent,
MemorySaveFailedEvent,
MemorySaveStartedEvent,
)
from crewai.memory.entity.entity_memory_item import EntityMemoryItem
from crewai.memory.memory import Memory
from crewai.memory.storage.rag_storage import RAGStorage
class EntityMemory(Memory):
"""
EntityMemory class for managing structured information about entities
and their relationships, backed by RAG vector storage by default
(or Mem0 when a mem0 provider is configured). Inherits from the Memory class.
"""
_memory_provider: str | None = PrivateAttr()
def __init__(
self,
crew: Any = None,
embedder_config: Any = None,
storage: Any = None,
path: str | None = None,
) -> None:
memory_provider = None
if embedder_config and isinstance(embedder_config, dict):
memory_provider = embedder_config.get("provider")
if memory_provider == "mem0":
try:
from crewai.memory.storage.mem0_storage import Mem0Storage
except ImportError as e:
raise ImportError(
"Mem0 is not installed. Please install it with `pip install mem0ai`."
) from e
config = (
embedder_config.get("config")
if embedder_config and isinstance(embedder_config, dict)
else None
)
storage = Mem0Storage(type="short_term", crew=crew, config=config) # type: ignore[no-untyped-call]
else:
storage = (
storage
if storage
else RAGStorage(
type="entities",
allow_reset=True,
embedder_config=embedder_config,
crew=crew,
path=path,
)
)
super().__init__(storage=storage)
self._memory_provider = memory_provider
def save(
self,
value: EntityMemoryItem | list[EntityMemoryItem],
metadata: dict[str, Any] | None = None,
) -> None:
"""Saves one or more entity items into the SQLite storage.
Args:
value: Single EntityMemoryItem or list of EntityMemoryItems to save.
metadata: Optional metadata dict (included for supertype compatibility but not used).
Notes:
The metadata parameter is included to satisfy the supertype signature but is not
used - entity metadata is extracted from the EntityMemoryItem objects themselves.
"""
if not value:
return
items = value if isinstance(value, list) else [value]
is_batch = len(items) > 1
metadata = {"entity_count": len(items)} if is_batch else items[0].metadata
crewai_event_bus.emit(
self,
event=MemorySaveStartedEvent(
metadata=metadata,
source_type="entity_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
saved_count = 0
errors = []
def save_single_item(item: EntityMemoryItem) -> tuple[bool, str | None]:
"""Save a single item and return success status."""
try:
if self._memory_provider == "mem0":
data = f"""
Remember details about the following entity:
Name: {item.name}
Type: {item.type}
Entity Description: {item.description}
"""
else:
data = f"{item.name}({item.type}): {item.description}"
super(EntityMemory, self).save(data, item.metadata)
return True, None
except Exception as e:
return False, f"{item.name}: {e!s}"
try:
for item in items:
success, error = save_single_item(item)
if success:
saved_count += 1
else:
errors.append(error)
if is_batch:
emit_value = f"Saved {saved_count} entities"
metadata = {"entity_count": saved_count, "errors": errors}
else:
emit_value = f"{items[0].name}({items[0].type}): {items[0].description}"
metadata = items[0].metadata
crewai_event_bus.emit(
self,
event=MemorySaveCompletedEvent(
value=emit_value,
metadata=metadata,
save_time_ms=(time.time() - start_time) * 1000,
source_type="entity_memory",
from_agent=self.agent,
from_task=self.task,
),
)
if errors:
raise Exception(
f"Partial save: {len(errors)} failed out of {len(items)}"
)
except Exception as e:
fail_metadata = (
{"entity_count": len(items), "saved": saved_count}
if is_batch
else items[0].metadata
)
crewai_event_bus.emit(
self,
event=MemorySaveFailedEvent(
metadata=fail_metadata,
error=str(e),
source_type="entity_memory",
from_agent=self.agent,
from_task=self.task,
),
)
raise
def search(
self,
query: str,
limit: int = 5,
score_threshold: float = 0.6,
) -> list[Any]:
"""Search entity memory for relevant entries.
Args:
query: The search query.
limit: Maximum number of results to return.
score_threshold: Minimum similarity score for results.
Returns:
List of matching memory entries.
"""
crewai_event_bus.emit(
self,
event=MemoryQueryStartedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
source_type="entity_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
results = super().search(
query=query, limit=limit, score_threshold=score_threshold
)
crewai_event_bus.emit(
self,
event=MemoryQueryCompletedEvent(
query=query,
results=results,
limit=limit,
score_threshold=score_threshold,
query_time_ms=(time.time() - start_time) * 1000,
source_type="entity_memory",
from_agent=self.agent,
from_task=self.task,
),
)
return results
except Exception as e:
crewai_event_bus.emit(
self,
event=MemoryQueryFailedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
error=str(e),
source_type="entity_memory",
),
)
raise
async def asave(
self,
value: EntityMemoryItem | list[EntityMemoryItem],
metadata: dict[str, Any] | None = None,
) -> None:
"""Save entity items asynchronously.
Args:
value: Single EntityMemoryItem or list of EntityMemoryItems to save.
metadata: Optional metadata dict (not used, for signature compatibility).
"""
if not value:
return
items = value if isinstance(value, list) else [value]
is_batch = len(items) > 1
metadata = {"entity_count": len(items)} if is_batch else items[0].metadata
crewai_event_bus.emit(
self,
event=MemorySaveStartedEvent(
metadata=metadata,
source_type="entity_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
saved_count = 0
errors: list[str | None] = []
async def save_single_item(item: EntityMemoryItem) -> tuple[bool, str | None]:
"""Save a single item asynchronously."""
try:
if self._memory_provider == "mem0":
data = f"""
Remember details about the following entity:
Name: {item.name}
Type: {item.type}
Entity Description: {item.description}
"""
else:
data = f"{item.name}({item.type}): {item.description}"
await super(EntityMemory, self).asave(data, item.metadata)
return True, None
except Exception as e:
return False, f"{item.name}: {e!s}"
try:
for item in items:
success, error = await save_single_item(item)
if success:
saved_count += 1
else:
errors.append(error)
if is_batch:
emit_value = f"Saved {saved_count} entities"
metadata = {"entity_count": saved_count, "errors": errors}
else:
emit_value = f"{items[0].name}({items[0].type}): {items[0].description}"
metadata = items[0].metadata
crewai_event_bus.emit(
self,
event=MemorySaveCompletedEvent(
value=emit_value,
metadata=metadata,
save_time_ms=(time.time() - start_time) * 1000,
source_type="entity_memory",
from_agent=self.agent,
from_task=self.task,
),
)
if errors:
raise Exception(
f"Partial save: {len(errors)} failed out of {len(items)}"
)
except Exception as e:
fail_metadata = (
{"entity_count": len(items), "saved": saved_count}
if is_batch
else items[0].metadata
)
crewai_event_bus.emit(
self,
event=MemorySaveFailedEvent(
metadata=fail_metadata,
error=str(e),
source_type="entity_memory",
from_agent=self.agent,
from_task=self.task,
),
)
raise
async def asearch(
self,
query: str,
limit: int = 5,
score_threshold: float = 0.6,
) -> list[Any]:
"""Search entity memory asynchronously.
Args:
query: The search query.
limit: Maximum number of results to return.
score_threshold: Minimum similarity score for results.
Returns:
List of matching memory entries.
"""
crewai_event_bus.emit(
self,
event=MemoryQueryStartedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
source_type="entity_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
results = await super().asearch(
query=query, limit=limit, score_threshold=score_threshold
)
crewai_event_bus.emit(
self,
event=MemoryQueryCompletedEvent(
query=query,
results=results,
limit=limit,
score_threshold=score_threshold,
query_time_ms=(time.time() - start_time) * 1000,
source_type="entity_memory",
from_agent=self.agent,
from_task=self.task,
),
)
return results
except Exception as e:
crewai_event_bus.emit(
self,
event=MemoryQueryFailedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
error=str(e),
source_type="entity_memory",
),
)
raise
def reset(self) -> None:
try:
self.storage.reset()
except Exception as e:
raise Exception(
f"An error occurred while resetting the entity memory: {e}"
) from e

View File

@@ -0,0 +1,12 @@
class EntityMemoryItem:
def __init__(
self,
name: str,
type: str,
description: str,
relationships: str,
):
self.name = name
self.type = type
self.description = description
self.metadata = {"relationships": relationships}
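
Putting the two classes together, a hedged end-to-end sketch (default storage; an embedding provider is assumed to be configured via environment):

```python
item = EntityMemoryItem(
    name="Acme Corp",
    type="organization",
    description="Customer since 2024; prefers quarterly reviews.",
    relationships="works_with: Dana",
)

memory = EntityMemory()   # defaults to RAGStorage(type="entities")
memory.save(item)         # emits MemorySaveStarted/Completed events
matches = memory.search("Acme", limit=3)
```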

View File

View File

@@ -0,0 +1,301 @@
from __future__ import annotations
import time
from typing import TYPE_CHECKING, Any
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.memory_events import (
MemoryQueryCompletedEvent,
MemoryQueryFailedEvent,
MemoryQueryStartedEvent,
MemorySaveCompletedEvent,
MemorySaveFailedEvent,
MemorySaveStartedEvent,
)
from crewai.memory.external.external_memory_item import ExternalMemoryItem
from crewai.memory.memory import Memory
from crewai.memory.storage.interface import Storage
from crewai.rag.embeddings.types import ProviderSpec
if TYPE_CHECKING:
from crewai.memory.storage.mem0_storage import Mem0Storage
class ExternalMemory(Memory):
def __init__(self, storage: Storage | None = None, **data: Any):
super().__init__(storage=storage, **data)
@staticmethod
def _configure_mem0(crew: Any, config: dict[str, Any]) -> Mem0Storage:
from crewai.memory.storage.mem0_storage import Mem0Storage
return Mem0Storage(type="external", crew=crew, config=config) # type: ignore[no-untyped-call]
@staticmethod
def external_supported_storages() -> dict[str, Any]:
return {
"mem0": ExternalMemory._configure_mem0,
}
@staticmethod
def create_storage(
crew: Any, embedder_config: dict[str, Any] | ProviderSpec | None
) -> Storage:
if not embedder_config:
raise ValueError("embedder_config is required")
if "provider" not in embedder_config:
raise ValueError("embedder_config must include a 'provider' key")
provider = embedder_config["provider"]
supported_storages = ExternalMemory.external_supported_storages()
if provider not in supported_storages:
raise ValueError(f"Provider {provider} not supported")
storage: Storage = supported_storages[provider](
crew, embedder_config.get("config", {})
)
return storage
def save(
self,
value: Any,
metadata: dict[str, Any] | None = None,
) -> None:
"""Saves a value into the external storage."""
crewai_event_bus.emit(
self,
event=MemorySaveStartedEvent(
value=value,
metadata=metadata,
source_type="external_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
item = ExternalMemoryItem(
value=value,
metadata=metadata,
agent=self.agent.role if self.agent else None,
)
super().save(value=item.value, metadata=item.metadata)
crewai_event_bus.emit(
self,
event=MemorySaveCompletedEvent(
value=value,
metadata=metadata,
save_time_ms=(time.time() - start_time) * 1000,
source_type="external_memory",
from_agent=self.agent,
from_task=self.task,
),
)
except Exception as e:
crewai_event_bus.emit(
self,
event=MemorySaveFailedEvent(
value=value,
metadata=metadata,
error=str(e),
source_type="external_memory",
from_agent=self.agent,
from_task=self.task,
),
)
raise
def search(
self,
query: str,
limit: int = 5,
score_threshold: float = 0.6,
) -> list[Any]:
"""Search external memory for relevant entries.
Args:
query: The search query.
limit: Maximum number of results to return.
score_threshold: Minimum similarity score for results.
Returns:
List of matching memory entries.
"""
crewai_event_bus.emit(
self,
event=MemoryQueryStartedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
source_type="external_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
results = super().search(
query=query, limit=limit, score_threshold=score_threshold
)
crewai_event_bus.emit(
self,
event=MemoryQueryCompletedEvent(
query=query,
results=results,
limit=limit,
score_threshold=score_threshold,
query_time_ms=(time.time() - start_time) * 1000,
source_type="external_memory",
from_agent=self.agent,
from_task=self.task,
),
)
return results
except Exception as e:
crewai_event_bus.emit(
self,
event=MemoryQueryFailedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
error=str(e),
source_type="external_memory",
),
)
raise
async def asave(
self,
value: Any,
metadata: dict[str, Any] | None = None,
) -> None:
"""Save a value to external memory asynchronously.
Args:
value: The value to save.
metadata: Optional metadata to associate with the value.
"""
crewai_event_bus.emit(
self,
event=MemorySaveStartedEvent(
value=value,
metadata=metadata,
source_type="external_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
item = ExternalMemoryItem(
value=value,
metadata=metadata,
agent=self.agent.role if self.agent else None,
)
await super().asave(value=item.value, metadata=item.metadata)
crewai_event_bus.emit(
self,
event=MemorySaveCompletedEvent(
value=value,
metadata=metadata,
save_time_ms=(time.time() - start_time) * 1000,
source_type="external_memory",
from_agent=self.agent,
from_task=self.task,
),
)
except Exception as e:
crewai_event_bus.emit(
self,
event=MemorySaveFailedEvent(
value=value,
metadata=metadata,
error=str(e),
source_type="external_memory",
from_agent=self.agent,
from_task=self.task,
),
)
raise
async def asearch(
self,
query: str,
limit: int = 5,
score_threshold: float = 0.6,
) -> list[Any]:
"""Search external memory asynchronously.
Args:
query: The search query.
limit: Maximum number of results to return.
score_threshold: Minimum similarity score for results.
Returns:
List of matching memory entries.
"""
crewai_event_bus.emit(
self,
event=MemoryQueryStartedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
source_type="external_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
results = await super().asearch(
query=query, limit=limit, score_threshold=score_threshold
)
crewai_event_bus.emit(
self,
event=MemoryQueryCompletedEvent(
query=query,
results=results,
limit=limit,
score_threshold=score_threshold,
query_time_ms=(time.time() - start_time) * 1000,
source_type="external_memory",
from_agent=self.agent,
from_task=self.task,
),
)
return results
except Exception as e:
crewai_event_bus.emit(
self,
event=MemoryQueryFailedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
error=str(e),
source_type="external_memory",
),
)
raise
def reset(self) -> None:
self.storage.reset()
def set_crew(self, crew: Any) -> ExternalMemory:
super().set_crew(crew)
if not self.storage:
self.storage = self.create_storage(crew, self.embedder_config) # type: ignore[arg-type]
return self
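
A hedged setup sketch for the one supported provider; assumes ``mem0ai`` is installed and configured:

```python
storage = ExternalMemory.create_storage(
    crew=None,
    embedder_config={"provider": "mem0", "config": {}},
)
em = ExternalMemory(storage=storage)
em.save("User prefers dark mode", metadata={"source": "settings"})
print(em.search("dark mode", limit=3))
```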

View File

@@ -0,0 +1,13 @@
from typing import Any
class ExternalMemoryItem:
def __init__(
self,
value: Any,
metadata: dict[str, Any] | None = None,
agent: str | None = None,
):
self.value = value
self.metadata = metadata
self.agent = agent

View File

@@ -0,0 +1,255 @@
import time
from typing import Any
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.memory_events import (
MemoryQueryCompletedEvent,
MemoryQueryFailedEvent,
MemoryQueryStartedEvent,
MemorySaveCompletedEvent,
MemorySaveFailedEvent,
MemorySaveStartedEvent,
)
from crewai.memory.long_term.long_term_memory_item import LongTermMemoryItem
from crewai.memory.memory import Memory
from crewai.memory.storage.ltm_sqlite_storage import LTMSQLiteStorage
class LongTermMemory(Memory):
"""
LongTermMemory class for managing cross-run data related to the overall
crew's execution and performance.
Inherits from the Memory class and uses a storage implementation that
adheres to the Storage interface, working specifically with
LongTermMemoryItem instances.
"""
def __init__(
self,
storage: LTMSQLiteStorage | None = None,
path: str | None = None,
) -> None:
if not storage:
storage = LTMSQLiteStorage(db_path=path) if path else LTMSQLiteStorage()
super().__init__(storage=storage)
def save(self, item: LongTermMemoryItem) -> None: # type: ignore # BUG?: Signature of "save" incompatible with supertype "Memory"
crewai_event_bus.emit(
self,
event=MemorySaveStartedEvent(
value=item.task,
metadata=item.metadata,
agent_role=item.agent,
source_type="long_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
metadata = item.metadata
metadata.update(
{"agent": item.agent, "expected_output": item.expected_output}
)
self.storage.save(
task_description=item.task,
score=metadata["quality"],
metadata=metadata,
datetime=item.datetime,
)
crewai_event_bus.emit(
self,
event=MemorySaveCompletedEvent(
value=item.task,
metadata=item.metadata,
agent_role=item.agent,
save_time_ms=(time.time() - start_time) * 1000,
source_type="long_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
except Exception as e:
crewai_event_bus.emit(
self,
event=MemorySaveFailedEvent(
value=item.task,
metadata=item.metadata,
agent_role=item.agent,
error=str(e),
source_type="long_term_memory",
),
)
raise
def search( # type: ignore[override]
self,
task: str,
latest_n: int = 3,
) -> list[dict[str, Any]]:
"""Search long-term memory for relevant entries.
Args:
task: The task description to search for.
latest_n: Maximum number of results to return.
Returns:
List of matching memory entries.
"""
crewai_event_bus.emit(
self,
event=MemoryQueryStartedEvent(
query=task,
limit=latest_n,
source_type="long_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
results = self.storage.load(task, latest_n)
crewai_event_bus.emit(
self,
event=MemoryQueryCompletedEvent(
query=task,
results=results,
limit=latest_n,
query_time_ms=(time.time() - start_time) * 1000,
source_type="long_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
return results or []
except Exception as e:
crewai_event_bus.emit(
self,
event=MemoryQueryFailedEvent(
query=task,
limit=latest_n,
error=str(e),
source_type="long_term_memory",
),
)
raise
async def asave(self, item: LongTermMemoryItem) -> None: # type: ignore[override]
"""Save an item to long-term memory asynchronously.
Args:
item: The LongTermMemoryItem to save.
"""
crewai_event_bus.emit(
self,
event=MemorySaveStartedEvent(
value=item.task,
metadata=item.metadata,
agent_role=item.agent,
source_type="long_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
metadata = item.metadata
metadata.update(
{"agent": item.agent, "expected_output": item.expected_output}
)
await self.storage.asave(
task_description=item.task,
score=metadata["quality"],
metadata=metadata,
datetime=item.datetime,
)
crewai_event_bus.emit(
self,
event=MemorySaveCompletedEvent(
value=item.task,
metadata=item.metadata,
agent_role=item.agent,
save_time_ms=(time.time() - start_time) * 1000,
source_type="long_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
except Exception as e:
crewai_event_bus.emit(
self,
event=MemorySaveFailedEvent(
value=item.task,
metadata=item.metadata,
agent_role=item.agent,
error=str(e),
source_type="long_term_memory",
),
)
raise
async def asearch( # type: ignore[override]
self,
task: str,
latest_n: int = 3,
) -> list[dict[str, Any]]:
"""Search long-term memory asynchronously.
Args:
task: The task description to search for.
latest_n: Maximum number of results to return.
Returns:
List of matching memory entries.
"""
crewai_event_bus.emit(
self,
event=MemoryQueryStartedEvent(
query=task,
limit=latest_n,
source_type="long_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
results = await self.storage.aload(task, latest_n)
crewai_event_bus.emit(
self,
event=MemoryQueryCompletedEvent(
query=task,
results=results,
limit=latest_n,
query_time_ms=(time.time() - start_time) * 1000,
source_type="long_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
return results or []
except Exception as e:
crewai_event_bus.emit(
self,
event=MemoryQueryFailedEvent(
query=task,
limit=latest_n,
error=str(e),
source_type="long_term_memory",
),
)
raise
def reset(self) -> None:
"""Reset long-term memory."""
self.storage.reset()
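
A minimal usage sketch for the class above (module paths inferred from the imports shown; the quality score is read from the item's metadata, which is what `save()` expects):

```python
from crewai.memory.long_term.long_term_memory import LongTermMemory
from crewai.memory.long_term.long_term_memory_item import LongTermMemoryItem

ltm = LongTermMemory(path="/tmp/ltm_example.db")  # omit path to use the default location

item = LongTermMemoryItem(
    agent="researcher",
    task="Summarize Q3 database benchmarks",
    expected_output="A one-page summary",
    datetime="2026-02-13T09:00:00",
    quality=8,
    metadata={"quality": 8},  # save() reads metadata["quality"] as the score
)
ltm.save(item)

# The storage matches task_description exactly, so query with the same task string.
results = ltm.search(task="Summarize Q3 database benchmarks", latest_n=3)
```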

View File

@@ -0,0 +1,19 @@
from typing import Any
class LongTermMemoryItem:
def __init__(
self,
agent: str,
task: str,
expected_output: str,
datetime: str,
quality: int | float | None = None,
metadata: dict[str, Any] | None = None,
):
self.task = task
self.agent = agent
self.quality = quality
self.datetime = datetime
self.expected_output = expected_output
self.metadata = metadata if metadata is not None else {}

View File

@@ -0,0 +1,121 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from pydantic import BaseModel
from crewai.rag.embeddings.types import EmbedderConfig
if TYPE_CHECKING:
from crewai.agent import Agent
from crewai.task import Task
class Memory(BaseModel):
"""Base class for memory, supporting agent tags and generic metadata."""
embedder_config: EmbedderConfig | dict[str, Any] | None = None
crew: Any | None = None
storage: Any
_agent: Agent | None = None
_task: Task | None = None
def __init__(self, storage: Any, **data: Any):
super().__init__(storage=storage, **data)
@property
def task(self) -> Task | None:
"""Get the current task associated with this memory."""
return self._task
@task.setter
def task(self, task: Task | None) -> None:
"""Set the current task associated with this memory."""
self._task = task
@property
def agent(self) -> Agent | None:
"""Get the current agent associated with this memory."""
return self._agent
@agent.setter
def agent(self, agent: Agent | None) -> None:
"""Set the current agent associated with this memory."""
self._agent = agent
def save(
self,
value: Any,
metadata: dict[str, Any] | None = None,
) -> None:
"""Save a value to memory.
Args:
value: The value to save.
metadata: Optional metadata to associate with the value.
"""
metadata = metadata or {}
self.storage.save(value, metadata)
async def asave(
self,
value: Any,
metadata: dict[str, Any] | None = None,
) -> None:
"""Save a value to memory asynchronously.
Args:
value: The value to save.
metadata: Optional metadata to associate with the value.
"""
metadata = metadata or {}
await self.storage.asave(value, metadata)
def search(
self,
query: str,
limit: int = 5,
score_threshold: float = 0.6,
) -> list[Any]:
"""Search memory for relevant entries.
Args:
query: The search query.
limit: Maximum number of results to return.
score_threshold: Minimum similarity score for results.
Returns:
List of matching memory entries.
"""
results: list[Any] = self.storage.search(
query=query, limit=limit, score_threshold=score_threshold
)
return results
async def asearch(
self,
query: str,
limit: int = 5,
score_threshold: float = 0.6,
) -> list[Any]:
"""Search memory for relevant entries asynchronously.
Args:
query: The search query.
limit: Maximum number of results to return.
score_threshold: Minimum similarity score for results.
Returns:
List of matching memory entries.
"""
results: list[Any] = await self.storage.asearch(
query=query, limit=limit, score_threshold=score_threshold
)
return results
def set_crew(self, crew: Any) -> Memory:
"""Set the crew for this memory instance."""
self.crew = crew
return self
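
Because the base class simply delegates to whatever storage object it is given, anything with matching `save`/`search` methods works. A minimal sketch with a hypothetical stand-in storage (the `_EchoStorage` name is illustrative, not part of the codebase):

```python
from typing import Any

from crewai.memory.memory import Memory

class _EchoStorage:
    """Toy storage: records saves, does substring matching on search."""

    def __init__(self) -> None:
        self.rows: list[tuple[Any, dict[str, Any]]] = []

    def save(self, value: Any, metadata: dict[str, Any]) -> None:
        self.rows.append((value, metadata))

    def search(self, query: str, limit: int, score_threshold: float) -> list[Any]:
        return [v for v, _ in self.rows if query in str(v)][:limit]

mem = Memory(storage=_EchoStorage())
mem.save("PostgreSQL pools 10k connections", metadata={"topic": "db"})
print(mem.search("PostgreSQL"))  # -> ['PostgreSQL pools 10k connections']
```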

View File

@@ -1,272 +0,0 @@
"""Scoped and sliced views over unified Memory."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from crewai.memory.unified_memory import Memory
from crewai.memory.types import (
_RECALL_OVERSAMPLE_FACTOR,
MemoryMatch,
MemoryRecord,
ScopeInfo,
)
class MemoryScope:
"""View of Memory restricted to a root path. All operations are scoped under that path."""
def __init__(self, memory: Memory, root_path: str) -> None:
"""Initialize scope.
Args:
memory: The underlying Memory instance.
root_path: Root path for this scope (e.g. /agent/1).
"""
self._memory = memory
self._root = root_path.rstrip("/") or ""
if self._root and not self._root.startswith("/"):
self._root = "/" + self._root
def _scope_path(self, scope: str | None) -> str:
if not scope or scope == "/":
return self._root or "/"
s = scope.rstrip("/")
if not s.startswith("/"):
s = "/" + s
if not self._root:
return s
base = self._root.rstrip("/")
return f"{base}{s}"
def remember(
self,
content: str,
scope: str | None = "/",
categories: list[str] | None = None,
metadata: dict[str, Any] | None = None,
importance: float | None = None,
source: str | None = None,
private: bool = False,
) -> MemoryRecord:
"""Remember content; scope is relative to this scope's root."""
path = self._scope_path(scope)
return self._memory.remember(
content,
scope=path,
categories=categories,
metadata=metadata,
importance=importance,
source=source,
private=private,
)
def recall(
self,
query: str,
scope: str | None = None,
categories: list[str] | None = None,
limit: int = 10,
depth: str = "deep",
source: str | None = None,
include_private: bool = False,
) -> list[MemoryMatch]:
"""Recall within this scope (root path and below)."""
search_scope = self._scope_path(scope) if scope else (self._root or "/")
return self._memory.recall(
query,
scope=search_scope,
categories=categories,
limit=limit,
depth=depth,
source=source,
include_private=include_private,
)
def extract_memories(self, content: str) -> list[str]:
"""Extract discrete memories from content; delegates to underlying Memory."""
return self._memory.extract_memories(content)
def forget(
self,
scope: str | None = None,
categories: list[str] | None = None,
older_than: datetime | None = None,
metadata_filter: dict[str, Any] | None = None,
record_ids: list[str] | None = None,
) -> int:
"""Forget within this scope."""
prefix = self._scope_path(scope) if scope else (self._root or "/")
return self._memory.forget(
scope=prefix,
categories=categories,
older_than=older_than,
metadata_filter=metadata_filter,
record_ids=record_ids,
)
def list_scopes(self, path: str = "/") -> list[str]:
"""List child scopes under path (relative to this scope's root)."""
full = self._scope_path(path)
return self._memory.list_scopes(full)
def info(self, path: str = "/") -> ScopeInfo:
"""Info for path under this scope."""
full = self._scope_path(path)
return self._memory.info(full)
def tree(self, path: str = "/", max_depth: int = 3) -> str:
"""Tree under path within this scope."""
full = self._scope_path(path)
return self._memory.tree(full, max_depth=max_depth)
def list_categories(self, path: str | None = None) -> dict[str, int]:
"""Categories in this scope; path None means this scope root."""
full = self._scope_path(path) if path else (self._root or "/")
return self._memory.list_categories(full)
def reset(self, scope: str | None = None) -> None:
"""Reset within this scope."""
prefix = self._scope_path(scope) if scope else (self._root or "/")
self._memory.reset(scope=prefix)
def subscope(self, path: str) -> MemoryScope:
"""Return a narrower scope under this scope."""
child = path.strip("/")
if not child:
return MemoryScope(self._memory, self._root or "/")
base = self._root.rstrip("/") or ""
new_root = f"{base}/{child}" if base else f"/{child}"
return MemoryScope(self._memory, new_root)
class MemorySlice:
"""View over multiple scopes: recall searches all, remember requires explicit scope unless read_only."""
def __init__(
self,
memory: Memory,
scopes: list[str],
categories: list[str] | None = None,
read_only: bool = True,
) -> None:
"""Initialize slice.
Args:
memory: The underlying Memory instance.
scopes: List of scope paths to include.
categories: Optional category filter for recall.
read_only: If True, remember() raises PermissionError.
"""
self._memory = memory
self._scopes = [s.rstrip("/") or "/" for s in scopes]
self._categories = categories
self._read_only = read_only
def remember(
self,
content: str,
scope: str,
categories: list[str] | None = None,
metadata: dict[str, Any] | None = None,
importance: float | None = None,
source: str | None = None,
private: bool = False,
) -> MemoryRecord:
"""Remember into an explicit scope. Required when read_only=False."""
if self._read_only:
raise PermissionError("This MemorySlice is read-only")
return self._memory.remember(
content,
scope=scope,
categories=categories,
metadata=metadata,
importance=importance,
source=source,
private=private,
)
def recall(
self,
query: str,
scope: str | None = None,
categories: list[str] | None = None,
limit: int = 10,
depth: str = "deep",
source: str | None = None,
include_private: bool = False,
) -> list[MemoryMatch]:
"""Recall across all slice scopes; results merged and re-ranked."""
cats = categories or self._categories
all_matches: list[MemoryMatch] = []
for sc in self._scopes:
matches = self._memory.recall(
query,
scope=sc,
categories=cats,
limit=limit * _RECALL_OVERSAMPLE_FACTOR,
depth=depth,
source=source,
include_private=include_private,
)
all_matches.extend(matches)
seen_ids: set[str] = set()
unique: list[MemoryMatch] = []
for m in sorted(all_matches, key=lambda x: x.score, reverse=True):
if m.record.id not in seen_ids:
seen_ids.add(m.record.id)
unique.append(m)
if len(unique) >= limit:
break
return unique
def extract_memories(self, content: str) -> list[str]:
"""Extract discrete memories from content; delegates to underlying Memory."""
return self._memory.extract_memories(content)
def list_scopes(self, path: str = "/") -> list[str]:
"""List scopes across all slice roots."""
out: list[str] = []
for sc in self._scopes:
full = f"{sc.rstrip('/')}{path}" if sc != "/" else path
out.extend(self._memory.list_scopes(full))
return sorted(set(out))
def info(self, path: str = "/") -> ScopeInfo:
"""Aggregate info across slice scopes (record counts summed)."""
total_records = 0
all_categories: set[str] = set()
oldest: datetime | None = None
newest: datetime | None = None
children: list[str] = []
for sc in self._scopes:
full = f"{sc.rstrip('/')}{path}" if sc != "/" else path
inf = self._memory.info(full)
total_records += inf.record_count
all_categories.update(inf.categories)
if inf.oldest_record:
oldest = inf.oldest_record if oldest is None else min(oldest, inf.oldest_record)
if inf.newest_record:
newest = inf.newest_record if newest is None else max(newest, inf.newest_record)
children.extend(inf.child_scopes)
return ScopeInfo(
path=path,
record_count=total_records,
categories=sorted(all_categories),
oldest_record=oldest,
newest_record=newest,
child_scopes=sorted(set(children)),
)
def list_categories(self, path: str | None = None) -> dict[str, int]:
"""Categories and counts across slice scopes."""
counts: dict[str, int] = {}
for sc in self._scopes:
full = (f"{sc.rstrip('/')}{path}" if sc != "/" else path) if path else sc
for k, v in self._memory.list_categories(full).items():
counts[k] = counts.get(k, 0) + v
return counts
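
A sketch of how the two views compose paths and enforce permissions, assuming a unified `Memory` instance named `memory` and that both classes are importable from this module:

```python
agent_scope = MemoryScope(memory, "/agent/1")

# Stored under /agent/1/research: scope arguments are relative to the root.
agent_scope.remember("Prefers concise answers", scope="/research")

# Searches /agent/1 and everything below it.
matches = agent_scope.recall("answer style", limit=5)

# Narrower view rooted at /agent/1/research.
research = agent_scope.subscope("research")

# Read-only view across two agents; remember() would raise PermissionError.
shared = MemorySlice(memory, scopes=["/agent/1", "/agent/2"], read_only=True)
shared_matches = shared.recall("database findings", limit=10)
```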

View File

@@ -1,367 +0,0 @@
"""RLM-inspired intelligent recall flow for memory retrieval.
Implements adaptive-depth retrieval with:
- LLM query distillation into targeted sub-queries
- Keyword-driven category filtering
- Time-based filtering from temporal hints
- Parallel multi-query, multi-scope search
- Confidence-based routing with iterative deepening (budget loop)
- Evidence gap tracking propagated to results
"""
from __future__ import annotations
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from typing import Any
from uuid import uuid4
from pydantic import BaseModel, Field
from crewai.flow.flow import Flow, listen, router, start
from crewai.memory.analyze import QueryAnalysis, analyze_query
from crewai.memory.types import (
_RECALL_OVERSAMPLE_FACTOR,
MemoryConfig,
MemoryMatch,
MemoryRecord,
compute_composite_score,
embed_texts,
)
class RecallState(BaseModel):
"""State for the recall flow."""
id: str = Field(default_factory=lambda: str(uuid4()))
query: str = ""
scope: str | None = None
categories: list[str] | None = None
inferred_categories: list[str] = Field(default_factory=list)
time_cutoff: datetime | None = None
source: str | None = None
include_private: bool = False
limit: int = 10
query_embeddings: list[tuple[str, list[float]]] = Field(default_factory=list)
query_analysis: QueryAnalysis | None = None
candidate_scopes: list[str] = Field(default_factory=list)
chunk_findings: list[Any] = Field(default_factory=list)
evidence_gaps: list[str] = Field(default_factory=list)
confidence: float = 0.0
final_results: list[MemoryMatch] = Field(default_factory=list)
exploration_budget: int = 1
class RecallFlow(Flow[RecallState]):
"""RLM-inspired intelligent memory recall flow.
Analyzes the query via LLM to produce targeted sub-queries and filters,
embeds each sub-query, searches across candidate scopes in parallel,
and iteratively deepens exploration when confidence is low.
"""
_skip_auto_memory: bool = True
initial_state = RecallState
def __init__(
self,
storage: Any,
llm: Any,
embedder: Any,
config: MemoryConfig | None = None,
) -> None:
super().__init__(suppress_flow_events=True)
self._storage = storage
self._llm = llm
self._embedder = embedder
self._config = config or MemoryConfig()
# ------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------
def _merged_categories(self) -> list[str] | None:
"""Merge caller-supplied and LLM-inferred categories."""
merged = list(
set((self.state.categories or []) + self.state.inferred_categories)
)
return merged or None
def _do_search(self) -> list[dict[str, Any]]:
"""Run parallel search across (embeddings x scopes) with filters.
Populates ``state.chunk_findings`` and ``state.confidence``.
Returns the findings list.
"""
search_categories = self._merged_categories()
def _search_one(
embedding: list[float], scope: str
) -> tuple[str, list[tuple[MemoryRecord, float]]]:
raw = self._storage.search(
embedding,
scope_prefix=scope,
categories=search_categories,
limit=self.state.limit * _RECALL_OVERSAMPLE_FACTOR,
min_score=0.0,
)
# Post-filter by time cutoff
if self.state.time_cutoff and raw:
raw = [
(r, s) for r, s in raw if r.created_at >= self.state.time_cutoff
]
# Privacy filter
if not self.state.include_private and raw:
raw = [
(r, s) for r, s in raw
if not r.private or r.source == self.state.source
]
return scope, raw
# Build (embedding, scope) task list
tasks: list[tuple[list[float], str]] = [
(embedding, scope)
for _query_text, embedding in self.state.query_embeddings
for scope in self.state.candidate_scopes
]
findings: list[dict[str, Any]] = []
if len(tasks) <= 1:
for emb, sc in tasks:
scope, results = _search_one(emb, sc)
if results:
top_composite, _ = compute_composite_score(
results[0][0], results[0][1], self._config
)
findings.append({
"scope": scope,
"results": results,
"top_score": top_composite,
})
else:
with ThreadPoolExecutor(max_workers=min(len(tasks), 4)) as pool:
futures = {
pool.submit(_search_one, emb, sc): (emb, sc)
for emb, sc in tasks
}
for future in as_completed(futures):
scope, results = future.result()
if results:
top_composite, _ = compute_composite_score(
results[0][0], results[0][1], self._config
)
findings.append({
"scope": scope,
"results": results,
"top_score": top_composite,
})
self.state.chunk_findings = findings
self.state.confidence = max(
(f["top_score"] for f in findings), default=0.0
)
return findings
# ------------------------------------------------------------------
# Flow steps
# ------------------------------------------------------------------
@start()
def analyze_query_step(self) -> QueryAnalysis:
"""Analyze the query, embed distilled sub-queries, extract filters.
Short queries (below ``query_analysis_threshold`` characters) skip
the LLM call entirely and embed the raw query directly -- saving
~1-3s per recall. Longer queries (e.g. full task descriptions)
benefit from LLM distillation into targeted sub-queries.
Sub-queries are embedded in a single batch ``embed_texts()`` call
rather than sequential ``embed_text()`` calls.
"""
self.state.exploration_budget = self._config.exploration_budget
query_len = len(self.state.query)
skip_llm = query_len < self._config.query_analysis_threshold
if skip_llm:
# Short query: skip LLM, embed raw query directly
analysis = QueryAnalysis(
keywords=[],
suggested_scopes=[],
complexity="simple",
recall_queries=[self.state.query],
)
self.state.query_analysis = analysis
else:
# Long query: use LLM to distill sub-queries and extract filters
available = self._storage.list_scopes(self.state.scope or "/")
if not available:
available = ["/"]
scope_info = (
self._storage.get_scope_info(self.state.scope or "/")
if self.state.scope
else None
)
analysis = analyze_query(
self.state.query,
available,
scope_info,
self._llm,
)
self.state.query_analysis = analysis
# Wire keywords -> category filter
if analysis.keywords:
self.state.inferred_categories = analysis.keywords
# Parse time_filter into a datetime cutoff
if analysis.time_filter:
try:
self.state.time_cutoff = datetime.fromisoformat(analysis.time_filter)
except ValueError:
pass
# Batch-embed all sub-queries in ONE call
queries = analysis.recall_queries if analysis.recall_queries else [self.state.query]
queries = queries[:3]
embeddings = embed_texts(self._embedder, queries)
pairs: list[tuple[str, list[float]]] = [
(q, emb) for q, emb in zip(queries, embeddings, strict=False) if emb
]
if not pairs:
# Fallback: embed the raw query if distilled queries all failed
fallback_emb = embed_texts(self._embedder, [self.state.query])
if fallback_emb and fallback_emb[0]:
pairs = [(self.state.query, fallback_emb[0])]
self.state.query_embeddings = pairs
return analysis
@listen(analyze_query_step)
def filter_and_chunk(self) -> list[str]:
"""Select candidate scopes based on LLM analysis."""
analysis = self.state.query_analysis
scope_prefix = (self.state.scope or "/").rstrip("/") or "/"
if analysis and analysis.suggested_scopes:
candidates = [s for s in analysis.suggested_scopes if s]
else:
candidates = self._storage.list_scopes(scope_prefix)
if not candidates:
info = self._storage.get_scope_info(scope_prefix)
if info.record_count > 0:
candidates = [scope_prefix]
else:
candidates = [scope_prefix]
self.state.candidate_scopes = candidates[:20]
return self.state.candidate_scopes
@listen(filter_and_chunk)
def search_chunks(self) -> list[Any]:
"""Initial parallel search across (embeddings x scopes) with filters."""
return self._do_search()
@router(search_chunks)
def decide_depth(self) -> str:
"""Route based on confidence, complexity, and remaining budget."""
analysis = self.state.query_analysis
if (
analysis
and analysis.complexity == "complex"
and self.state.confidence < self._config.complex_query_threshold
):
if self.state.exploration_budget > 0:
return "explore_deeper"
if self.state.confidence >= self._config.confidence_threshold_high:
return "synthesize"
if (
self.state.exploration_budget > 0
and self.state.confidence < self._config.confidence_threshold_low
):
return "explore_deeper"
return "synthesize"
@listen("explore_deeper")
def recursive_exploration(self) -> list[Any]:
"""Feed top results back to LLM for deeper context extraction.
Decrements the exploration budget so the loop terminates.
"""
self.state.exploration_budget -= 1
enhanced = []
for finding in self.state.chunk_findings:
if not finding.get("results"):
continue
content_parts = [r[0].content for r in finding["results"][:5]]
chunk_text = "\n---\n".join(content_parts)
prompt = (
f"Query: {self.state.query}\n\n"
f"Relevant memory excerpts:\n{chunk_text}\n\n"
"Extract the most relevant information for the query. "
"If something is missing, say what's missing in one short line."
)
try:
response = self._llm.call([{"role": "user", "content": prompt}])
if isinstance(response, str) and "missing" in response.lower():
self.state.evidence_gaps.append(response[:200])
enhanced.append({
"scope": finding["scope"],
"extraction": response,
"results": finding["results"],
})
except Exception:
enhanced.append({
"scope": finding["scope"],
"extraction": "",
"results": finding["results"],
})
self.state.chunk_findings = enhanced
return enhanced
@listen(recursive_exploration)
def re_search(self) -> list[Any]:
"""Re-search after exploration to update confidence for the router loop."""
return self._do_search()
@router(re_search)
def re_decide_depth(self) -> str:
"""Re-evaluate depth after re-search. Same logic as decide_depth."""
return self.decide_depth()
@listen("synthesize")
def synthesize_results(self) -> list[MemoryMatch]:
"""Deduplicate, composite-score, rank, and attach evidence gaps."""
seen_ids: set[str] = set()
matches: list[MemoryMatch] = []
for finding in self.state.chunk_findings:
if not isinstance(finding, dict):
continue
results = finding.get("results", [])
if not isinstance(results, list):
continue
for item in results:
if isinstance(item, (list, tuple)) and len(item) >= 2:
record, score = item[0], item[1]
else:
continue
if isinstance(record, MemoryRecord) and record.id not in seen_ids:
seen_ids.add(record.id)
composite, reasons = compute_composite_score(
record, float(score), self._config
)
matches.append(
MemoryMatch(
record=record,
score=composite,
match_reasons=reasons,
)
)
matches.sort(key=lambda m: m.score, reverse=True)
self.state.final_results = matches[: self.state.limit]
# Attach evidence gaps to the first result so callers can inspect them
if self.state.evidence_gaps and self.state.final_results:
self.state.final_results[0].evidence_gaps = list(self.state.evidence_gaps)
return self.state.final_results
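
The two routers above implement the same decision table; a standalone sketch of that logic (the threshold values are illustrative stand-ins for the `MemoryConfig` defaults, which are not shown here):

```python
def decide_depth(confidence: float, complexity: str, budget: int,
                 complex_threshold: float = 0.7,
                 high: float = 0.8, low: float = 0.4) -> str:
    """Mirrors RecallFlow.decide_depth with the thresholds made explicit."""
    if complexity == "complex" and confidence < complex_threshold and budget > 0:
        return "explore_deeper"
    if confidence >= high:
        return "synthesize"
    if budget > 0 and confidence < low:
        return "explore_deeper"
    return "synthesize"

assert decide_depth(0.9, "simple", budget=1) == "synthesize"
assert decide_depth(0.2, "simple", budget=1) == "explore_deeper"
assert decide_depth(0.2, "simple", budget=0) == "synthesize"  # budget exhausted
```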

View File

@@ -0,0 +1,318 @@
from __future__ import annotations
import time
from typing import Any
from pydantic import PrivateAttr
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.memory_events import (
MemoryQueryCompletedEvent,
MemoryQueryFailedEvent,
MemoryQueryStartedEvent,
MemorySaveCompletedEvent,
MemorySaveFailedEvent,
MemorySaveStartedEvent,
)
from crewai.memory.memory import Memory
from crewai.memory.short_term.short_term_memory_item import ShortTermMemoryItem
from crewai.memory.storage.rag_storage import RAGStorage
class ShortTermMemory(Memory):
"""
ShortTermMemory class for managing transient data related to immediate tasks
and interactions.
Inherits from the Memory class and uses a Storage-conforming backend for
persistence, specifically working with ShortTermMemoryItem instances.
"""
_memory_provider: str | None = PrivateAttr()
def __init__(
self,
crew: Any = None,
embedder_config: Any = None,
storage: Any = None,
path: str | None = None,
) -> None:
memory_provider = None
if embedder_config and isinstance(embedder_config, dict):
memory_provider = embedder_config.get("provider")
if memory_provider == "mem0":
try:
from crewai.memory.storage.mem0_storage import Mem0Storage
except ImportError as e:
raise ImportError(
"Mem0 is not installed. Please install it with `pip install mem0ai`."
) from e
config = (
embedder_config.get("config")
if embedder_config and isinstance(embedder_config, dict)
else None
)
storage = Mem0Storage(type="short_term", crew=crew, config=config) # type: ignore[no-untyped-call]
else:
storage = (
storage
if storage
else RAGStorage(
type="short_term",
embedder_config=embedder_config,
crew=crew,
path=path,
)
)
super().__init__(storage=storage)
self._memory_provider = memory_provider
def save(
self,
value: Any,
metadata: dict[str, Any] | None = None,
) -> None:
crewai_event_bus.emit(
self,
event=MemorySaveStartedEvent(
value=value,
metadata=metadata,
source_type="short_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
item = ShortTermMemoryItem(
data=value,
metadata=metadata,
agent=self.agent.role if self.agent else None,
)
if self._memory_provider == "mem0":
item.data = (
f"Remember the following insights from Agent run: {item.data}"
)
super().save(value=item.data, metadata=item.metadata)
crewai_event_bus.emit(
self,
event=MemorySaveCompletedEvent(
value=value,
metadata=metadata,
save_time_ms=(time.time() - start_time) * 1000,
source_type="short_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
except Exception as e:
crewai_event_bus.emit(
self,
event=MemorySaveFailedEvent(
value=value,
metadata=metadata,
error=str(e),
source_type="short_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
raise
def search(
self,
query: str,
limit: int = 5,
score_threshold: float = 0.6,
) -> list[Any]:
"""Search short-term memory for relevant entries.
Args:
query: The search query.
limit: Maximum number of results to return.
score_threshold: Minimum similarity score for results.
Returns:
List of matching memory entries.
"""
crewai_event_bus.emit(
self,
event=MemoryQueryStartedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
source_type="short_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
results = self.storage.search(
query=query, limit=limit, score_threshold=score_threshold
)
crewai_event_bus.emit(
self,
event=MemoryQueryCompletedEvent(
query=query,
results=results,
limit=limit,
score_threshold=score_threshold,
query_time_ms=(time.time() - start_time) * 1000,
source_type="short_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
return list(results)
except Exception as e:
crewai_event_bus.emit(
self,
event=MemoryQueryFailedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
error=str(e),
source_type="short_term_memory",
),
)
raise
async def asave(
self,
value: Any,
metadata: dict[str, Any] | None = None,
) -> None:
"""Save a value to short-term memory asynchronously.
Args:
value: The value to save.
metadata: Optional metadata to associate with the value.
"""
crewai_event_bus.emit(
self,
event=MemorySaveStartedEvent(
value=value,
metadata=metadata,
source_type="short_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
item = ShortTermMemoryItem(
data=value,
metadata=metadata,
agent=self.agent.role if self.agent else None,
)
if self._memory_provider == "mem0":
item.data = (
f"Remember the following insights from Agent run: {item.data}"
)
await super().asave(value=item.data, metadata=item.metadata)
crewai_event_bus.emit(
self,
event=MemorySaveCompletedEvent(
value=value,
metadata=metadata,
save_time_ms=(time.time() - start_time) * 1000,
source_type="short_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
except Exception as e:
crewai_event_bus.emit(
self,
event=MemorySaveFailedEvent(
value=value,
metadata=metadata,
error=str(e),
source_type="short_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
raise
async def asearch(
self,
query: str,
limit: int = 5,
score_threshold: float = 0.6,
) -> list[Any]:
"""Search short-term memory asynchronously.
Args:
query: The search query.
limit: Maximum number of results to return.
score_threshold: Minimum similarity score for results.
Returns:
List of matching memory entries.
"""
crewai_event_bus.emit(
self,
event=MemoryQueryStartedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
source_type="short_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
start_time = time.time()
try:
results = await self.storage.asearch(
query=query, limit=limit, score_threshold=score_threshold
)
crewai_event_bus.emit(
self,
event=MemoryQueryCompletedEvent(
query=query,
results=results,
limit=limit,
score_threshold=score_threshold,
query_time_ms=(time.time() - start_time) * 1000,
source_type="short_term_memory",
from_agent=self.agent,
from_task=self.task,
),
)
return list(results)
except Exception as e:
crewai_event_bus.emit(
self,
event=MemoryQueryFailedEvent(
query=query,
limit=limit,
score_threshold=score_threshold,
error=str(e),
source_type="short_term_memory",
),
)
raise
def reset(self) -> None:
try:
self.storage.reset()
except Exception as e:
raise Exception(
f"An error occurred while resetting the short-term memory: {e}"
) from e
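
A minimal sketch of the two construction paths (module path inferred from the imports shown; the mem0 variant requires `mem0ai` to be installed):

```python
from crewai.memory.short_term.short_term_memory import ShortTermMemory

# Default path: a RAGStorage backend is created for you.
stm = ShortTermMemory(path="/tmp/stm_example")
stm.save("User asked about connection pooling", metadata={"turn": 1})
hits = stm.search("connection pooling", limit=3, score_threshold=0.6)

# mem0 path: the provider key switches the backend to Mem0Storage.
stm_mem0 = ShortTermMemory(
    embedder_config={"provider": "mem0", "config": {"user_id": "u1"}}
)
```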

View File

@@ -0,0 +1,13 @@
from typing import Any
class ShortTermMemoryItem:
def __init__(
self,
data: Any,
agent: str | None = None,
metadata: dict[str, Any] | None = None,
):
self.data = data
self.agent = agent
self.metadata = metadata if metadata is not None else {}

View File

@@ -1,179 +0,0 @@
"""Storage backend protocol for the unified memory system."""
from __future__ import annotations
from datetime import datetime
from typing import Any, Protocol, runtime_checkable
from crewai.memory.types import MemoryRecord, ScopeInfo
@runtime_checkable
class StorageBackend(Protocol):
"""Protocol for pluggable memory storage backends."""
def save(self, records: list[MemoryRecord]) -> None:
"""Save memory records to storage.
Args:
records: List of memory records to persist.
"""
...
def search(
self,
query_embedding: list[float],
scope_prefix: str | None = None,
categories: list[str] | None = None,
metadata_filter: dict[str, Any] | None = None,
limit: int = 10,
min_score: float = 0.0,
) -> list[tuple[MemoryRecord, float]]:
"""Search for memories by vector similarity with optional filters.
Args:
query_embedding: Embedding vector for the query.
scope_prefix: Optional scope path prefix to filter results.
categories: Optional list of categories to filter by.
metadata_filter: Optional metadata key-value filter.
limit: Maximum number of results to return.
min_score: Minimum similarity score threshold.
Returns:
List of (MemoryRecord, score) tuples ordered by relevance.
"""
...
def delete(
self,
scope_prefix: str | None = None,
categories: list[str] | None = None,
record_ids: list[str] | None = None,
older_than: datetime | None = None,
metadata_filter: dict[str, Any] | None = None,
) -> int:
"""Delete memories matching the given criteria.
Args:
scope_prefix: Optional scope path prefix.
categories: Optional list of categories.
record_ids: Optional list of record IDs to delete.
older_than: Optional cutoff datetime (delete older records).
metadata_filter: Optional metadata key-value filter.
Returns:
Number of records deleted.
"""
...
def update(self, record: MemoryRecord) -> None:
"""Update an existing record. Replaces the record with the same ID."""
...
def get_record(self, record_id: str) -> MemoryRecord | None:
"""Return a single record by ID, or None if not found.
Args:
record_id: The unique ID of the record.
Returns:
The MemoryRecord, or None if no record with that ID exists.
"""
...
def list_records(
self,
scope_prefix: str | None = None,
limit: int = 200,
offset: int = 0,
) -> list[MemoryRecord]:
"""List records in a scope, newest first.
Args:
scope_prefix: Optional scope path prefix to filter by.
limit: Maximum number of records to return.
offset: Number of records to skip (for pagination).
Returns:
List of MemoryRecord, ordered by created_at descending.
"""
...
def get_scope_info(self, scope: str) -> ScopeInfo:
"""Get information about a scope.
Args:
scope: The scope path.
Returns:
ScopeInfo with record count, categories, date range, child scopes.
"""
...
def list_scopes(self, parent: str = "/") -> list[str]:
"""List immediate child scopes under a parent path.
Args:
parent: Parent scope path (default root).
Returns:
List of immediate child scope paths.
"""
...
def list_categories(self, scope_prefix: str | None = None) -> dict[str, int]:
"""List categories and their counts within a scope.
Args:
scope_prefix: Optional scope to limit to (None = global).
Returns:
Mapping of category name to record count.
"""
...
def count(self, scope_prefix: str | None = None) -> int:
"""Count records in scope (and subscopes).
Args:
scope_prefix: Optional scope path (None = all).
Returns:
Number of records.
"""
...
def reset(self, scope_prefix: str | None = None) -> None:
"""Reset (delete all) memories in scope.
Args:
scope_prefix: Optional scope path (None = reset all).
"""
...
async def asave(self, records: list[MemoryRecord]) -> None:
"""Save memory records asynchronously."""
...
async def asearch(
self,
query_embedding: list[float],
scope_prefix: str | None = None,
categories: list[str] | None = None,
metadata_filter: dict[str, Any] | None = None,
limit: int = 10,
min_score: float = 0.0,
) -> list[tuple[MemoryRecord, float]]:
"""Search for memories asynchronously."""
...
async def adelete(
self,
scope_prefix: str | None = None,
categories: list[str] | None = None,
record_ids: list[str] | None = None,
older_than: datetime | None = None,
metadata_filter: dict[str, Any] | None = None,
) -> int:
"""Delete memories asynchronously."""
...
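
Because the protocol is `runtime_checkable`, any object exposing these methods satisfies it structurally -- no inheritance required. A toy in-memory backend sketch covering the two core methods (the remaining protocol methods are elided; `isinstance(backend, StorageBackend)` only passes once all of them exist):

```python
from crewai.memory.types import MemoryRecord

class InMemoryBackend:
    """Toy backend: dot-product scoring over an in-process list."""

    def __init__(self) -> None:
        self._rows: list[MemoryRecord] = []

    def save(self, records: list[MemoryRecord]) -> None:
        self._rows.extend(records)

    def search(
        self,
        query_embedding: list[float],
        scope_prefix: str | None = None,
        categories: list[str] | None = None,
        metadata_filter: dict | None = None,
        limit: int = 10,
        min_score: float = 0.0,
    ) -> list[tuple[MemoryRecord, float]]:
        def dot(r: MemoryRecord) -> float:
            return sum(a * b for a, b in zip(query_embedding, r.embedding or []))

        hits = [
            (r, dot(r))
            for r in self._rows
            if scope_prefix is None or r.scope.startswith(scope_prefix.rstrip("/"))
        ]
        hits.sort(key=lambda t: t[1], reverse=True)
        return [(r, s) for r, s in hits if s >= min_score][:limit]
```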

View File

@@ -0,0 +1,16 @@
from typing import Any
class Storage:
"""Abstract base class defining the storage interface"""
def save(self, value: Any, metadata: dict[str, Any]) -> None:
pass
def search(
self, query: str, limit: int, score_threshold: float
) -> dict[str, Any] | list[Any]:
return {}
def reset(self) -> None:
pass

View File

@@ -1,536 +0,0 @@
"""LanceDB storage backend for the unified memory system."""
from __future__ import annotations
from datetime import datetime
import json
import logging
import os
from pathlib import Path
import threading
import time
from typing import Any, ClassVar
import lancedb
from crewai.memory.types import MemoryRecord, ScopeInfo
_logger = logging.getLogger(__name__)
# Default embedding vector dimensionality (matches OpenAI text-embedding-3-small).
# Used when creating new tables and for zero-vector placeholder scans.
# Callers can override via the ``vector_dim`` constructor parameter.
DEFAULT_VECTOR_DIM = 1536
# Safety cap on the number of rows returned by a single scan query.
# Prevents unbounded memory use when scanning large tables for scope info,
# listing, or deletion. Internal only -- not user-configurable.
_SCAN_ROWS_LIMIT = 50_000
# Retry settings for LanceDB commit conflicts (optimistic concurrency).
# Under heavy write load (many concurrent saves), the table version can
# advance rapidly. 5 retries with 0.2s base delay (0.2 + 0.4 + 0.8 + 1.6 + 3.2 = 6.2s max)
# gives enough headroom to catch up with version advancement.
_MAX_RETRIES = 5
_RETRY_BASE_DELAY = 0.2 # seconds; doubles on each retry
class LanceDBStorage:
"""LanceDB-backed storage for the unified memory system."""
# Class-level registry: maps resolved database path -> shared write lock.
# When multiple Memory instances (e.g. agent + crew) independently create
# LanceDBStorage pointing at the same directory, they share one lock so
# their writes don't conflict.
# Uses RLock (reentrant) so callers can hold the lock for a batch of
# operations while the individual methods re-acquire it without deadlocking.
_path_locks: ClassVar[dict[str, threading.RLock]] = {}
_path_locks_guard: ClassVar[threading.Lock] = threading.Lock()
def __init__(
self,
path: str | Path | None = None,
table_name: str = "memories",
vector_dim: int | None = None,
) -> None:
"""Initialize LanceDB storage.
Args:
path: Directory path for the LanceDB database. Defaults to
``$CREWAI_STORAGE_DIR/memory`` if the env var is set,
otherwise ``db_storage_path() / memory`` (platform data dir).
table_name: Name of the table for memory records.
vector_dim: Dimensionality of the embedding vector. When ``None``
(default), the dimension is auto-detected from the existing
table schema or from the first saved embedding.
"""
if path is None:
storage_dir = os.environ.get("CREWAI_STORAGE_DIR")
if storage_dir:
path = Path(storage_dir) / "memory"
else:
from crewai.utilities.paths import db_storage_path
path = Path(db_storage_path()) / "memory"
self._path = Path(path)
self._path.mkdir(parents=True, exist_ok=True)
self._table_name = table_name
self._db = lancedb.connect(str(self._path))
# Get or create a shared write lock for this database path.
resolved = str(self._path.resolve())
with LanceDBStorage._path_locks_guard:
if resolved not in LanceDBStorage._path_locks:
LanceDBStorage._path_locks[resolved] = threading.RLock()
self._write_lock = LanceDBStorage._path_locks[resolved]
# Try to open an existing table and infer dimension from its schema.
# If no table exists yet, defer creation until the first save so the
# dimension can be auto-detected from the embedder's actual output.
try:
self._table: lancedb.table.Table | None = self._db.open_table(self._table_name)
self._vector_dim: int = self._infer_dim_from_table(self._table)
except Exception:
self._table = None
self._vector_dim = vector_dim or 0 # 0 = not yet known
# Explicit dim provided: create the table immediately if it doesn't exist.
if self._table is None and vector_dim is not None:
self._vector_dim = vector_dim
self._table = self._create_table(vector_dim)
@property
def write_lock(self) -> threading.RLock:
"""The shared reentrant write lock for this database path.
Callers can acquire this to hold the lock across multiple storage
operations (e.g. delete + update + save as one atomic batch).
Individual methods also acquire it internally, but since it's
reentrant (RLock), the same thread won't deadlock.
"""
return self._write_lock
@staticmethod
def _infer_dim_from_table(table: lancedb.table.Table) -> int:
"""Read vector dimension from an existing table's schema."""
schema = table.schema
for field in schema:
if field.name == "vector":
try:
return field.type.list_size
except Exception:
break
return DEFAULT_VECTOR_DIM
def _retry_write(self, op: str, *args: Any, **kwargs: Any) -> Any:
"""Execute a table operation with retry on LanceDB commit conflicts.
Args:
op: Method name on the table object (e.g. "add", "delete").
*args, **kwargs: Passed to the table method.
LanceDB uses optimistic concurrency: if two transactions overlap,
the second to commit fails with an ``OSError`` containing
"Commit conflict". This helper retries with exponential backoff,
refreshing the table reference before each retry so the retried
call uses the latest committed version (not a stale reference).
"""
delay = _RETRY_BASE_DELAY
for attempt in range(_MAX_RETRIES + 1):
try:
return getattr(self._table, op)(*args, **kwargs)
except OSError as e: # noqa: PERF203
if "Commit conflict" not in str(e) or attempt >= _MAX_RETRIES:
raise
_logger.debug(
"LanceDB commit conflict on %s (attempt %d/%d), retrying in %.1fs",
op, attempt + 1, _MAX_RETRIES, delay,
)
# Refresh table to pick up the latest version before retrying.
# The next getattr(self._table, op) will use the fresh table.
try:
self._table = self._db.open_table(self._table_name)
except Exception: # noqa: S110
pass # table refresh is best-effort
time.sleep(delay)
delay *= 2
return None # unreachable, but satisfies type checker
def _create_table(self, vector_dim: int) -> lancedb.table.Table:
"""Create a new table with the given vector dimension."""
placeholder = [
{
"id": "__schema_placeholder__",
"content": "",
"scope": "/",
"categories_str": "[]",
"metadata_str": "{}",
"importance": 0.5,
"created_at": datetime.utcnow().isoformat(),
"last_accessed": datetime.utcnow().isoformat(),
"source": "",
"private": False,
"vector": [0.0] * vector_dim,
}
]
table = self._db.create_table(self._table_name, placeholder)
table.delete("id = '__schema_placeholder__'")
return table
def _ensure_table(self, vector_dim: int | None = None) -> lancedb.table.Table:
"""Return the table, creating it lazily if needed.
Args:
vector_dim: Dimension hint (e.g. from the first embedding).
Falls back to the stored ``_vector_dim`` or ``DEFAULT_VECTOR_DIM``.
"""
if self._table is not None:
return self._table
dim = vector_dim or self._vector_dim or DEFAULT_VECTOR_DIM
self._vector_dim = dim
self._table = self._create_table(dim)
return self._table
def _record_to_row(self, record: MemoryRecord) -> dict[str, Any]:
return {
"id": record.id,
"content": record.content,
"scope": record.scope,
"categories_str": json.dumps(record.categories),
"metadata_str": json.dumps(record.metadata),
"importance": record.importance,
"created_at": record.created_at.isoformat(),
"last_accessed": record.last_accessed.isoformat(),
"source": record.source or "",
"private": record.private,
"vector": record.embedding if record.embedding else [0.0] * self._vector_dim,
}
def _row_to_record(self, row: dict[str, Any]) -> MemoryRecord:
def _parse_dt(val: Any) -> datetime:
if val is None:
return datetime.utcnow()
if isinstance(val, datetime):
return val
s = str(val)
return datetime.fromisoformat(s.replace("Z", "+00:00"))
return MemoryRecord(
id=str(row["id"]),
content=str(row["content"]),
scope=str(row["scope"]),
categories=json.loads(row["categories_str"]) if row.get("categories_str") else [],
metadata=json.loads(row["metadata_str"]) if row.get("metadata_str") else {},
importance=float(row.get("importance", 0.5)),
created_at=_parse_dt(row.get("created_at")),
last_accessed=_parse_dt(row.get("last_accessed")),
embedding=row.get("vector"),
source=row.get("source") or None,
private=bool(row.get("private", False)),
)
def save(self, records: list[MemoryRecord]) -> None:
if not records:
return
# Auto-detect dimension from the first real embedding.
dim = None
for r in records:
if r.embedding and len(r.embedding) > 0:
dim = len(r.embedding)
break
with self._write_lock:
self._ensure_table(vector_dim=dim)
rows = [self._record_to_row(r) for r in records]
for r in rows:
if r["vector"] is None or len(r["vector"]) != self._vector_dim:
r["vector"] = [0.0] * self._vector_dim
self._retry_write("add", rows)
def update(self, record: MemoryRecord) -> None:
"""Update a record by ID. Preserves created_at, updates last_accessed."""
with self._write_lock:
self._ensure_table()
safe_id = str(record.id).replace("'", "''")
self._retry_write("delete", f"id = '{safe_id}'")
row = self._record_to_row(record)
if row["vector"] is None or len(row["vector"]) != self._vector_dim:
row["vector"] = [0.0] * self._vector_dim
self._retry_write("add", [row])
def touch_records(self, record_ids: list[str]) -> None:
"""Update last_accessed to now for the given record IDs.
Args:
record_ids: IDs of records to touch.
"""
if not record_ids or self._table is None:
return
with self._write_lock:
now = datetime.utcnow().isoformat()
for rid in record_ids:
safe_id = str(rid).replace("'", "''")
rows = (
self._table.search([0.0] * self._vector_dim)
.where(f"id = '{safe_id}'")
.limit(1)
.to_list()
)
if rows:
rows[0]["last_accessed"] = now
self._retry_write("delete", f"id = '{safe_id}'")
self._retry_write("add", [rows[0]])
def get_record(self, record_id: str) -> MemoryRecord | None:
"""Return a single record by ID, or None if not found."""
if self._table is None:
return None
safe_id = str(record_id).replace("'", "''")
rows = self._table.search([0.0] * self._vector_dim).where(f"id = '{safe_id}'").limit(1).to_list()
if not rows:
return None
return self._row_to_record(rows[0])
def search(
self,
query_embedding: list[float],
scope_prefix: str | None = None,
categories: list[str] | None = None,
metadata_filter: dict[str, Any] | None = None,
limit: int = 10,
min_score: float = 0.0,
) -> list[tuple[MemoryRecord, float]]:
if self._table is None:
return []
query = self._table.search(query_embedding)
if scope_prefix is not None and scope_prefix.strip("/"):
prefix = scope_prefix.rstrip("/")
like_val = prefix + "%"
query = query.where(f"scope LIKE '{like_val}'")
results = query.limit(limit * 3 if (categories or metadata_filter) else limit).to_list()
out: list[tuple[MemoryRecord, float]] = []
for row in results:
record = self._row_to_record(row)
if categories and not any(c in record.categories for c in categories):
continue
if metadata_filter and not all(record.metadata.get(k) == v for k, v in metadata_filter.items()):
continue
distance = row.get("_distance", 0.0)
score = 1.0 / (1.0 + float(distance)) if distance is not None else 1.0
if score >= min_score:
out.append((record, score))
if len(out) >= limit:
break
return out[:limit]
def delete(
self,
scope_prefix: str | None = None,
categories: list[str] | None = None,
record_ids: list[str] | None = None,
older_than: datetime | None = None,
metadata_filter: dict[str, Any] | None = None,
) -> int:
if self._table is None:
return 0
with self._write_lock:
if record_ids and not (categories or metadata_filter):
before = self._table.count_rows()
ids_expr = ", ".join(f"'{rid}'" for rid in record_ids)
self._retry_write("delete", f"id IN ({ids_expr})")
return before - self._table.count_rows()
if categories or metadata_filter:
rows = self._scan_rows(scope_prefix)
to_delete: list[str] = []
for row in rows:
record = self._row_to_record(row)
if categories and not any(c in record.categories for c in categories):
continue
if metadata_filter and not all(record.metadata.get(k) == v for k, v in metadata_filter.items()):
continue
if older_than and record.created_at >= older_than:
continue
to_delete.append(record.id)
if not to_delete:
return 0
before = self._table.count_rows()
ids_expr = ", ".join(f"'{rid}'" for rid in to_delete)
self._retry_write("delete", f"id IN ({ids_expr})")
return before - self._table.count_rows()
conditions = []
if scope_prefix is not None and scope_prefix.strip("/"):
prefix = scope_prefix.rstrip("/")
if not prefix.startswith("/"):
prefix = "/" + prefix
conditions.append(f"scope LIKE '{prefix}%' OR scope = '/'")
if older_than is not None:
conditions.append(f"created_at < '{older_than.isoformat()}'")
if not conditions:
before = self._table.count_rows()
self._retry_write("delete", "id != ''")
return before - self._table.count_rows()
where_expr = " AND ".join(conditions)
before = self._table.count_rows()
self._retry_write("delete", where_expr)
return before - self._table.count_rows()
def _scan_rows(self, scope_prefix: str | None = None, limit: int = _SCAN_ROWS_LIMIT) -> list[dict[str, Any]]:
"""Scan rows optionally filtered by scope prefix."""
if self._table is None:
return []
q = self._table.search([0.0] * self._vector_dim)
if scope_prefix is not None and scope_prefix.strip("/"):
q = q.where(f"scope LIKE '{scope_prefix.rstrip('/')}%'")
return q.limit(limit).to_list()
def list_records(
self, scope_prefix: str | None = None, limit: int = 200, offset: int = 0
) -> list[MemoryRecord]:
"""List records in a scope, newest first.
Args:
scope_prefix: Optional scope path prefix to filter by.
limit: Maximum number of records to return.
offset: Number of records to skip (for pagination).
Returns:
List of MemoryRecord, ordered by created_at descending.
"""
rows = self._scan_rows(scope_prefix, limit=limit + offset)
records = [self._row_to_record(r) for r in rows]
records.sort(key=lambda r: r.created_at, reverse=True)
return records[offset : offset + limit]
def get_scope_info(self, scope: str) -> ScopeInfo:
scope = scope.rstrip("/") or "/"
prefix = scope if scope != "/" else ""
if prefix and not prefix.startswith("/"):
prefix = "/" + prefix
rows = self._scan_rows(prefix or None)
if not rows:
return ScopeInfo(
path=scope or "/",
record_count=0,
categories=[],
oldest_record=None,
newest_record=None,
child_scopes=[],
)
categories_set: set[str] = set()
oldest: datetime | None = None
newest: datetime | None = None
child_prefix = (prefix + "/") if prefix else "/"
children: set[str] = set()
for row in rows:
sc = str(row.get("scope", ""))
if child_prefix and sc.startswith(child_prefix):
rest = sc[len(child_prefix):]
first_component = rest.split("/", 1)[0]
if first_component:
children.add(child_prefix + first_component)
try:
cat_str = row.get("categories_str") or "[]"
categories_set.update(json.loads(cat_str))
except Exception: # noqa: S110
pass
created = row.get("created_at")
if created:
dt = datetime.fromisoformat(str(created).replace("Z", "+00:00")) if isinstance(created, str) else created
if isinstance(dt, datetime):
if oldest is None or dt < oldest:
oldest = dt
if newest is None or dt > newest:
newest = dt
return ScopeInfo(
path=scope or "/",
record_count=len(rows),
categories=sorted(categories_set),
oldest_record=oldest,
newest_record=newest,
child_scopes=sorted(children),
)
def list_scopes(self, parent: str = "/") -> list[str]:
parent = parent.rstrip("/") or ""
prefix = (parent + "/") if parent else "/"
rows = self._scan_rows(prefix if prefix != "/" else None)
children: set[str] = set()
for row in rows:
sc = str(row.get("scope", ""))
if sc.startswith(prefix) and sc != (prefix.rstrip("/") or "/"):
rest = sc[len(prefix):]
first_component = rest.split("/", 1)[0]
if first_component:
children.add(prefix + first_component)
return sorted(children)
def list_categories(self, scope_prefix: str | None = None) -> dict[str, int]:
rows = self._scan_rows(scope_prefix)
counts: dict[str, int] = {}
for row in rows:
cat_str = row.get("categories_str") or "[]"
try:
parsed = json.loads(cat_str)
except Exception: # noqa: S112
continue
for c in parsed:
counts[c] = counts.get(c, 0) + 1
return counts
def count(self, scope_prefix: str | None = None) -> int:
if self._table is None:
return 0
if scope_prefix is None or scope_prefix.strip("/") == "":
return self._table.count_rows()
info = self.get_scope_info(scope_prefix)
return info.record_count
def reset(self, scope_prefix: str | None = None) -> None:
if scope_prefix is None or scope_prefix.strip("/") == "":
if self._table is not None:
self._db.drop_table(self._table_name)
self._table = None
# Dimension is preserved; table will be recreated on next save.
return
if self._table is None:
return
prefix = scope_prefix.rstrip("/")
if prefix:
self._table.delete(f"scope >= '{prefix}' AND scope < '{prefix}/\uFFFF'")
async def asave(self, records: list[MemoryRecord]) -> None:
self.save(records)
async def asearch(
self,
query_embedding: list[float],
scope_prefix: str | None = None,
categories: list[str] | None = None,
metadata_filter: dict[str, Any] | None = None,
limit: int = 10,
min_score: float = 0.0,
) -> list[tuple[MemoryRecord, float]]:
return self.search(
query_embedding,
scope_prefix=scope_prefix,
categories=categories,
metadata_filter=metadata_filter,
limit=limit,
min_score=min_score,
)
async def adelete(
self,
scope_prefix: str | None = None,
categories: list[str] | None = None,
record_ids: list[str] | None = None,
older_than: datetime | None = None,
metadata_filter: dict[str, Any] | None = None,
) -> int:
return self.delete(
scope_prefix=scope_prefix,
categories=categories,
record_ids=record_ids,
older_than=older_than,
metadata_filter=metadata_filter,
)
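
Two small numerics from the code above are worth pinning down: the distance-to-similarity conversion in `search()`, and the worst-case retry sleep implied by the backoff constants (a sketch; the values match `_MAX_RETRIES` and `_RETRY_BASE_DELAY`):

```python
# Distance -> score as in search(): smaller distance means a score closer to 1.0.
def to_score(distance: float) -> float:
    return 1.0 / (1.0 + distance)

assert to_score(0.0) == 1.0
assert abs(to_score(1.0) - 0.5) < 1e-9

# Backoff schedule for 5 retries at a 0.2s base delay, doubling each time.
delays = [0.2 * 2**i for i in range(5)]   # [0.2, 0.4, 0.8, 1.6, 3.2]
assert abs(sum(delays) - 6.2) < 1e-9      # ~6.2s of total sleep before giving up
```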

View File

@@ -0,0 +1,215 @@
import json
from pathlib import Path
import sqlite3
from typing import Any
import aiosqlite
from crewai.utilities import Printer
from crewai.utilities.paths import db_storage_path
class LTMSQLiteStorage:
"""SQLite storage class for long-term memory data."""
def __init__(self, db_path: str | None = None, verbose: bool = True) -> None:
"""Initialize the SQLite storage.
Args:
db_path: Optional path to the database file.
verbose: Whether to print error messages.
"""
if db_path is None:
db_path = str(Path(db_storage_path()) / "long_term_memory_storage.db")
self.db_path = db_path
self._verbose = verbose
self._printer: Printer = Printer()
Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
self._initialize_db()
def _initialize_db(self) -> None:
"""Initialize the SQLite database and create LTM table."""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS long_term_memories (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_description TEXT,
metadata TEXT,
datetime TEXT,
score REAL
)
"""
)
conn.commit()
except sqlite3.Error as e:
if self._verbose:
self._printer.print(
content=f"MEMORY ERROR: An error occurred during database initialization: {e}",
color="red",
)
def save(
self,
task_description: str,
metadata: dict[str, Any],
datetime: str,
score: int | float,
) -> None:
"""Saves data to the LTM table with error handling."""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
INSERT INTO long_term_memories (task_description, metadata, datetime, score)
VALUES (?, ?, ?, ?)
""",
(task_description, json.dumps(metadata), datetime, score),
)
conn.commit()
except sqlite3.Error as e:
if self._verbose:
self._printer.print(
content=f"MEMORY ERROR: An error occurred while saving to LTM: {e}",
color="red",
)
def load(self, task_description: str, latest_n: int) -> list[dict[str, Any]] | None:
"""Queries the LTM table by task description with error handling."""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
f"""
SELECT metadata, datetime, score
FROM long_term_memories
WHERE task_description = ?
ORDER BY datetime DESC, score ASC
LIMIT {latest_n}
""", # nosec # noqa: S608
(task_description,),
)
rows = cursor.fetchall()
if rows:
return [
{
"metadata": json.loads(row[0]),
"datetime": row[1],
"score": row[2],
}
for row in rows
]
except sqlite3.Error as e:
if self._verbose:
self._printer.print(
content=f"MEMORY ERROR: An error occurred while querying LTM: {e}",
color="red",
)
return None
def reset(self) -> None:
"""Resets the LTM table with error handling."""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("DELETE FROM long_term_memories")
conn.commit()
except sqlite3.Error as e:
if self._verbose:
self._printer.print(
content=f"MEMORY ERROR: An error occurred while deleting all rows in LTM: {e}",
color="red",
)
async def asave(
self,
task_description: str,
metadata: dict[str, Any],
datetime: str,
score: int | float,
) -> None:
"""Save data to the LTM table asynchronously.
Args:
task_description: Description of the task.
metadata: Metadata associated with the memory.
datetime: Timestamp of the memory.
score: Quality score of the memory.
"""
try:
async with aiosqlite.connect(self.db_path) as conn:
await conn.execute(
"""
INSERT INTO long_term_memories (task_description, metadata, datetime, score)
VALUES (?, ?, ?, ?)
""",
(task_description, json.dumps(metadata), datetime, score),
)
await conn.commit()
except aiosqlite.Error as e:
if self._verbose:
self._printer.print(
content=f"MEMORY ERROR: An error occurred while saving to LTM: {e}",
color="red",
)
async def aload(
self, task_description: str, latest_n: int
) -> list[dict[str, Any]] | None:
"""Query the LTM table by task description asynchronously.
Args:
task_description: Description of the task to search for.
latest_n: Maximum number of results to return.
Returns:
List of matching memory entries or None if error occurs.
"""
try:
async with aiosqlite.connect(self.db_path) as conn:
cursor = await conn.execute(
f"""
SELECT metadata, datetime, score
FROM long_term_memories
WHERE task_description = ?
ORDER BY datetime DESC, score ASC
LIMIT {latest_n}
""", # nosec # noqa: S608
(task_description,),
)
rows = await cursor.fetchall()
if rows:
return [
{
"metadata": json.loads(row[0]),
"datetime": row[1],
"score": row[2],
}
for row in rows
]
except aiosqlite.Error as e:
if self._verbose:
self._printer.print(
content=f"MEMORY ERROR: An error occurred while querying LTM: {e}",
color="red",
)
return None
async def areset(self) -> None:
"""Reset the LTM table asynchronously."""
try:
async with aiosqlite.connect(self.db_path) as conn:
await conn.execute("DELETE FROM long_term_memories")
await conn.commit()
except aiosqlite.Error as e:
if self._verbose:
self._printer.print(
content=f"MEMORY ERROR: An error occurred while deleting all rows in LTM: {e}",
color="red",
)
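
A save/load roundtrip sketch using the class above; note that `load()` matches `task_description` exactly rather than semantically:

```python
storage = LTMSQLiteStorage(db_path="/tmp/ltm_roundtrip.db")
storage.save(
    task_description="Summarize Q3 benchmarks",
    metadata={"quality": 8, "agent": "researcher"},
    datetime="2026-02-13T09:00:00",
    score=8,
)
rows = storage.load("Summarize Q3 benchmarks", latest_n=3)
# -> [{"metadata": {...}, "datetime": "2026-02-13T09:00:00", "score": 8.0}]
```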

View File

@@ -0,0 +1,230 @@
from collections import defaultdict
from collections.abc import Iterable
import os
import re
from typing import Any
from mem0 import Memory, MemoryClient # type: ignore[import-untyped,import-not-found]
from crewai.memory.storage.interface import Storage
from crewai.rag.chromadb.utils import _sanitize_collection_name
MAX_AGENT_ID_LENGTH_MEM0 = 255
class Mem0Storage(Storage):
"""
Extends Storage to handle embedding and searching across entities using Mem0.
"""
def __init__(self, type, crew=None, config=None):
super().__init__()
self._validate_type(type)
self.memory_type = type
self.crew = crew
self.config = config or {}
self._extract_config_values()
self._initialize_memory()
def _validate_type(self, type):
supported_types = {"short_term", "long_term", "entities", "external"}
if type not in supported_types:
raise ValueError(
f"Invalid type '{type}' for Mem0Storage. "
f"Must be one of: {', '.join(supported_types)}"
)
def _extract_config_values(self):
self.mem0_run_id = self.config.get("run_id")
self.includes = self.config.get("includes")
self.excludes = self.config.get("excludes")
self.custom_categories = self.config.get("custom_categories")
self.infer = self.config.get("infer", True)
def _initialize_memory(self):
api_key = self.config.get("api_key") or os.getenv("MEM0_API_KEY")
org_id = self.config.get("org_id")
project_id = self.config.get("project_id")
local_config = self.config.get("local_mem0_config")
if api_key:
self.memory = (
MemoryClient(api_key=api_key, org_id=org_id, project_id=project_id)
if org_id and project_id
else MemoryClient(api_key=api_key)
)
if self.custom_categories:
self.memory.update_project(custom_categories=self.custom_categories)
else:
self.memory = (
Memory.from_config(local_config)
if local_config and len(local_config)
else Memory()
)
def _create_filter_for_search(self):
"""
Returns:
dict: A filter dictionary of AND/OR conditions for querying data.
- Includes run_id if memory_type is 'short_term' and
mem0_run_id is present.
- Otherwise ORs user_id and agent_id when both are present,
or ANDs whichever single one is present.
"""
filter = defaultdict(list)
if self.memory_type == "short_term" and self.mem0_run_id:
filter["AND"].append({"run_id": self.mem0_run_id})
else:
user_id = self.config.get("user_id", "")
agent_id = self.config.get("agent_id", "")
if user_id and agent_id:
filter["OR"].append({"user_id": user_id})
filter["OR"].append({"agent_id": agent_id})
elif user_id:
filter["AND"].append({"user_id": user_id})
elif agent_id:
filter["AND"].append({"agent_id": agent_id})
return filter
def save(self, value: Any, metadata: dict[str, Any]) -> None:
def _last_content(messages: Iterable[dict[str, Any]], role: str) -> str:
return next(
(
m.get("content", "")
for m in reversed(list(messages))
if m.get("role") == role
),
"",
)
conversations = []
messages = metadata.pop("messages", None)
if messages:
last_user = _last_content(messages, "user")
last_assistant = _last_content(messages, "assistant")
if user_msg := self._get_user_message(last_user):
conversations.append({"role": "user", "content": user_msg})
if assistant_msg := self._get_assistant_message(last_assistant):
conversations.append({"role": "assistant", "content": assistant_msg})
else:
conversations.append({"role": "assistant", "content": value})
user_id = self.config.get("user_id", "")
base_metadata = {
"short_term": "short_term",
"long_term": "long_term",
"entities": "entity",
"external": "external",
}
# Shared base params
params: dict[str, Any] = {
"metadata": {"type": base_metadata[self.memory_type], **metadata},
"infer": self.infer,
}
# MemoryClient-specific overrides
if isinstance(self.memory, MemoryClient):
params["includes"] = self.includes
params["excludes"] = self.excludes
params["output_format"] = "v1.1"
params["version"] = "v2"
if self.memory_type == "short_term" and self.mem0_run_id:
params["run_id"] = self.mem0_run_id
if user_id:
params["user_id"] = user_id
if agent_id := self.config.get("agent_id", self._get_agent_name()):
params["agent_id"] = agent_id
self.memory.add(conversations, **params)
def search(
self, query: str, limit: int = 5, score_threshold: float = 0.6
) -> list[Any]:
params = {
"query": query,
"limit": limit,
"version": "v2",
"output_format": "v1.1",
}
if user_id := self.config.get("user_id", ""):
params["user_id"] = user_id
memory_type_map = {
"short_term": {"type": "short_term"},
"long_term": {"type": "long_term"},
"entities": {"type": "entity"},
"external": {"type": "external"},
}
if self.memory_type in memory_type_map:
params["metadata"] = memory_type_map[self.memory_type]
if self.memory_type == "short_term":
params["run_id"] = self.mem0_run_id
# Ignore any caller-supplied filters; they are built automatically
# from the crew configuration when the crew is created.
params["filters"] = self._create_filter_for_search()
params["threshold"] = score_threshold
if isinstance(self.memory, Memory):
del params["metadata"], params["version"], params["output_format"]
if params.get("run_id"):
del params["run_id"]
results = self.memory.search(**params)
# Expose each hit's text under "content" so Contextual Memory can
# consume the results directly.
for result in results["results"]:
result["content"] = result["memory"]
return list(results["results"])
def reset(self):
if self.memory:
self.memory.reset()
def _sanitize_role(self, role: str) -> str:
"""
Sanitizes agent roles to ensure valid directory names.
"""
return role.replace("\n", "").replace(" ", "_").replace("/", "_")
def _get_agent_name(self) -> str:
if not self.crew:
return ""
roles = [self._sanitize_role(agent.role) for agent in self.crew.agents]
return _sanitize_collection_name(
name="_".join(roles), max_collection_length=MAX_AGENT_ID_LENGTH_MEM0
)
def _get_assistant_message(self, text: str) -> str:
marker = "Final Answer:"
if marker in text:
return text.split(marker, 1)[1].strip()
return text
def _get_user_message(self, text: str) -> str:
pattern = r"User message:\s*(.*)"
match = re.search(pattern, text)
if match:
return match.group(1).strip()
return text
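
For orientation, a hedged usage sketch of `Mem0Storage`. The config keys mirror the ones read in `_extract_config_values()` and `_initialize_memory()` above; the import path is an assumption, and whether search hits carry a `score` key depends on the mem0 backend.

```python
# Minimal sketch; the import path and the "score" key are assumptions.
from crewai.memory.storage.mem0_storage import Mem0Storage

storage = Mem0Storage(
    type="short_term",
    config={
        "user_id": "user-123",   # scopes saves and searches to this user
        "run_id": "session-42",  # only applied for short_term memory
        "infer": True,           # let mem0 infer structured memories
        # "api_key": "...",      # omit to fall back to a local Memory()
    },
)

storage.save(
    "The user prefers weekly status reports on Mondays.",
    metadata={"agent": "assistant"},
)
for hit in storage.search("When does the user want status reports?", limit=3):
    print(hit.get("score"), hit["content"])
```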

View File

@@ -0,0 +1,315 @@
from __future__ import annotations
import logging
import traceback
from typing import TYPE_CHECKING, Any, cast
import warnings
from crewai.rag.chromadb.config import ChromaDBConfig
from crewai.rag.chromadb.types import ChromaEmbeddingFunctionWrapper
from crewai.rag.config.utils import get_rag_client
from crewai.rag.embeddings.factory import build_embedder
from crewai.rag.factory import create_client
from crewai.rag.storage.base_rag_storage import BaseRAGStorage
from crewai.utilities.constants import MAX_FILE_NAME_LENGTH
from crewai.utilities.paths import db_storage_path
if TYPE_CHECKING:
from crewai.crew import Crew
from crewai.rag.core.base_client import BaseClient
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
from crewai.rag.embeddings.types import ProviderSpec
from crewai.rag.types import BaseRecord
class RAGStorage(BaseRAGStorage):
"""
Extends Storage to handle embeddings for memory entries, improving
search efficiency.
"""
def __init__(
self,
type: str,
allow_reset: bool = True,
embedder_config: ProviderSpec | BaseEmbeddingsProvider[Any] | None = None,
crew: Crew | None = None,
path: str | None = None,
) -> None:
super().__init__(type, allow_reset, embedder_config, crew)
crew_agents = crew.agents if crew else []
sanitized_roles = [self._sanitize_role(agent.role) for agent in crew_agents]
agents_str = "_".join(sanitized_roles)
self.agents = agents_str
self.storage_file_name = self._build_storage_file_name(type, agents_str)
self.type = type
self._client: BaseClient | None = None
self.allow_reset = allow_reset
self.path = path
warnings.filterwarnings(
"ignore",
message=r".*'model_fields'.*is deprecated.*",
module=r"^chromadb(\.|$)",
)
if self.embedder_config:
embedding_function = build_embedder(self.embedder_config)
try:
_ = embedding_function(["test"])
except Exception as e:
provider = (
self.embedder_config["provider"]
if isinstance(self.embedder_config, dict)
else self.embedder_config.__class__.__name__.replace(
"Provider", ""
).lower()
)
raise ValueError(
f"Failed to initialize embedder. Please check your configuration or connection.\n"
f"Provider: {provider}\n"
f"Error: {e}"
) from e
batch_size = None
if (
isinstance(self.embedder_config, dict)
and "config" in self.embedder_config
):
nested_config = self.embedder_config["config"]
if isinstance(nested_config, dict):
batch_size = nested_config.get("batch_size")
if batch_size is not None:
config = ChromaDBConfig(
embedding_function=cast(
ChromaEmbeddingFunctionWrapper, embedding_function
),
batch_size=cast(int, batch_size),
)
else:
config = ChromaDBConfig(
embedding_function=cast(
ChromaEmbeddingFunctionWrapper, embedding_function
)
)
if self.path:
config.settings.persist_directory = self.path
self._client = create_client(config)
def _get_client(self) -> BaseClient:
"""Get the appropriate client - instance-specific or global."""
return self._client if self._client else get_rag_client()
def _sanitize_role(self, role: str) -> str:
"""
Sanitizes agent roles to ensure valid directory names.
"""
return role.replace("\n", "").replace(" ", "_").replace("/", "_")
@staticmethod
def _build_storage_file_name(type: str, file_name: str) -> str:
"""
Ensures the file name does not exceed the maximum length allowed by the OS.
"""
base_path = f"{db_storage_path()}/{type}"
if len(file_name) > MAX_FILE_NAME_LENGTH:
logging.warning(
f"Trimming file name from {len(file_name)} to {MAX_FILE_NAME_LENGTH} characters."
)
file_name = file_name[:MAX_FILE_NAME_LENGTH]
return f"{base_path}/{file_name}"
def save(self, value: Any, metadata: dict[str, Any]) -> None:
"""Save a value to storage.
Args:
value: The value to save.
metadata: Metadata to associate with the value.
"""
try:
client = self._get_client()
collection_name = (
f"memory_{self.type}_{self.agents}"
if self.agents
else f"memory_{self.type}"
)
client.get_or_create_collection(collection_name=collection_name)
document: BaseRecord = {"content": value}
if metadata:
document["metadata"] = metadata
batch_size = None
if (
self.embedder_config
and isinstance(self.embedder_config, dict)
and "config" in self.embedder_config
):
nested_config = self.embedder_config["config"]
if isinstance(nested_config, dict):
batch_size = nested_config.get("batch_size")
if batch_size is not None:
client.add_documents(
collection_name=collection_name,
documents=[document],
batch_size=cast(int, batch_size),
)
else:
client.add_documents(
collection_name=collection_name, documents=[document]
)
except Exception as e:
logging.error(
f"Error during {self.type} save: {e!s}\n{traceback.format_exc()}"
)
async def asave(self, value: Any, metadata: dict[str, Any]) -> None:
"""Save a value to storage asynchronously.
Args:
value: The value to save.
metadata: Metadata to associate with the value.
"""
try:
client = self._get_client()
collection_name = (
f"memory_{self.type}_{self.agents}"
if self.agents
else f"memory_{self.type}"
)
await client.aget_or_create_collection(collection_name=collection_name)
document: BaseRecord = {"content": value}
if metadata:
document["metadata"] = metadata
batch_size = None
if (
self.embedder_config
and isinstance(self.embedder_config, dict)
and "config" in self.embedder_config
):
nested_config = self.embedder_config["config"]
if isinstance(nested_config, dict):
batch_size = nested_config.get("batch_size")
if batch_size is not None:
await client.aadd_documents(
collection_name=collection_name,
documents=[document],
batch_size=cast(int, batch_size),
)
else:
await client.aadd_documents(
collection_name=collection_name, documents=[document]
)
except Exception as e:
logging.error(
f"Error during {self.type} async save: {e!s}\n{traceback.format_exc()}"
)
def search(
self,
query: str,
limit: int = 5,
filter: dict[str, Any] | None = None,
score_threshold: float = 0.6,
) -> list[Any]:
"""Search for matching entries in storage.
Args:
query: The search query.
limit: Maximum number of results to return.
filter: Optional metadata filter.
score_threshold: Minimum similarity score for results.
Returns:
List of matching entries.
"""
try:
client = self._get_client()
collection_name = (
f"memory_{self.type}_{self.agents}"
if self.agents
else f"memory_{self.type}"
)
return client.search(
collection_name=collection_name,
query=query,
limit=limit,
metadata_filter=filter,
score_threshold=score_threshold,
)
except Exception as e:
logging.error(
f"Error during {self.type} search: {e!s}\n{traceback.format_exc()}"
)
return []
async def asearch(
self,
query: str,
limit: int = 5,
filter: dict[str, Any] | None = None,
score_threshold: float = 0.6,
) -> list[Any]:
"""Search for matching entries in storage asynchronously.
Args:
query: The search query.
limit: Maximum number of results to return.
filter: Optional metadata filter.
score_threshold: Minimum similarity score for results.
Returns:
List of matching entries.
"""
try:
client = self._get_client()
collection_name = (
f"memory_{self.type}_{self.agents}"
if self.agents
else f"memory_{self.type}"
)
return await client.asearch(
collection_name=collection_name,
query=query,
limit=limit,
metadata_filter=filter,
score_threshold=score_threshold,
)
except Exception as e:
logging.error(
f"Error during {self.type} async search: {e!s}\n{traceback.format_exc()}"
)
return []
def reset(self) -> None:
try:
client = self._get_client()
collection_name = (
f"memory_{self.type}_{self.agents}"
if self.agents
else f"memory_{self.type}"
)
client.delete_collection(collection_name=collection_name)
except Exception as e:
if "attempt to write a readonly database" in str(
e
) or "does not exist" in str(e):
# Ignore readonly database and collection not found errors (already reset)
pass
else:
raise Exception(
f"An error occurred while resetting the {self.type} memory: {e}"
) from e
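
A hedged usage sketch of `RAGStorage`: the constructor arguments follow the `__init__` signature above, and the embedder config dict follows the nested `provider`/`config` shape that `__init__` inspects for `batch_size`. Running it requires working embedder credentials, since the constructor test-embeds `["test"]`.

```python
# Minimal sketch; assumes OPENAI_API_KEY is set for the "openai" provider
# and that the import path matches the repo layout.
from crewai.memory.storage.rag_storage import RAGStorage

storage = RAGStorage(
    type="short_term",
    embedder_config={"provider": "openai", "config": {"batch_size": 32}},
    path="./my_memory_db",  # instance-specific persist directory
)

storage.save(
    "PostgreSQL handled 10k concurrent connections in the load test.",
    metadata={"topic": "databases"},
)
results = storage.search("database connection limits", limit=3, score_threshold=0.5)
for result in results:
    print(result)
```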

View File

@@ -1,369 +0,0 @@
"""Data types for the unified memory system."""
from __future__ import annotations
from datetime import datetime
from typing import Any
from uuid import uuid4
from pydantic import BaseModel, Field
# When searching the vector store, we ask for more results than the caller
# requested so that post-search steps (composite scoring, deduplication,
# category filtering) have enough candidates to fill the final result set.
# For example, if the caller asks for 10 results and this is 2, we fetch 20
# from the vector store and then trim down after scoring.
_RECALL_OVERSAMPLE_FACTOR = 2
class MemoryRecord(BaseModel):
"""A single memory entry stored in the memory system."""
id: str = Field(
default_factory=lambda: str(uuid4()),
description="Unique identifier for the memory record.",
)
content: str = Field(description="The textual content of the memory.")
scope: str = Field(
default="/",
description="Hierarchical path organizing the memory (e.g. /company/team/user).",
)
categories: list[str] = Field(
default_factory=list,
description="Categories or tags for the memory.",
)
metadata: dict[str, Any] = Field(
default_factory=dict,
description="Arbitrary metadata associated with the memory.",
)
importance: float = Field(
default=0.5,
ge=0.0,
le=1.0,
description="Importance score from 0.0 to 1.0, affects retrieval ranking.",
)
created_at: datetime = Field(
default_factory=datetime.utcnow,
description="When the memory was created.",
)
last_accessed: datetime = Field(
default_factory=datetime.utcnow,
description="When the memory was last accessed.",
)
embedding: list[float] | None = Field(
default=None,
description="Vector embedding for semantic search. Computed on save if not provided.",
)
source: str | None = Field(
default=None,
description=(
"Origin of this memory (e.g. user ID, session ID). "
"Used for provenance tracking and privacy filtering."
),
)
private: bool = Field(
default=False,
description=(
"If True, this memory is only visible to recall requests from the same source, "
"or when include_private=True is passed."
),
)
class MemoryMatch(BaseModel):
"""A memory record with relevance score from a recall operation."""
record: MemoryRecord = Field(description="The matched memory record.")
score: float = Field(
description="Combined relevance score (semantic, recency, importance).",
)
match_reasons: list[str] = Field(
default_factory=list,
description="Reasons for the match (e.g. semantic, recency, importance).",
)
evidence_gaps: list[str] = Field(
default_factory=list,
description="Information the system looked for but could not find.",
)
class ScopeInfo(BaseModel):
"""Information about a scope in the memory hierarchy."""
path: str = Field(description="The scope path (e.g. /company/engineering).")
record_count: int = Field(
default=0,
description="Number of records in this scope (including subscopes if applicable).",
)
categories: list[str] = Field(
default_factory=list,
description="Categories used in this scope.",
)
oldest_record: datetime | None = Field(
default=None,
description="Timestamp of the oldest record in this scope.",
)
newest_record: datetime | None = Field(
default=None,
description="Timestamp of the newest record in this scope.",
)
child_scopes: list[str] = Field(
default_factory=list,
description="Immediate child scope paths.",
)
class MemoryConfig(BaseModel):
"""Internal configuration for memory scoring, consolidation, and recall behavior.
Users configure these values via ``Memory(...)`` keyword arguments.
This model is not part of the public API -- it exists so that the config
can be passed as a single object to RecallFlow, EncodingFlow, and
compute_composite_score.
"""
# -- Composite score weights --
# The recall composite score is:
# semantic_weight * similarity + recency_weight * decay + importance_weight * importance
# These should sum to ~1.0 for intuitive 0-1 scoring.
recency_weight: float = Field(
default=0.3,
ge=0.0,
le=1.0,
description=(
"Weight for recency in the composite relevance score. "
"Higher values favor recently created memories over older ones."
),
)
semantic_weight: float = Field(
default=0.5,
ge=0.0,
le=1.0,
description=(
"Weight for semantic similarity in the composite relevance score. "
"Higher values make recall rely more on vector-search closeness."
),
)
importance_weight: float = Field(
default=0.2,
ge=0.0,
le=1.0,
description=(
"Weight for explicit importance in the composite relevance score. "
"Higher values make high-importance memories surface more often."
),
)
recency_half_life_days: int = Field(
default=30,
ge=1,
description=(
"Number of days for the recency score to halve (exponential decay). "
"Lower values make memories lose relevance faster; higher values "
"keep old memories relevant longer."
),
)
# -- Consolidation (on save) --
consolidation_threshold: float = Field(
default=0.85,
ge=0.0,
le=1.0,
description=(
"Semantic similarity above which the consolidation flow is triggered "
"when saving new content. The LLM then decides whether to merge, "
"update, or delete overlapping records. Set to 1.0 to disable."
),
)
consolidation_limit: int = Field(
default=5,
ge=1,
description=(
"Maximum number of existing records to compare against when checking "
"for consolidation during a save."
),
)
batch_dedup_threshold: float = Field(
default=0.98,
ge=0.0,
le=1.0,
description=(
"Cosine similarity threshold for dropping near-exact duplicates "
"within a single remember_many() batch. Only items with similarity "
">= this value are dropped. Set very high (0.98) to avoid "
"discarding useful memories that are merely similar."
),
)
# -- Save defaults --
default_importance: float = Field(
default=0.5,
ge=0.0,
le=1.0,
description=(
"Importance assigned to new memories when no explicit value is given "
"and the LLM analysis path is skipped (i.e. all fields provided by "
"the caller)."
),
)
# -- Recall depth control --
# The RecallFlow router uses these thresholds to decide between returning
# results immediately ("synthesize") and doing an extra LLM-driven
# exploration round ("explore_deeper").
confidence_threshold_high: float = Field(
default=0.8,
ge=0.0,
le=1.0,
description=(
"When recall confidence is at or above this value, results are "
"returned directly without deeper exploration."
),
)
confidence_threshold_low: float = Field(
default=0.5,
ge=0.0,
le=1.0,
description=(
"When recall confidence is below this value and exploration budget "
"remains, a deeper LLM-driven exploration round is triggered."
),
)
complex_query_threshold: float = Field(
default=0.7,
ge=0.0,
le=1.0,
description=(
"For queries classified as 'complex' by the LLM, deeper exploration "
"is triggered when confidence is below this value."
),
)
exploration_budget: int = Field(
default=1,
ge=0,
description=(
"Number of LLM-driven exploration rounds allowed during deep recall. "
"0 means recall always uses direct vector search only; higher values "
"allow more thorough but slower retrieval."
),
)
recall_oversample_factor: int = Field(
default=_RECALL_OVERSAMPLE_FACTOR,
ge=1,
description=(
"When searching the vector store, fetch this many times more results "
"than the caller requested so that post-search steps (composite "
"scoring, deduplication, category filtering) have enough candidates "
"to fill the final result set."
),
)
query_analysis_threshold: int = Field(
default=250,
ge=0,
description=(
"Character count threshold for LLM query analysis during deep recall. "
"Queries shorter than this are embedded directly without an LLM call "
"to distill sub-queries or infer scopes (saving ~1-3s). Longer queries "
"(e.g. full task descriptions) benefit from LLM distillation. "
"Set to 0 to always use LLM analysis."
),
)
def embed_text(embedder: Any, text: str) -> list[float]:
"""Embed a single text string and return a list of floats.
Args:
embedder: Callable that accepts a list of strings and returns embeddings.
text: The text to embed.
Returns:
List of floats representing the embedding, or empty list on failure.
"""
if not text or not text.strip():
return []
result = embedder([text])
if not result:
return []
first = result[0]
if hasattr(first, "tolist"):
return first.tolist()
if isinstance(first, list):
return [float(x) for x in first]
return list(first)
def embed_texts(embedder: Any, texts: list[str]) -> list[list[float]]:
"""Embed multiple texts in a single API call.
The embedder already accepts ``list[str]``, so this just calls it once
with the full batch and normalises the output format.
Args:
embedder: Callable that accepts a list of strings and returns embeddings.
texts: List of texts to embed.
Returns:
List of embeddings, one per input text. Empty texts produce empty lists.
"""
if not texts:
return []
# Filter out empty texts, remembering their positions
valid: list[tuple[int, str]] = [
(i, t) for i, t in enumerate(texts) if t and t.strip()
]
if not valid:
return [[] for _ in texts]
result = embedder([t for _, t in valid])
embeddings: list[list[float]] = [[] for _ in texts]
for (orig_idx, _), emb in zip(valid, result, strict=False):
if hasattr(emb, "tolist"):
embeddings[orig_idx] = emb.tolist()
elif isinstance(emb, list):
embeddings[orig_idx] = [float(x) for x in emb]
else:
embeddings[orig_idx] = list(emb)
return embeddings
def compute_composite_score(
record: MemoryRecord,
semantic_score: float,
config: MemoryConfig,
) -> tuple[float, list[str]]:
"""Compute a weighted composite relevance score from semantic, recency, and importance.
composite = w_semantic * semantic + w_recency * decay + w_importance * importance
where decay = 0.5^(age_days / half_life_days).
Args:
record: The memory record (provides created_at and importance).
semantic_score: Raw semantic similarity from vector search, in [0, 1].
config: Weights and recency half-life.
Returns:
Tuple of (composite_score, match_reasons). match_reasons includes
"semantic" always; "recency" if decay > 0.5; "importance" if record.importance > 0.5.
"""
age_seconds = (datetime.utcnow() - record.created_at).total_seconds()
age_days = max(age_seconds / 86400.0, 0.0)
decay = 0.5 ** (age_days / config.recency_half_life_days)
composite = (
config.semantic_weight * semantic_score
+ config.recency_weight * decay
+ config.importance_weight * record.importance
)
reasons: list[str] = ["semantic"]
if decay > 0.5:
reasons.append("recency")
if record.importance > 0.5:
reasons.append("importance")
return composite, reasons
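
A short worked example of the formula above, using the types from this module (imported as `crewai.memory.types`, the path used elsewhere in this changeset). A record created exactly one half-life ago has a decay of just under 0.5, so "recency" does not make it into the match reasons:

```python
from datetime import datetime, timedelta

from crewai.memory.types import MemoryConfig, MemoryRecord, compute_composite_score

record = MemoryRecord(
    content="PostgreSQL caps at 10k connections with pooling.",
    importance=0.8,
    created_at=datetime.utcnow() - timedelta(days=30),  # one half-life old
)
config = MemoryConfig()  # defaults: semantic 0.5, recency 0.3, importance 0.2

score, reasons = compute_composite_score(record, semantic_score=0.9, config=config)
# decay = 0.5 ** (30 / 30) = 0.5
# score = 0.5 * 0.9 + 0.3 * 0.5 + 0.2 * 0.8 = 0.76
print(round(score, 2), reasons)  # -> 0.76 ['semantic', 'importance']
```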

View File

@@ -1,838 +0,0 @@
"""Unified Memory class: single intelligent memory with LLM analysis and pluggable storage."""
from __future__ import annotations
from concurrent.futures import Future, ThreadPoolExecutor
from datetime import datetime
import threading
import time
from typing import Any, Literal
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.memory_events import (
MemoryQueryCompletedEvent,
MemoryQueryFailedEvent,
MemoryQueryStartedEvent,
MemorySaveCompletedEvent,
MemorySaveFailedEvent,
MemorySaveStartedEvent,
)
from crewai.llms.base_llm import BaseLLM
from crewai.memory.analyze import extract_memories_from_content
from crewai.memory.recall_flow import RecallFlow
from crewai.memory.storage.backend import StorageBackend
from crewai.memory.storage.lancedb_storage import LanceDBStorage
from crewai.memory.types import (
MemoryConfig,
MemoryMatch,
MemoryRecord,
ScopeInfo,
compute_composite_score,
embed_text,
)
def _default_embedder() -> Any:
"""Build default OpenAI embedder for memory."""
from crewai.rag.embeddings.factory import build_embedder
return build_embedder({"provider": "openai", "config": {}})
class Memory:
"""Unified memory: standalone, LLM-analyzed, with intelligent recall flow.
Works without agent/crew. Uses LLM to infer scope, categories, importance on save.
Uses RecallFlow for adaptive-depth recall. Supports scope/slice views and
pluggable storage (LanceDB default).
"""
def __init__(
self,
llm: BaseLLM | str = "gpt-4o-mini",
storage: StorageBackend | str = "lancedb",
embedder: Any = None,
# -- Scoring weights --
# These three weights control how recall results are ranked.
# The composite score is: semantic_weight * similarity + recency_weight * decay + importance_weight * importance.
# They should sum to ~1.0 for intuitive scoring.
recency_weight: float = 0.3,
semantic_weight: float = 0.5,
importance_weight: float = 0.2,
# How quickly old memories lose relevance. The recency score halves every
# N days (exponential decay). Lower = faster forgetting; higher = longer relevance.
recency_half_life_days: int = 30,
# -- Consolidation --
# When remembering new content, if an existing record has similarity >= this
# threshold, the LLM is asked to merge/update/delete. Set to 1.0 to disable.
consolidation_threshold: float = 0.85,
# Max existing records to compare against when checking for consolidation.
consolidation_limit: int = 5,
# -- Save defaults --
# Importance assigned to new memories when no explicit value is given and
# the LLM analysis path is skipped (all fields provided by the caller).
default_importance: float = 0.5,
# -- Recall depth control --
# These thresholds govern the RecallFlow router that decides between
# returning results immediately ("synthesize") vs. doing an extra
# LLM-driven exploration round ("explore_deeper").
# confidence >= confidence_threshold_high => always synthesize
# confidence < confidence_threshold_low => explore deeper (if budget > 0)
# complex query + confidence < complex_query_threshold => explore deeper
confidence_threshold_high: float = 0.8,
confidence_threshold_low: float = 0.5,
complex_query_threshold: float = 0.7,
# How many LLM-driven exploration rounds the RecallFlow is allowed to run.
# 0 = always shallow (vector search only); higher = more thorough but slower.
exploration_budget: int = 1,
# Queries shorter than this skip LLM analysis (saving ~1-3s).
# Longer queries (full task descriptions) benefit from LLM distillation.
query_analysis_threshold: int = 200,
) -> None:
"""Initialize Memory.
Args:
llm: LLM for analysis (model name or BaseLLM instance).
storage: Backend: "lancedb" or a StorageBackend instance.
embedder: Embedding callable, provider config dict, or None (default OpenAI).
recency_weight: Weight for recency in the composite relevance score.
semantic_weight: Weight for semantic similarity in the composite relevance score.
importance_weight: Weight for importance in the composite relevance score.
recency_half_life_days: Recency score halves every N days (exponential decay).
consolidation_threshold: Similarity above which consolidation is triggered on save.
consolidation_limit: Max existing records to compare during consolidation.
default_importance: Default importance when not provided or inferred.
confidence_threshold_high: Recall confidence above which results are returned directly.
confidence_threshold_low: Recall confidence below which deeper exploration is triggered.
complex_query_threshold: For complex queries, explore deeper below this confidence.
exploration_budget: Number of LLM-driven exploration rounds during deep recall.
query_analysis_threshold: Queries shorter than this skip LLM analysis during deep recall.
"""
self._config = MemoryConfig(
recency_weight=recency_weight,
semantic_weight=semantic_weight,
importance_weight=importance_weight,
recency_half_life_days=recency_half_life_days,
consolidation_threshold=consolidation_threshold,
consolidation_limit=consolidation_limit,
default_importance=default_importance,
confidence_threshold_high=confidence_threshold_high,
confidence_threshold_low=confidence_threshold_low,
complex_query_threshold=complex_query_threshold,
exploration_budget=exploration_budget,
query_analysis_threshold=query_analysis_threshold,
)
# Store raw config for lazy initialization. LLM and embedder are only
# built on first access so that Memory() never fails at construction
# time (e.g. when auto-created by Flow without an API key set).
self._llm_config: BaseLLM | str = llm
self._llm_instance: BaseLLM | None = None if isinstance(llm, str) else llm
self._embedder_config: Any = embedder
self._embedder_instance: Any = (
embedder if (embedder is not None and not isinstance(embedder, dict)) else None
)
# Storage is initialized eagerly (local, no API key needed).
if storage == "lancedb":
self._storage = LanceDBStorage()
elif isinstance(storage, str):
self._storage = LanceDBStorage(path=storage)
else:
self._storage = storage
# Background save queue. max_workers=1 serializes saves to avoid
# concurrent storage mutations (two saves finding the same similar
# record and both trying to update/delete it). Within each save,
# the parallel LLM calls still run on their own thread pool.
self._save_pool = ThreadPoolExecutor(
max_workers=1, thread_name_prefix="memory-save"
)
self._pending_saves: list[Future[Any]] = []
self._pending_lock = threading.Lock()
_MEMORY_DOCS_URL = "https://docs.crewai.com/concepts/memory"
@property
def _llm(self) -> BaseLLM:
"""Lazy LLM initialization -- only created when first needed."""
if self._llm_instance is None:
from crewai.llm import LLM
try:
self._llm_instance = LLM(model=self._llm_config)
except Exception as e:
raise RuntimeError(
f"Memory requires an LLM for analysis but initialization failed: {e}\n\n"
"To fix this, do one of the following:\n"
' - Set OPENAI_API_KEY for the default model (gpt-4o-mini)\n'
' - Pass a different model: Memory(llm="anthropic/claude-3-haiku-20240307")\n'
' - Pass any LLM instance: Memory(llm=LLM(model="your-model"))\n'
" - To skip LLM analysis, pass all fields explicitly to remember()\n"
' and use depth="shallow" for recall.\n\n'
f"Docs: {self._MEMORY_DOCS_URL}"
) from e
return self._llm_instance
@property
def _embedder(self) -> Any:
"""Lazy embedder initialization -- only created when first needed."""
if self._embedder_instance is None:
try:
if isinstance(self._embedder_config, dict):
from crewai.rag.embeddings.factory import build_embedder
self._embedder_instance = build_embedder(self._embedder_config)
else:
self._embedder_instance = _default_embedder()
except Exception as e:
raise RuntimeError(
f"Memory requires an embedder for vector search but initialization failed: {e}\n\n"
"To fix this, do one of the following:\n"
" - Set OPENAI_API_KEY for the default embedder (text-embedding-3-small)\n"
' - Pass a different embedder: Memory(embedder={{"provider": "google", "config": {{...}}}})\n'
" - Pass a callable: Memory(embedder=my_embedding_function)\n\n"
f"Docs: {self._MEMORY_DOCS_URL}"
) from e
return self._embedder_instance
# ------------------------------------------------------------------
# Background write queue
# ------------------------------------------------------------------
def _submit_save(self, fn: Any, *args: Any, **kwargs: Any) -> Future[Any]:
"""Submit a save operation to the background thread pool.
The future is tracked so that ``drain_writes()`` can wait for it.
If the pool has been shut down (e.g. after ``close()``), the save
runs synchronously as a fallback so late saves still succeed.
"""
try:
future: Future[Any] = self._save_pool.submit(fn, *args, **kwargs)
except RuntimeError:
# Pool shut down -- run synchronously as fallback
future = Future()
try:
result = fn(*args, **kwargs)
future.set_result(result)
except Exception as exc:
future.set_exception(exc)
return future
with self._pending_lock:
self._pending_saves.append(future)
future.add_done_callback(self._on_save_done)
return future
def _on_save_done(self, future: Future[Any]) -> None:
"""Remove a completed future from the pending list and emit failure event if needed.
This callback must never raise -- it runs from the thread pool's
internal machinery during process shutdown when executors and the
event bus may already be closed.
"""
try:
with self._pending_lock:
try:
self._pending_saves.remove(future)
except ValueError:
pass # already removed
exc = future.exception()
if exc is not None:
crewai_event_bus.emit(
self,
MemorySaveFailedEvent(
value="background save",
error=str(exc),
source_type="unified_memory",
),
)
except Exception: # noqa: S110
pass # swallow everything during shutdown
def drain_writes(self) -> None:
"""Block until all pending background saves have completed.
Called automatically by ``recall()`` and should be called by the
crew at shutdown to ensure no saves are lost.
"""
with self._pending_lock:
pending = list(self._pending_saves)
for future in pending:
future.result() # blocks until done; re-raises exceptions
def close(self) -> None:
"""Drain pending saves and shut down the background thread pool."""
self.drain_writes()
self._save_pool.shutdown(wait=True)
def _encode_batch(
self,
contents: list[str],
scope: str | None = None,
categories: list[str] | None = None,
metadata: dict[str, Any] | None = None,
importance: float | None = None,
source: str | None = None,
private: bool = False,
) -> list[MemoryRecord]:
"""Run the batch EncodingFlow for one or more items. No event emission.
This is the core encoding logic shared by ``remember()`` and
``remember_many()``. Events are managed by the calling method.
"""
from crewai.memory.encoding_flow import EncodingFlow
flow = EncodingFlow(
storage=self._storage,
llm=self._llm,
embedder=self._embedder,
config=self._config,
)
items_input = [
{
"content": c,
"scope": scope,
"categories": categories,
"metadata": metadata,
"importance": importance,
"source": source,
"private": private,
}
for c in contents
]
flow.kickoff(inputs={"items": items_input})
return [
item.result_record
for item in flow.state.items
if not item.dropped and item.result_record is not None
]
def remember(
self,
content: str,
scope: str | None = None,
categories: list[str] | None = None,
metadata: dict[str, Any] | None = None,
importance: float | None = None,
source: str | None = None,
private: bool = False,
agent_role: str | None = None,
) -> MemoryRecord:
"""Store a single item in memory (synchronous).
Routes through the same serialized save pool as ``remember_many``
to prevent races, but blocks until the save completes so the caller
gets the ``MemoryRecord`` back immediately.
Args:
content: Text to remember.
scope: Optional scope path; inferred if None.
categories: Optional categories; inferred if None.
metadata: Optional metadata; merged with LLM-extracted if inferred.
importance: Optional importance 0-1; inferred if None.
source: Optional provenance identifier (e.g. user ID, session ID).
private: If True, only visible to recall from the same source.
agent_role: Optional agent role for event metadata.
Returns:
The created MemoryRecord.
Raises:
Exception: On save failure (events emitted).
"""
_source_type = "unified_memory"
try:
crewai_event_bus.emit(
self,
MemorySaveStartedEvent(
value=content,
metadata=metadata,
source_type=_source_type,
),
)
start = time.perf_counter()
# Submit through the save pool for proper serialization,
# then immediately wait for the result.
future = self._submit_save(
self._encode_batch,
[content], scope, categories, metadata, importance, source, private,
)
records = future.result()
record = records[0] if records else None
elapsed_ms = (time.perf_counter() - start) * 1000
crewai_event_bus.emit(
self,
MemorySaveCompletedEvent(
value=content,
metadata=metadata or {},
agent_role=agent_role,
save_time_ms=elapsed_ms,
source_type=_source_type,
),
)
return record
except Exception as e:
crewai_event_bus.emit(
self,
MemorySaveFailedEvent(
value=content,
metadata=metadata,
error=str(e),
source_type=_source_type,
),
)
raise
def remember_many(
self,
contents: list[str],
scope: str | None = None,
categories: list[str] | None = None,
metadata: dict[str, Any] | None = None,
importance: float | None = None,
source: str | None = None,
private: bool = False,
agent_role: str | None = None,
) -> list[MemoryRecord]:
"""Store multiple items in memory (non-blocking).
The encoding pipeline runs in a background thread. This method
returns immediately so the caller (e.g. agent) is not blocked.
A ``MemorySaveStartedEvent`` is emitted immediately; the
``MemorySaveCompletedEvent`` is emitted when the background
save finishes.
Any subsequent ``recall()`` call will automatically wait for
pending saves to complete before searching (read barrier).
Args:
contents: List of text items to remember.
scope: Optional scope applied to all items.
categories: Optional categories applied to all items.
metadata: Optional metadata applied to all items.
importance: Optional importance applied to all items.
source: Optional provenance identifier applied to all items.
private: Privacy flag applied to all items.
agent_role: Optional agent role for event metadata.
Returns:
Empty list (records are not available until the background save completes).
"""
if not contents:
return []
self._submit_save(
self._background_encode_batch,
contents, scope, categories, metadata,
importance, source, private, agent_role,
)
return []
def _background_encode_batch(
self,
contents: list[str],
scope: str | None,
categories: list[str] | None,
metadata: dict[str, Any] | None,
importance: float | None,
source: str | None,
private: bool,
agent_role: str | None,
) -> list[MemoryRecord]:
"""Run the encoding pipeline in a background thread with event emission.
Both started and completed events are emitted here (in the background
thread) so they pair correctly on the event bus scope stack.
All ``emit`` calls are wrapped in try/except to handle the case where
the event bus shuts down before the background save finishes (e.g.
during process exit).
"""
try:
crewai_event_bus.emit(
self,
MemorySaveStartedEvent(
value=f"{len(contents)} memories (background)",
metadata=metadata,
source_type="unified_memory",
),
)
except RuntimeError:
pass # event bus shut down during process exit
try:
start = time.perf_counter()
records = self._encode_batch(
contents, scope, categories, metadata, importance, source, private
)
elapsed_ms = (time.perf_counter() - start) * 1000
except RuntimeError:
# The encoding pipeline uses asyncio.run() -> to_thread() internally.
# If the process is shutting down, the default executor is closed and
# to_thread raises "cannot schedule new futures after shutdown".
# Silently abandon the save -- the process is exiting anyway.
return []
try:
crewai_event_bus.emit(
self,
MemorySaveCompletedEvent(
value=f"{len(records)} memories saved",
metadata=metadata or {},
agent_role=agent_role,
save_time_ms=elapsed_ms,
source_type="unified_memory",
),
)
except RuntimeError:
pass # event bus shut down during process exit
return records
def extract_memories(self, content: str) -> list[str]:
"""Extract discrete memories from a raw content blob using the LLM.
This is a pure helper -- it does NOT store anything.
Call remember() on each returned string to persist them.
Args:
content: Raw text (e.g. task + result dump).
Returns:
List of short, self-contained memory statements.
"""
return extract_memories_from_content(content, self._llm)
def recall(
self,
query: str,
scope: str | None = None,
categories: list[str] | None = None,
limit: int = 10,
depth: Literal["shallow", "deep"] = "deep",
source: str | None = None,
include_private: bool = False,
) -> list[MemoryMatch]:
"""Retrieve relevant memories.
``shallow`` embeds the query directly and runs a single vector search.
``deep`` (default) uses the RecallFlow: the LLM distills the query into
targeted sub-queries, selects scopes, searches in parallel, and applies
confidence-based routing for optional deeper exploration.
Args:
query: Natural language query.
scope: Optional scope prefix to search within.
categories: Optional category filter.
limit: Max number of results.
depth: "shallow" for direct vector search, "deep" for intelligent flow.
source: Optional provenance filter. Private records are only visible
when this matches the record's source.
include_private: If True, all private records are visible regardless of source.
Returns:
List of MemoryMatch, ordered by relevance.
"""
# Read barrier: wait for any pending background saves to finish
# so that the search sees all persisted records.
self.drain_writes()
_source = "unified_memory"
try:
crewai_event_bus.emit(
self,
MemoryQueryStartedEvent(
query=query,
limit=limit,
score_threshold=None,
source_type=_source,
),
)
start = time.perf_counter()
if depth == "shallow":
embedding = embed_text(self._embedder, query)
if not embedding:
results: list[MemoryMatch] = []
else:
raw = self._storage.search(
embedding,
scope_prefix=scope,
categories=categories,
limit=limit,
min_score=0.0,
)
# Privacy filter
if not include_private:
raw = [
(r, s) for r, s in raw
if not r.private or r.source == source
]
results = []
for r, s in raw:
composite, reasons = compute_composite_score(
r, s, self._config
)
results.append(
MemoryMatch(
record=r,
score=composite,
match_reasons=reasons,
)
)
results.sort(key=lambda m: m.score, reverse=True)
else:
flow = RecallFlow(
storage=self._storage,
llm=self._llm,
embedder=self._embedder,
config=self._config,
)
flow.kickoff(
inputs={
"query": query,
"scope": scope,
"categories": categories or [],
"limit": limit,
"source": source,
"include_private": include_private,
}
)
results = flow.state.final_results
# Update last_accessed for recalled records
if results:
try:
touch = getattr(self._storage, "touch_records", None)
if touch is not None:
touch([m.record.id for m in results])
except Exception: # noqa: S110
pass # Non-critical: don't fail recall because of touch
elapsed_ms = (time.perf_counter() - start) * 1000
crewai_event_bus.emit(
self,
MemoryQueryCompletedEvent(
query=query,
results=results,
limit=limit,
score_threshold=None,
query_time_ms=elapsed_ms,
source_type=_source,
),
)
return results
except Exception as e:
crewai_event_bus.emit(
self,
MemoryQueryFailedEvent(
query=query,
limit=limit,
score_threshold=None,
error=str(e),
source_type=_source,
),
)
raise
def forget(
self,
scope: str | None = None,
categories: list[str] | None = None,
older_than: datetime | None = None,
metadata_filter: dict[str, Any] | None = None,
record_ids: list[str] | None = None,
) -> int:
"""Delete memories matching criteria.
Returns:
Number of records deleted.
"""
return self._storage.delete(
scope_prefix=scope,
categories=categories,
record_ids=record_ids,
older_than=older_than,
metadata_filter=metadata_filter,
)
def update(
self,
record_id: str,
content: str | None = None,
scope: str | None = None,
categories: list[str] | None = None,
metadata: dict[str, Any] | None = None,
importance: float | None = None,
) -> MemoryRecord:
"""Update an existing memory record by ID.
Args:
record_id: ID of the record to update.
content: New content; re-embedded if provided.
scope: New scope path.
categories: New categories.
metadata: New metadata.
importance: New importance score.
Returns:
The updated MemoryRecord.
Raises:
ValueError: If the record is not found.
"""
existing = self._storage.get_record(record_id)
if existing is None:
raise ValueError(f"Record not found: {record_id}")
now = datetime.utcnow()
updates: dict[str, Any] = {"last_accessed": now}
if content is not None:
updates["content"] = content
embedding = embed_text(self._embedder, content)
updates["embedding"] = embedding if embedding else existing.embedding
if scope is not None:
updates["scope"] = scope
if categories is not None:
updates["categories"] = categories
if metadata is not None:
updates["metadata"] = metadata
if importance is not None:
updates["importance"] = importance
updated = existing.model_copy(update=updates)
self._storage.update(updated)
return updated
def scope(self, path: str) -> Any:
"""Return a scoped view of this memory."""
from crewai.memory.memory_scope import MemoryScope
return MemoryScope(memory=self, root_path=path)
def slice(
self,
scopes: list[str],
categories: list[str] | None = None,
read_only: bool = True,
) -> Any:
"""Return a multi-scope view (slice) of this memory."""
from crewai.memory.memory_scope import MemorySlice
return MemorySlice(
memory=self,
scopes=scopes,
categories=categories,
read_only=read_only,
)
def list_scopes(self, path: str = "/") -> list[str]:
"""List immediate child scopes under path."""
return self._storage.list_scopes(path)
def list_records(
self, scope: str | None = None, limit: int = 200, offset: int = 0
) -> list[MemoryRecord]:
"""List records in a scope, newest first.
Args:
scope: Optional scope path prefix to filter by.
limit: Maximum number of records to return.
offset: Number of records to skip (for pagination).
"""
return self._storage.list_records(scope_prefix=scope, limit=limit, offset=offset)
def info(self, path: str = "/") -> ScopeInfo:
"""Return scope info for path."""
return self._storage.get_scope_info(path)
def tree(self, path: str = "/", max_depth: int = 3) -> str:
"""Return a formatted tree of scopes (string)."""
lines: list[str] = []
def _walk(p: str, depth: int, prefix: str) -> None:
if depth > max_depth:
return
info = self._storage.get_scope_info(p)
lines.append(f"{prefix}{p or '/'} ({info.record_count} records)")
for child in info.child_scopes[:20]:
_walk(child, depth + 1, prefix + " ")
_walk(path.rstrip("/") or "/", 0, "")
return "\n".join(lines) if lines else f"{path or '/'} (0 records)"
def list_categories(self, path: str | None = None) -> dict[str, int]:
"""List categories and counts; path=None means global."""
return self._storage.list_categories(scope_prefix=path)
def reset(self, scope: str | None = None) -> None:
"""Reset (delete all) memories in scope. None = all."""
self._storage.reset(scope_prefix=scope)
async def aextract_memories(self, content: str) -> list[str]:
"""Async variant of extract_memories."""
return self.extract_memories(content)
async def aremember(
self,
content: str,
scope: str | None = None,
categories: list[str] | None = None,
metadata: dict[str, Any] | None = None,
importance: float | None = None,
source: str | None = None,
private: bool = False,
) -> MemoryRecord:
"""Async remember: delegates to sync for now."""
return self.remember(
content,
scope=scope,
categories=categories,
metadata=metadata,
importance=importance,
source=source,
private=private,
)
async def aremember_many(
self,
contents: list[str],
scope: str | None = None,
categories: list[str] | None = None,
metadata: dict[str, Any] | None = None,
importance: float | None = None,
source: str | None = None,
private: bool = False,
agent_role: str | None = None,
) -> list[MemoryRecord]:
"""Async remember_many: delegates to sync for now."""
return self.remember_many(
contents,
scope=scope,
categories=categories,
metadata=metadata,
importance=importance,
source=source,
private=private,
agent_role=agent_role,
)
async def arecall(
self,
query: str,
scope: str | None = None,
categories: list[str] | None = None,
limit: int = 10,
depth: Literal["shallow", "deep"] = "deep",
source: str | None = None,
include_private: bool = False,
) -> list[MemoryMatch]:
"""Async recall: delegates to sync for now."""
return self.recall(
query,
scope=scope,
categories=categories,
limit=limit,
depth=depth,
source=source,
include_private=include_private,
)
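
To summarize the public surface above, a minimal crew-less usage sketch (requires OPENAI_API_KEY for the default LLM and embedder):

```python
from crewai import Memory

memory = Memory()

# Blocking save: returns the created MemoryRecord immediately.
record = memory.remember(
    "The team decided to ship releases on Thursdays.",
    scope="/company/engineering",
    categories=["process"],
    importance=0.7,
)

# Non-blocking batch save; a later recall() waits for pending writes.
memory.remember_many([
    "Alice owns the deployment pipeline.",
    "Staging mirrors production as of Q1.",
])

for match in memory.recall("Who owns deployments?", depth="shallow", limit=5):
    print(f"{match.score:.2f} {match.record.content}")

memory.close()  # drain pending saves and shut down the save pool
```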

View File

@@ -1,136 +0,0 @@
"""Memory tools that give agents active recall and remember capabilities."""
from __future__ import annotations
from typing import Any
from pydantic import BaseModel, Field
from crewai.tools.base_tool import BaseTool
from crewai.utilities.i18n import get_i18n
class RecallMemorySchema(BaseModel):
"""Schema for the recall memory tool."""
queries: list[str] = Field(
...,
description=(
"One or more search queries. Pass a single item for a focused search, "
"or multiple items to search for several things at once."
),
)
scope: str | None = Field(
default=None,
description="Optional scope to narrow the search (e.g. /project/alpha)",
)
depth: str = Field(
default="shallow",
description="'shallow' for fast vector search, 'deep' for LLM-analyzed retrieval",
)
class RecallMemoryTool(BaseTool):
"""Tool that lets an agent search memory for one or more queries at once."""
name: str = "Search memory"
description: str = ""
args_schema: type[BaseModel] = RecallMemorySchema
memory: Any = Field(exclude=True)
def _run(
self,
queries: list[str] | str,
scope: str | None = None,
depth: str = "shallow",
**kwargs: Any,
) -> str:
"""Search memory for relevant information.
Args:
queries: One or more search queries (string or list of strings).
scope: Optional scope prefix to narrow the search.
depth: "shallow" for fast vector search, "deep" for LLM-analyzed retrieval.
Returns:
Formatted string of matching memories, or a message if none found.
"""
if isinstance(queries, str):
queries = [queries]
actual_depth = depth if depth in ("shallow", "deep") else "shallow"
all_lines: list[str] = []
seen_ids: set[str] = set()
for query in queries:
matches = self.memory.recall(query, scope=scope, limit=5, depth=actual_depth)
for m in matches:
if m.record.id not in seen_ids:
seen_ids.add(m.record.id)
all_lines.append(f"- (score={m.score:.2f}) {m.record.content}")
if not all_lines:
return "No relevant memories found."
return "Found memories:\n" + "\n".join(all_lines)
class RememberSchema(BaseModel):
"""Schema for the remember tool."""
contents: list[str] = Field(
...,
description=(
"One or more facts, decisions, or observations to remember. "
"Pass a single item or multiple items at once."
),
)
class RememberTool(BaseTool):
"""Tool that lets an agent save one or more items to memory at once."""
name: str = "Save to memory"
description: str = ""
args_schema: type[BaseModel] = RememberSchema
memory: Any = Field(exclude=True)
def _run(self, contents: list[str] | str, **kwargs: Any) -> str:
"""Store one or more items in memory. The system infers scope, categories, and importance.
Args:
contents: One or more items to remember (string or list of strings).
Returns:
Confirmation with the number of items saved.
"""
if isinstance(contents, str):
contents = [contents]
if len(contents) == 1:
record = self.memory.remember(contents[0])
return (
f"Saved to memory (scope={record.scope}, "
f"importance={record.importance:.1f})."
)
self.memory.remember_many(contents)
return f"Saving {len(contents)} items to memory in background."
def create_memory_tools(memory: Any) -> list[BaseTool]:
"""Create Recall and Remember tools for the given memory instance.
Args:
memory: A Memory, MemoryScope, or MemorySlice instance.
Returns:
List containing a RecallMemoryTool and a RememberTool.
"""
i18n = get_i18n()
return [
RecallMemoryTool(
memory=memory,
description=i18n.tools("recall_memory"),
),
RememberTool(
memory=memory,
description=i18n.tools("save_to_memory"),
),
]
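
And a hedged sketch of handing these tools to an agent. `create_memory_tools` is defined directly above; the `Agent` fields shown follow the usual CrewAI constructor and are illustrative:

```python
from crewai import Agent, Memory

memory = Memory()
tools = create_memory_tools(memory)

researcher = Agent(
    role="Researcher",
    goal="Gather and reuse facts across tasks",
    backstory="Keeps meticulous notes.",
    tools=tools,  # the agent can now actively search and save memories
)
```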

View File

@@ -34,11 +34,7 @@
"lite_agent_response_format": "Format your final answer according to the following OpenAPI schema: {response_format}\n\nIMPORTANT: Preserve the original content exactly as-is. Do NOT rewrite, paraphrase, or modify the meaning of the content. Only structure it to match the schema format.\n\nDo not include the OpenAPI schema in the final output. Ensure the final output does not include any code block markers like ```json or ```python.",
"knowledge_search_query": "The original query is: {task_prompt}.",
"knowledge_search_query_system_prompt": "Your goal is to rewrite the user query so that it is optimized for retrieval from a vector database. Consider how the query will be used to find relevant documents, and aim to make it more specific and context-aware. \n\n Do not include any other text than the rewritten query, especially any preamble or postamble and only add expected output format if its relevant to the rewritten query. \n\n Focus on the key words of the intended task and to retrieve the most relevant information. \n\n There will be some extra context provided that might need to be removed such as expected_output formats structured_outputs and other instructions.",
"human_feedback_collapse": "Based on the following human feedback, determine which outcome best matches their intent.\n\nFeedback: {feedback}\n\nPossible outcomes: {outcomes}\n\nRespond with ONLY one of the exact outcome values listed above, nothing else.",
"hitl_pre_review_system": "You are reviewing content before a human sees it. Apply the lessons from past human feedback to improve the output. Preserve the original meaning and structure, but incorporate the corrections and preferences indicated by the lessons.",
"hitl_pre_review_user": "Output to review:\n{output}\n\nLessons from past human feedback:\n{lessons}\n\nApply the lessons to improve the output.",
"hitl_distill_system": "You extract generalizable lessons from human feedback on system outputs. A lesson should be a reusable rule or preference that applies to future similar outputs -- not a one-time correction specific to this exact content.\n\nExamples of good lessons:\n- Always include source citations when making factual claims\n- Use bullet points instead of long paragraphs for action items\n- Avoid technical jargon when the audience is non-technical\n\nIf the feedback is just approval (e.g. looks good, approved) or contains no generalizable guidance, return an empty list.",
"hitl_distill_user": "Method: {method_name}\n\nSystem output:\n{output}\n\nHuman feedback:\n{feedback}\n\nExtract generalizable lessons. Return an empty list if none."
"human_feedback_collapse": "Based on the following human feedback, determine which outcome best matches their intent.\n\nFeedback: {feedback}\n\nPossible outcomes: {outcomes}\n\nRespond with ONLY one of the exact outcome values listed above, nothing else."
},
"errors": {
"force_final_answer_error": "You can't keep going, here is the best final answer you generated:\n\n {formatted_answer}",
@@ -59,19 +55,7 @@
"name": "Add image to content",
"description": "See image to understand its content, you can optionally ask a question about the image",
"default_action": "Please provide a detailed description of this image, including all visual elements, context, and any notable details you can observe."
},
"recall_memory": "Search through the team's shared memory for relevant information. Pass one or more queries to search for multiple things at once. Use this when you need to find facts, decisions, preferences, or past results that may have been stored previously.",
"save_to_memory": "Store one or more important facts, decisions, observations, or lessons in memory so they can be recalled later by you or other agents. Pass multiple items at once when you have several things worth remembering."
},
"memory": {
"query_system": "You analyze a query for searching memory.\nGiven the query and available scopes, output:\n1. keywords: Key entities or keywords that can be used to filter by category.\n2. suggested_scopes: Which available scopes are most relevant (empty for all).\n3. complexity: 'simple' or 'complex'.\n4. recall_queries: 1-3 short, targeted search phrases distilled from the query. Each should be a concise phrase optimized for semantic vector search. If the query is already short and focused, return it as-is in a single-item list. For long task descriptions, extract the distinct things worth searching for.\n5. time_filter: If the query references a time period (like 'last week', 'yesterday', 'in January'), return an ISO 8601 date string for the earliest relevant date (e.g. '2026-02-01'). Return null if no time constraint is implied.",
"extract_memories_system": "You extract discrete, reusable memory statements from raw content (e.g. a task description and its result).\n\nFor the given content, output a list of memory statements. Each memory must:\n- Be one clear sentence or short statement\n- Be understandable without the original context\n- Capture a decision, fact, outcome, preference, lesson, or observation worth remembering\n- NOT be a vague summary or a restatement of the task description\n- NOT duplicate the same idea in different words\n\nIf there is nothing worth remembering (e.g. empty result, no decisions or facts), return an empty list.\nOutput a JSON object with a single key \"memories\" whose value is a list of strings.",
"extract_memories_user": "Content:\n{content}\n\nExtract memory statements as described. Return structured output.",
"query_user": "Query: {query}\n\nAvailable scopes: {available_scopes}\n{scope_desc}\n\nReturn the analysis as structured output.",
"save_system": "You analyze content to be stored in a hierarchical memory system.\nGiven the content and the existing scopes and categories, output:\n1. suggested_scope: The best matching existing scope path, or a new path if none fit (use / for root).\n2. categories: A list of categories (reuse existing when relevant, add new ones if needed).\n3. importance: A number from 0.0 to 1.0 indicating how significant this memory is.\n4. extracted_metadata: A JSON object with any entities, dates, or topics you can extract.",
"save_user": "Content to store:\n{content}\n\nExisting scopes: {existing_scopes}\nExisting categories: {existing_categories}\n\nReturn the analysis as structured output.",
"consolidation_system": "You are comparing new content against existing memories to decide how to consolidate them.\n\nFor each existing memory, choose one action:\n- 'keep': The existing memory is still accurate and not redundant with the new content.\n- 'update': The existing memory should be updated with new information. Provide the updated content.\n- 'delete': The existing memory is outdated, superseded, or contradicted by the new content.\n\nAlso decide whether the new content should be inserted as a separate memory:\n- insert_new=true: The new content adds information not fully captured by existing memories (even after updates).\n- insert_new=false: The new content is fully captured by the existing memories (after any updates).\n\nBe conservative: prefer 'keep' when unsure. Only 'update' or 'delete' when there is a clear contradiction, supersession, or redundancy.",
"consolidation_user": "New content to consider storing:\n{new_content}\n\nExisting similar memories:\n{records_summary}\n\nReturn the consolidation plan as structured output."
}
},
"reasoning": {
"initial_plan": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are creating a strategic plan for a task that requires your expertise and unique perspective.",

View File
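For context, the `extract_memories_system` prompt above requires the model to return a JSON object with a single `memories` key holding a list of strings. A minimal sketch of validating that contract (the response content here is invented for illustration):

```python
import json

# Hypothetical model response following the "extract_memories" contract above.
raw = '{"memories": ["PostgreSQL handles 10k connections with pooling.", "MySQL caps at around 5k."]}'
parsed = json.loads(raw)

assert set(parsed) == {"memories"}  # exactly one key, as the prompt requires
assert all(isinstance(m, str) for m in parsed["memories"])  # list of strings
```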

@@ -86,21 +86,10 @@ class I18N(BaseModel):
"""
return self.retrieve("tools", tool)
def memory(self, key: str) -> str:
"""Retrieve a memory prompt by key.
Args:
key: The key of the memory prompt to retrieve.
Returns:
The memory prompt as a string.
"""
return self.retrieve("memory", key)
def retrieve(
self,
kind: Literal[
"slices", "errors", "tools", "reasoning", "hierarchical_manager_agent", "memory"
"slices", "errors", "tools", "reasoning", "hierarchical_manager_agent"
],
key: str,
) -> str:

View File
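The hunk above drops `memory` from the `Literal` of prompt kinds and deletes the `memory()` helper, so prompts are fetched through the remaining kinds only. A rough usage sketch (the import path and key name are assumptions, not taken from this diff):

```python
from crewai.utilities.i18n import I18N  # assumed import path

i18n = I18N()
# Valid kinds after this change: "slices", "errors", "tools",
# "reasoning", "hierarchical_manager_agent".
description = i18n.retrieve("tools", "add_image")  # hypothetical key
# i18n.retrieve("memory", "query_system") would no longer type-check.
```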

@@ -4,7 +4,6 @@ Tests the Flow-based agent executor implementation including state management,
flow methods, routing logic, and error handling.
"""
import time
from unittest.mock import Mock, patch
import pytest
@@ -373,7 +372,10 @@ class TestFlowInvoke:
task.human_input = False
crew = Mock()
crew._memory = None
crew._short_term_memory = None
crew._long_term_memory = None
crew._entity_memory = None
crew._external_memory = None
agent = Mock()
agent.role = "Test"
@@ -396,10 +398,14 @@ class TestFlowInvoke:
}
@patch.object(AgentExecutor, "kickoff")
@patch.object(AgentExecutor, "_save_to_memory")
@patch.object(AgentExecutor, "_create_short_term_memory")
@patch.object(AgentExecutor, "_create_long_term_memory")
@patch.object(AgentExecutor, "_create_external_memory")
def test_invoke_success(
self,
mock_save_to_memory,
mock_external_memory,
mock_long_term_memory,
mock_short_term_memory,
mock_kickoff,
mock_dependencies,
):
@@ -419,7 +425,9 @@ class TestFlowInvoke:
assert result == {"output": "Final result"}
mock_kickoff.assert_called_once()
mock_save_to_memory.assert_called_once()
mock_short_term_memory.assert_called_once()
mock_long_term_memory.assert_called_once()
mock_external_memory.assert_called_once()
@patch.object(AgentExecutor, "kickoff")
def test_invoke_failure_no_agent_finish(self, mock_kickoff, mock_dependencies):
@@ -435,10 +443,14 @@ class TestFlowInvoke:
executor.invoke(inputs)
@patch.object(AgentExecutor, "kickoff")
@patch.object(AgentExecutor, "_save_to_memory")
@patch.object(AgentExecutor, "_create_short_term_memory")
@patch.object(AgentExecutor, "_create_long_term_memory")
@patch.object(AgentExecutor, "_create_external_memory")
def test_invoke_with_system_prompt(
self,
mock_save_to_memory,
mock_external_memory,
mock_long_term_memory,
mock_short_term_memory,
mock_kickoff,
mock_dependencies,
):
@@ -458,181 +470,10 @@ class TestFlowInvoke:
inputs = {"input": "test", "tool_names": "", "tools": ""}
result = executor.invoke(inputs)
mock_save_to_memory.assert_called_once()
mock_short_term_memory.assert_called_once()
mock_long_term_memory.assert_called_once()
mock_external_memory.assert_called_once()
mock_kickoff.assert_called_once()
assert result == {"output": "Done"}
assert len(executor.state.messages) >= 2
class TestNativeToolExecution:
"""Test native tool execution behavior."""
@pytest.fixture
def mock_dependencies(self):
llm = Mock()
llm.supports_stop_words.return_value = True
task = Mock()
task.name = "Test Task"
task.description = "Test"
task.human_input = False
task.response_model = None
crew = Mock()
crew._memory = None
crew.verbose = False
crew._train = False
agent = Mock()
agent.id = "test-agent-id"
agent.role = "Test Agent"
agent.verbose = False
agent.key = "test-key"
prompt = {"prompt": "Test {input} {tool_names} {tools}"}
tools_handler = Mock()
tools_handler.cache = None
return {
"llm": llm,
"task": task,
"crew": crew,
"agent": agent,
"prompt": prompt,
"max_iter": 10,
"tools": [],
"tools_names": "",
"stop_words": [],
"tools_description": "",
"tools_handler": tools_handler,
}
def test_execute_native_tool_runs_parallel_for_multiple_calls(
self, mock_dependencies
):
executor = AgentExecutor(**mock_dependencies)
def slow_one() -> str:
time.sleep(0.2)
return "one"
def slow_two() -> str:
time.sleep(0.2)
return "two"
executor._available_functions = {"slow_one": slow_one, "slow_two": slow_two}
executor.state.pending_tool_calls = [
{
"id": "call_1",
"function": {"name": "slow_one", "arguments": "{}"},
},
{
"id": "call_2",
"function": {"name": "slow_two", "arguments": "{}"},
},
]
started = time.perf_counter()
result = executor.execute_native_tool()
elapsed = time.perf_counter() - started
assert result == "native_tool_completed"
assert elapsed < 0.5
tool_messages = [m for m in executor.state.messages if m.get("role") == "tool"]
assert len(tool_messages) == 2
assert tool_messages[0]["tool_call_id"] == "call_1"
assert tool_messages[1]["tool_call_id"] == "call_2"
def test_execute_native_tool_falls_back_to_sequential_for_result_as_answer(
self, mock_dependencies
):
executor = AgentExecutor(**mock_dependencies)
def slow_one() -> str:
time.sleep(0.2)
return "one"
def slow_two() -> str:
time.sleep(0.2)
return "two"
result_tool = Mock()
result_tool.name = "slow_one"
result_tool.result_as_answer = True
result_tool.max_usage_count = None
result_tool.current_usage_count = 0
executor.original_tools = [result_tool]
executor._available_functions = {"slow_one": slow_one, "slow_two": slow_two}
executor.state.pending_tool_calls = [
{
"id": "call_1",
"function": {"name": "slow_one", "arguments": "{}"},
},
{
"id": "call_2",
"function": {"name": "slow_two", "arguments": "{}"},
},
]
started = time.perf_counter()
result = executor.execute_native_tool()
elapsed = time.perf_counter() - started
assert result == "tool_result_is_final"
assert elapsed >= 0.2
assert elapsed < 0.8
assert isinstance(executor.state.current_answer, AgentFinish)
assert executor.state.current_answer.output == "one"
def test_execute_native_tool_result_as_answer_short_circuits_remaining_calls(
self, mock_dependencies
):
executor = AgentExecutor(**mock_dependencies)
call_counts = {"slow_one": 0, "slow_two": 0}
def slow_one() -> str:
call_counts["slow_one"] += 1
time.sleep(0.2)
return "one"
def slow_two() -> str:
call_counts["slow_two"] += 1
time.sleep(0.2)
return "two"
result_tool = Mock()
result_tool.name = "slow_one"
result_tool.result_as_answer = True
result_tool.max_usage_count = None
result_tool.current_usage_count = 0
executor.original_tools = [result_tool]
executor._available_functions = {"slow_one": slow_one, "slow_two": slow_two}
executor.state.pending_tool_calls = [
{
"id": "call_1",
"function": {"name": "slow_one", "arguments": "{}"},
},
{
"id": "call_2",
"function": {"name": "slow_two", "arguments": "{}"},
},
]
started = time.perf_counter()
result = executor.execute_native_tool()
elapsed = time.perf_counter() - started
assert result == "tool_result_is_final"
assert isinstance(executor.state.current_answer, AgentFinish)
assert executor.state.current_answer.output == "one"
assert call_counts["slow_one"] == 1
assert call_counts["slow_two"] == 0
assert elapsed < 0.5
tool_messages = [m for m in executor.state.messages if m.get("role") == "tool"]
assert len(tool_messages) == 1
assert tool_messages[0]["tool_call_id"] == "call_1"

View File
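The timing assertions in the tests above (two 0.2 s tools finishing in under 0.5 s total, where sequential execution would take at least 0.4 s) imply the executor dispatches pending tool calls concurrently. A minimal sketch of that dispatch pattern, assuming a thread pool rather than CrewAI's actual internals:

```python
import time
from concurrent.futures import ThreadPoolExecutor

def run_parallel(calls: list[dict], functions: dict) -> list[str]:
    # Submit every pending tool call at once; collect results in call order.
    with ThreadPoolExecutor(max_workers=len(calls)) as pool:
        futures = [pool.submit(functions[c["name"]]) for c in calls]
        return [f.result() for f in futures]

def make_slow(tag: str):
    def _run() -> str:
        time.sleep(0.2)
        return tag
    return _run

started = time.perf_counter()
results = run_parallel(
    [{"name": "one"}, {"name": "two"}],
    {"one": make_slow("one"), "two": make_slow("two")},
)
assert results == ["one", "two"]
assert time.perf_counter() - started < 0.5  # overlapped, not sequential
```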

@@ -95,14 +95,16 @@ class TestAsyncAgentExecutor:
),
):
with patch.object(executor, "_show_start_logs"):
with patch.object(executor, "_save_to_memory"):
result = await executor.ainvoke(
{
"input": "test input",
"tool_names": "",
"tools": "",
}
)
with patch.object(executor, "_create_short_term_memory"):
with patch.object(executor, "_create_long_term_memory"):
with patch.object(executor, "_create_external_memory"):
result = await executor.ainvoke(
{
"input": "test input",
"tool_names": "",
"tools": "",
}
)
assert result == {"output": expected_output}
@@ -271,14 +273,16 @@ class TestAsyncAgentExecutor:
):
with patch.object(executor, "_show_start_logs"):
with patch.object(executor, "_show_logs"):
with patch.object(executor, "_save_to_memory"):
return await executor.ainvoke(
{
"input": f"test {executor_id}",
"tool_names": "",
"tools": "",
}
)
with patch.object(executor, "_create_short_term_memory"):
with patch.object(executor, "_create_long_term_memory"):
with patch.object(executor, "_create_external_memory"):
return await executor.ainvoke(
{
"input": f"test {executor_id}",
"tool_names": "",
"tools": "",
}
)
results = await asyncio.gather(
create_and_run_executor(1),

View File

@@ -16,7 +16,6 @@ import pytest
from crewai import LLM, Agent
from crewai.flow import Flow, start
from crewai.tools import BaseTool
from crewai.types.usage_metrics import UsageMetrics
# A simple test tool
@@ -1065,97 +1064,3 @@ def test_lite_agent_verbose_false_suppresses_printer_output():
agent2.kickoff("Say hello")
mock_printer.print.assert_not_called()
# --- LiteAgent memory integration ---
@pytest.mark.filterwarnings("ignore:LiteAgent is deprecated")
def test_lite_agent_memory_none_default():
"""With memory=None (default), _memory is None and no memory is used."""
mock_llm = Mock(spec=LLM)
mock_llm.call.return_value = "Final Answer: Ok"
mock_llm.stop = []
mock_llm.get_token_usage_summary.return_value = UsageMetrics(
total_tokens=10,
prompt_tokens=5,
completion_tokens=5,
cached_prompt_tokens=0,
successful_requests=1,
)
agent = LiteAgent(
role="Test",
goal="Test goal",
backstory="Test backstory",
llm=mock_llm,
memory=None,
verbose=False,
)
assert agent._memory is None
@pytest.mark.filterwarnings("ignore:LiteAgent is deprecated")
def test_lite_agent_memory_true_resolves_to_default_memory():
"""With memory=True, _memory is a Memory instance."""
from crewai.memory.unified_memory import Memory
mock_llm = Mock(spec=LLM)
mock_llm.call.return_value = "Final Answer: Ok"
mock_llm.stop = []
mock_llm.get_token_usage_summary.return_value = UsageMetrics(
total_tokens=10,
prompt_tokens=5,
completion_tokens=5,
cached_prompt_tokens=0,
successful_requests=1,
)
agent = LiteAgent(
role="Test",
goal="Test goal",
backstory="Test backstory",
llm=mock_llm,
memory=True,
verbose=False,
)
assert agent._memory is not None
assert isinstance(agent._memory, Memory)
@pytest.mark.filterwarnings("ignore:LiteAgent is deprecated")
def test_lite_agent_memory_instance_recall_and_save_called():
"""With a custom memory instance, kickoff calls recall and then extract_memories/remember."""
mock_llm = Mock(spec=LLM)
mock_llm.call.return_value = "Final Answer: The answer is 42."
mock_llm.stop = []
mock_llm.supports_stop_words.return_value = False
mock_llm.get_token_usage_summary.return_value = UsageMetrics(
total_tokens=10,
prompt_tokens=5,
completion_tokens=5,
cached_prompt_tokens=0,
successful_requests=1,
)
mock_memory = Mock()
mock_memory.recall.return_value = []
mock_memory.extract_memories.return_value = ["Fact one.", "Fact two."]
agent = LiteAgent(
role="Test",
goal="Test goal",
backstory="Test backstory",
llm=mock_llm,
memory=mock_memory,
verbose=False,
)
assert agent._memory is mock_memory
agent.kickoff("What is the answer?")
mock_memory.recall.assert_called_once()
call_kw = mock_memory.recall.call_args[1]
assert call_kw.get("limit") == 10
# depth is not passed explicitly; Memory.recall() defaults to "deep"
mock_memory.extract_memories.assert_called_once()
mock_memory.remember_many.assert_called_once_with(
["Fact one.", "Fact two."], agent_role="Test"
)

View File
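The deleted tests above pin down the LiteAgent memory lifecycle: `recall` before execution, then `extract_memories` and `remember_many` on the prompt and output afterwards. A condensed sketch of that call order using a mock in place of a real `Memory` (not CrewAI's actual internals):

```python
from unittest.mock import Mock

memory = Mock()
memory.recall.return_value = []
memory.extract_memories.return_value = ["Fact one.", "Fact two."]

def run_with_memory(prompt: str, output: str) -> None:
    # Hypothetical condensation of the lifecycle the deleted tests assert.
    memory.recall(prompt, limit=10)  # before execution
    facts = memory.extract_memories(f"{prompt}\n{output}")
    memory.remember_many(facts, agent_role="Test")  # after execution

run_with_memory("What is the answer?", "The answer is 42.")
memory.recall.assert_called_once()
memory.remember_many.assert_called_once_with(
    ["Fact one.", "Fact two."], agent_role="Test"
)
```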

@@ -6,20 +6,13 @@ when the LLM supports it, across multiple providers.
from __future__ import annotations
from collections.abc import Generator
import os
import threading
import time
from collections import Counter
from unittest.mock import patch
import pytest
from pydantic import BaseModel, Field
from crewai import Agent, Crew, Task
from crewai.events import crewai_event_bus
from crewai.hooks import register_after_tool_call_hook, register_before_tool_call_hook
from crewai.hooks.tool_hooks import ToolCallHookContext
from crewai.llm import LLM
from crewai.tools.base_tool import BaseTool
@@ -71,73 +64,6 @@ class FailingTool(BaseTool):
def _run(self) -> str:
raise Exception("This tool always fails")
class LocalSearchInput(BaseModel):
query: str = Field(description="Search query")
class ParallelProbe:
"""Thread-safe in-memory recorder for tool execution windows."""
_lock = threading.Lock()
_windows: list[tuple[str, float, float]] = []
@classmethod
def reset(cls) -> None:
with cls._lock:
cls._windows = []
@classmethod
def record(cls, tool_name: str, start: float, end: float) -> None:
with cls._lock:
cls._windows.append((tool_name, start, end))
@classmethod
def windows(cls) -> list[tuple[str, float, float]]:
with cls._lock:
return list(cls._windows)
def _parallel_prompt() -> str:
return (
"This is a tool-calling compliance test. "
"In your next assistant turn, emit exactly 3 tool calls in the same response (parallel tool calls), in this order: "
"1) parallel_local_search_one(query='latest OpenAI model release notes'), "
"2) parallel_local_search_two(query='latest Anthropic model release notes'), "
"3) parallel_local_search_three(query='latest Gemini model release notes'). "
"Do not call any other tools and do not answer before those 3 tool calls are emitted. "
"After the tool results return, provide a one paragraph summary."
)
def _max_concurrency(windows: list[tuple[str, float, float]]) -> int:
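"""Return the maximum number of simultaneously open windows (event sweep over starts and ends)."""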
points: list[tuple[float, int]] = []
for _, start, end in windows:
points.append((start, 1))
points.append((end, -1))
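# Sort by timestamp; on ties, end events (-1) sort before start events (+1), so back-to-back windows do not count as overlapping.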
points.sort(key=lambda p: (p[0], p[1]))
current = 0
maximum = 0
for _, delta in points:
current += delta
if current > maximum:
maximum = current
return maximum
def _assert_tools_overlapped() -> None:
windows = ParallelProbe.windows()
local_windows = [
w
for w in windows
if w[0].startswith("parallel_local_search_")
]
assert len(local_windows) >= 3, f"Expected at least 3 local tool calls, got {len(local_windows)}"
assert _max_concurrency(local_windows) >= 2, "Expected overlapping local tool executions"
@pytest.fixture
def calculator_tool() -> CalculatorTool:
"""Create a calculator tool for testing."""
@@ -156,65 +82,6 @@ def failing_tool() -> BaseTool:
)
@pytest.fixture
def parallel_tools() -> list[BaseTool]:
"""Create local tools used to verify native parallel execution deterministically."""
class ParallelLocalSearchOne(BaseTool):
name: str = "parallel_local_search_one"
description: str = "Local search tool #1 for concurrency testing."
args_schema: type[BaseModel] = LocalSearchInput
def _run(self, query: str) -> str:
start = time.perf_counter()
time.sleep(1.0)
end = time.perf_counter()
ParallelProbe.record(self.name, start, end)
return f"[one] {query}"
class ParallelLocalSearchTwo(BaseTool):
name: str = "parallel_local_search_two"
description: str = "Local search tool #2 for concurrency testing."
args_schema: type[BaseModel] = LocalSearchInput
def _run(self, query: str) -> str:
start = time.perf_counter()
time.sleep(1.0)
end = time.perf_counter()
ParallelProbe.record(self.name, start, end)
return f"[two] {query}"
class ParallelLocalSearchThree(BaseTool):
name: str = "parallel_local_search_three"
description: str = "Local search tool #3 for concurrency testing."
args_schema: type[BaseModel] = LocalSearchInput
def _run(self, query: str) -> str:
start = time.perf_counter()
time.sleep(1.0)
end = time.perf_counter()
ParallelProbe.record(self.name, start, end)
return f"[three] {query}"
return [
ParallelLocalSearchOne(),
ParallelLocalSearchTwo(),
ParallelLocalSearchThree(),
]
def _attach_parallel_probe_handler() -> None:
@crewai_event_bus.on(ToolUsageFinishedEvent)
def _capture_tool_window(_source, event: ToolUsageFinishedEvent):
if not event.tool_name.startswith("parallel_local_search_"):
return
ParallelProbe.record(
event.tool_name,
event.started_at.timestamp(),
event.finished_at.timestamp(),
)
# =============================================================================
# OpenAI Provider Tests
# =============================================================================
@@ -255,7 +122,7 @@ class TestOpenAINativeToolCalling:
self, calculator_tool: CalculatorTool
) -> None:
"""Test OpenAI agent kickoff with mocked LLM call."""
llm = LLM(model="gpt-5-nano")
llm = LLM(model="gpt-4o-mini")
with patch.object(llm, "call", return_value="The answer is 120.") as mock_call:
agent = Agent(
@@ -279,174 +146,6 @@ class TestOpenAINativeToolCalling:
assert mock_call.called
assert result is not None
@pytest.mark.vcr()
@pytest.mark.timeout(180)
def test_openai_parallel_native_tool_calling_test_crew(
self, parallel_tools: list[BaseTool]
) -> None:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="gpt-5-nano", temperature=1),
verbose=False,
max_iter=3,
)
task = Task(
description=_parallel_prompt(),
expected_output="A one sentence summary of both tool outputs",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
assert result is not None
_assert_tools_overlapped()
@pytest.mark.vcr()
@pytest.mark.timeout(180)
def test_openai_parallel_native_tool_calling_test_agent_kickoff(
self, parallel_tools: list[BaseTool]
) -> None:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="gpt-4o-mini"),
verbose=False,
max_iter=3,
)
result = agent.kickoff(_parallel_prompt())
assert result is not None
_assert_tools_overlapped()
@pytest.mark.vcr()
@pytest.mark.timeout(180)
def test_openai_parallel_native_tool_calling_tool_hook_parity_crew(
self, parallel_tools: list[BaseTool]
) -> None:
hook_calls: dict[str, list[dict[str, str]]] = {"before": [], "after": []}
def before_hook(context: ToolCallHookContext) -> bool | None:
if context.tool_name.startswith("parallel_local_search_"):
hook_calls["before"].append(
{
"tool_name": context.tool_name,
"query": str(context.tool_input.get("query", "")),
}
)
return None
def after_hook(context: ToolCallHookContext) -> str | None:
if context.tool_name.startswith("parallel_local_search_"):
hook_calls["after"].append(
{
"tool_name": context.tool_name,
"query": str(context.tool_input.get("query", "")),
}
)
return None
register_before_tool_call_hook(before_hook)
register_after_tool_call_hook(after_hook)
try:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="gpt-5-nano", temperature=1),
verbose=False,
max_iter=3,
)
task = Task(
description=_parallel_prompt(),
expected_output="A one sentence summary of both tool outputs",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
assert result is not None
_assert_tools_overlapped()
before_names = [call["tool_name"] for call in hook_calls["before"]]
after_names = [call["tool_name"] for call in hook_calls["after"]]
assert len(before_names) >= 3, "Expected before hooks for all parallel calls"
assert Counter(before_names) == Counter(after_names)
assert all(call["query"] for call in hook_calls["before"])
assert all(call["query"] for call in hook_calls["after"])
finally:
from crewai.hooks import (
unregister_after_tool_call_hook,
unregister_before_tool_call_hook,
)
unregister_before_tool_call_hook(before_hook)
unregister_after_tool_call_hook(after_hook)
@pytest.mark.vcr()
@pytest.mark.timeout(180)
def test_openai_parallel_native_tool_calling_tool_hook_parity_agent_kickoff(
self, parallel_tools: list[BaseTool]
) -> None:
hook_calls: dict[str, list[dict[str, str]]] = {"before": [], "after": []}
def before_hook(context: ToolCallHookContext) -> bool | None:
if context.tool_name.startswith("parallel_local_search_"):
hook_calls["before"].append(
{
"tool_name": context.tool_name,
"query": str(context.tool_input.get("query", "")),
}
)
return None
def after_hook(context: ToolCallHookContext) -> str | None:
if context.tool_name.startswith("parallel_local_search_"):
hook_calls["after"].append(
{
"tool_name": context.tool_name,
"query": str(context.tool_input.get("query", "")),
}
)
return None
register_before_tool_call_hook(before_hook)
register_after_tool_call_hook(after_hook)
try:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="gpt-5-nano", temperature=1),
verbose=False,
max_iter=3,
)
result = agent.kickoff(_parallel_prompt())
assert result is not None
_assert_tools_overlapped()
before_names = [call["tool_name"] for call in hook_calls["before"]]
after_names = [call["tool_name"] for call in hook_calls["after"]]
assert len(before_names) >= 3, "Expected before hooks for all parallel calls"
assert Counter(before_names) == Counter(after_names)
assert all(call["query"] for call in hook_calls["before"])
assert all(call["query"] for call in hook_calls["after"])
finally:
from crewai.hooks import (
unregister_after_tool_call_hook,
unregister_before_tool_call_hook,
)
unregister_before_tool_call_hook(before_hook)
unregister_after_tool_call_hook(after_hook)
# =============================================================================
# Anthropic Provider Tests
@@ -518,46 +217,6 @@ class TestAnthropicNativeToolCalling:
assert mock_call.called
assert result is not None
@pytest.mark.vcr()
def test_anthropic_parallel_native_tool_calling_test_crew(
self, parallel_tools: list[BaseTool]
) -> None:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="anthropic/claude-sonnet-4-6"),
verbose=False,
max_iter=3,
)
task = Task(
description=_parallel_prompt(),
expected_output="A one sentence summary of both tool outputs",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
assert result is not None
_assert_tools_overlapped()
@pytest.mark.vcr()
def test_anthropic_parallel_native_tool_calling_test_agent_kickoff(
self, parallel_tools: list[BaseTool]
) -> None:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="anthropic/claude-sonnet-4-6"),
verbose=False,
max_iter=3,
)
result = agent.kickoff(_parallel_prompt())
assert result is not None
_assert_tools_overlapped()
# =============================================================================
# Google/Gemini Provider Tests
@@ -588,7 +247,7 @@ class TestGeminiNativeToolCalling:
goal="Help users with mathematical calculations",
backstory="You are a helpful math assistant.",
tools=[calculator_tool],
llm=LLM(model="gemini/gemini-2.5-flash"),
llm=LLM(model="gemini/gemini-2.0-flash-exp"),
)
task = Task(
@@ -607,7 +266,7 @@ class TestGeminiNativeToolCalling:
self, calculator_tool: CalculatorTool
) -> None:
"""Test Gemini agent kickoff with mocked LLM call."""
llm = LLM(model="gemini/gemini-2.5-flash")
llm = LLM(model="gemini/gemini-2.0-flash-001")
with patch.object(llm, "call", return_value="The answer is 120.") as mock_call:
agent = Agent(
@@ -631,46 +290,6 @@ class TestGeminiNativeToolCalling:
assert mock_call.called
assert result is not None
@pytest.mark.vcr()
def test_gemini_parallel_native_tool_calling_test_crew(
self, parallel_tools: list[BaseTool]
) -> None:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="gemini/gemini-2.5-flash"),
verbose=False,
max_iter=3,
)
task = Task(
description=_parallel_prompt(),
expected_output="A one sentence summary of both tool outputs",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
assert result is not None
_assert_tools_overlapped()
@pytest.mark.vcr()
def test_gemini_parallel_native_tool_calling_test_agent_kickoff(
self, parallel_tools: list[BaseTool]
) -> None:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="gemini/gemini-2.5-flash"),
verbose=False,
max_iter=3,
)
result = agent.kickoff(_parallel_prompt())
assert result is not None
_assert_tools_overlapped()
# =============================================================================
# Azure Provider Tests
@@ -705,7 +324,7 @@ class TestAzureNativeToolCalling:
goal="Help users with mathematical calculations",
backstory="You are a helpful math assistant.",
tools=[calculator_tool],
llm=LLM(model="azure/gpt-5-nano"),
llm=LLM(model="azure/gpt-4o-mini"),
verbose=False,
max_iter=3,
)
@@ -728,7 +347,7 @@ class TestAzureNativeToolCalling:
) -> None:
"""Test Azure agent kickoff with mocked LLM call."""
llm = LLM(
model="azure/gpt-5-nano",
model="azure/gpt-4o-mini",
api_key="test-key",
base_url="https://test.openai.azure.com",
)
@@ -755,46 +374,6 @@ class TestAzureNativeToolCalling:
assert mock_call.called
assert result is not None
@pytest.mark.vcr()
def test_azure_parallel_native_tool_calling_test_crew(
self, parallel_tools: list[BaseTool]
) -> None:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="azure/gpt-5-nano"),
verbose=False,
max_iter=3,
)
task = Task(
description=_parallel_prompt(),
expected_output="A one sentence summary of both tool outputs",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
assert result is not None
_assert_tools_overlapped()
@pytest.mark.vcr()
def test_azure_parallel_native_tool_calling_test_agent_kickoff(
self, parallel_tools: list[BaseTool]
) -> None:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="azure/gpt-5-nano"),
verbose=False,
max_iter=3,
)
result = agent.kickoff(_parallel_prompt())
assert result is not None
_assert_tools_overlapped()
# =============================================================================
# Bedrock Provider Tests
@@ -805,30 +384,18 @@ class TestBedrockNativeToolCalling:
"""Tests for native tool calling with AWS Bedrock models."""
@pytest.fixture(autouse=True)
def validate_bedrock_credentials_for_live_recording(self):
"""Run Bedrock tests only when explicitly enabled."""
run_live_bedrock = os.getenv("RUN_BEDROCK_LIVE_TESTS", "false").lower() == "true"
if not run_live_bedrock:
pytest.skip(
"Skipping Bedrock tests by default. "
"Set RUN_BEDROCK_LIVE_TESTS=true with valid AWS credentials to enable."
)
access_key = os.getenv("AWS_ACCESS_KEY_ID", "")
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY", "")
if (
not access_key
or not secret_key
or access_key.startswith(("fake-", "test-"))
or secret_key.startswith(("fake-", "test-"))
):
pytest.skip(
"Skipping Bedrock tests: valid AWS credentials are required when "
"RUN_BEDROCK_LIVE_TESTS=true."
)
yield
def mock_aws_env(self):
"""Mock AWS environment variables for tests."""
env_vars = {
"AWS_ACCESS_KEY_ID": "test-key",
"AWS_SECRET_ACCESS_KEY": "test-secret",
"AWS_REGION": "us-east-1",
}
if "AWS_ACCESS_KEY_ID" not in os.environ:
with patch.dict(os.environ, env_vars):
yield
else:
yield
@pytest.mark.vcr()
def test_bedrock_agent_kickoff_with_tools_mocked(
@@ -860,46 +427,6 @@ class TestBedrockNativeToolCalling:
assert result.raw is not None
assert "120" in str(result.raw)
@pytest.mark.vcr()
def test_bedrock_parallel_native_tool_calling_test_crew(
self, parallel_tools: list[BaseTool]
) -> None:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="bedrock/anthropic.claude-3-haiku-20240307-v1:0"),
verbose=False,
max_iter=3,
)
task = Task(
description=_parallel_prompt(),
expected_output="A one sentence summary of both tool outputs",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
assert result is not None
_assert_tools_overlapped()
@pytest.mark.vcr()
def test_bedrock_parallel_native_tool_calling_test_agent_kickoff(
self, parallel_tools: list[BaseTool]
) -> None:
agent = Agent(
role="Parallel Tool Agent",
goal="Use both tools exactly as instructed",
backstory="You follow tool instructions precisely.",
tools=parallel_tools,
llm=LLM(model="bedrock/anthropic.claude-3-haiku-20240307-v1:0"),
verbose=False,
max_iter=3,
)
result = agent.kickoff(_parallel_prompt())
assert result is not None
_assert_tools_overlapped()
# =============================================================================
# Cross-Provider Native Tool Calling Behavior Tests
@@ -912,7 +439,7 @@ class TestNativeToolCallingBehavior:
def test_supports_function_calling_check(self) -> None:
"""Test that supports_function_calling() is properly checked."""
# OpenAI should support function calling
openai_llm = LLM(model="gpt-5-nano")
openai_llm = LLM(model="gpt-4o-mini")
assert hasattr(openai_llm, "supports_function_calling")
assert openai_llm.supports_function_calling() is True
@@ -948,7 +475,7 @@ class TestNativeToolCallingTokenUsage:
goal="Perform calculations efficiently",
backstory="You calculate things.",
tools=[calculator_tool],
llm=LLM(model="gpt-5-nano"),
llm=LLM(model="gpt-4o-mini"),
verbose=False,
max_iter=3,
)
@@ -992,7 +519,7 @@ def test_native_tool_calling_error_handling(failing_tool: FailingTool):
goal="Perform calculations efficiently",
backstory="You calculate things.",
tools=[failing_tool],
llm=LLM(model="gpt-5-nano"),
llm=LLM(model="gpt-4o-mini"),
verbose=False,
max_iter=3,
)
@@ -1051,7 +578,7 @@ class TestMaxUsageCountWithNativeToolCalling:
goal="Call the counting tool multiple times",
backstory="You are an agent that counts things.",
tools=[tool],
llm=LLM(model="gpt-5-nano"),
llm=LLM(model="gpt-4o-mini"),
verbose=False,
max_iter=5,
)
@@ -1079,7 +606,7 @@ class TestMaxUsageCountWithNativeToolCalling:
goal="Use the counting tool as many times as requested",
backstory="You are an agent that counts things. You must try to use the tool for each value requested.",
tools=[tool],
llm=LLM(model="gpt-5-nano"),
llm=LLM(model="gpt-4o-mini"),
verbose=False,
max_iter=5,
)
@@ -1111,7 +638,7 @@ class TestMaxUsageCountWithNativeToolCalling:
goal="Use the counting tool exactly as requested",
backstory="You are an agent that counts things precisely.",
tools=[tool],
llm=LLM(model="gpt-5-nano"),
llm=LLM(model="gpt-4o-mini"),
verbose=False,
max_iter=5,
)
@@ -1126,6 +653,5 @@ class TestMaxUsageCountWithNativeToolCalling:
result = crew.kickoff()
assert result is not None
# Verify the requested calls occurred while keeping usage bounded.
assert tool.current_usage_count >= 2
assert tool.current_usage_count <= tool.max_usage_count
# Verify usage count was incremented for each successful call
assert tool.current_usage_count == 2

View File

@@ -1,247 +0,0 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
This is a tool-calling compliance test. In your next assistant turn, emit exactly
3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '1639'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.73.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-sonnet-4-6","id":"msg_01XeN1XTXZgmPyLMMGjivabb","type":"message","role":"assistant","content":[{"type":"text","text":"I''ll
execute all 3 parallel searches simultaneously right now!"},{"type":"tool_use","id":"toolu_01NwzvrxEz6tvT3A8ydvMtHu","name":"parallel_local_search_one","input":{"query":"latest
OpenAI model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01YCxzSB1suk9uPVC1uwfHz9","name":"parallel_local_search_two","input":{"query":"latest
Anthropic model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01Mauvxzv58eDY7pUt9HMKGy","name":"parallel_local_search_three","input":{"query":"latest
Gemini model release notes"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":914,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":169,"service_tier":"standard","inference_geo":"global"}}'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Security-Policy:
- CSP-FILTERED
Content-Type:
- application/json
Date:
- Wed, 18 Feb 2026 23:54:43 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '20000'
anthropic-ratelimit-requests-remaining:
- '19999'
anthropic-ratelimit-requests-reset:
- '2026-02-18T23:54:41Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '2099'
status:
code: 200
message: OK
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
This is a tool-calling compliance test. In your next assistant turn, emit exactly
3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."},{"role":"assistant","content":[{"type":"tool_use","id":"toolu_01NwzvrxEz6tvT3A8ydvMtHu","name":"parallel_local_search_one","input":{"query":"latest
OpenAI model release notes"}},{"type":"tool_use","id":"toolu_01YCxzSB1suk9uPVC1uwfHz9","name":"parallel_local_search_two","input":{"query":"latest
Anthropic model release notes"}},{"type":"tool_use","id":"toolu_01Mauvxzv58eDY7pUt9HMKGy","name":"parallel_local_search_three","input":{"query":"latest
Gemini model release notes"}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01NwzvrxEz6tvT3A8ydvMtHu","content":"[one]
latest OpenAI model release notes"},{"type":"tool_result","tool_use_id":"toolu_01YCxzSB1suk9uPVC1uwfHz9","content":"[two]
latest Anthropic model release notes"},{"type":"tool_result","tool_use_id":"toolu_01Mauvxzv58eDY7pUt9HMKGy","content":"[three]
latest Gemini model release notes"}]}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '2517'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.73.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: "{\"model\":\"claude-sonnet-4-6\",\"id\":\"msg_01PFXqwwdwwHWadPdtNU5tUZ\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"The
three parallel searches were executed successfully, each targeting the latest
release notes for the leading AI model families. The search results confirm
that queries were dispatched simultaneously to retrieve the most recent developments
from **OpenAI** (via tool one), **Anthropic** (via tool two), and **Google's
Gemini** (via tool three). While the local search tools returned placeholder
outputs in this test environment rather than detailed release notes, the structure
of the test validates that all three parallel tool calls were emitted correctly
and in the specified order \u2014 demonstrating proper concurrent tool-call
behavior with no dependencies between the three independent searches.\"}],\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"usage\":{\"input_tokens\":1197,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":131,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}}"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Security-Policy:
- CSP-FILTERED
Content-Type:
- application/json
Date:
- Wed, 18 Feb 2026 23:54:49 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '20000'
anthropic-ratelimit-requests-remaining:
- '19999'
anthropic-ratelimit-requests-reset:
- '2026-02-18T23:54:44Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '4092'
status:
code: 200
message: OK
version: 1

View File

@@ -1,254 +0,0 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
This is a tool-calling compliance test. In your next assistant turn, emit exactly
3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '1820'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.73.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-sonnet-4-6","id":"msg_01RJ4CphwpmkmsJFJjeCNvXz","type":"message","role":"assistant","content":[{"type":"text","text":"I''ll
execute all 3 parallel tool calls simultaneously right away!"},{"type":"tool_use","id":"toolu_01YWY3cSomRuv4USmq55Prk3","name":"parallel_local_search_one","input":{"query":"latest
OpenAI model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01Aaqj3LMXksE1nB3pscRhV5","name":"parallel_local_search_two","input":{"query":"latest
Anthropic model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01AcYxQvy8aYmAoUg9zx9qfq","name":"parallel_local_search_three","input":{"query":"latest
Gemini model release notes"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":951,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":170,"service_tier":"standard","inference_geo":"global"}}'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Security-Policy:
- CSP-FILTERED
Content-Type:
- application/json
Date:
- Wed, 18 Feb 2026 23:54:51 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '20000'
anthropic-ratelimit-requests-remaining:
- '19999'
anthropic-ratelimit-requests-reset:
- '2026-02-18T23:54:49Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '1967'
status:
code: 200
message: OK
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
This is a tool-calling compliance test. In your next assistant turn, emit exactly
3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."},{"role":"assistant","content":[{"type":"tool_use","id":"toolu_01YWY3cSomRuv4USmq55Prk3","name":"parallel_local_search_one","input":{"query":"latest
OpenAI model release notes"}},{"type":"tool_use","id":"toolu_01Aaqj3LMXksE1nB3pscRhV5","name":"parallel_local_search_two","input":{"query":"latest
Anthropic model release notes"}},{"type":"tool_use","id":"toolu_01AcYxQvy8aYmAoUg9zx9qfq","name":"parallel_local_search_three","input":{"query":"latest
Gemini model release notes"}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01YWY3cSomRuv4USmq55Prk3","content":"[one]
latest OpenAI model release notes"},{"type":"tool_result","tool_use_id":"toolu_01Aaqj3LMXksE1nB3pscRhV5","content":"[two]
latest Anthropic model release notes"},{"type":"tool_result","tool_use_id":"toolu_01AcYxQvy8aYmAoUg9zx9qfq","content":"[three]
latest Gemini model release notes"}]},{"role":"user","content":"Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '2882'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.73.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: "{\"model\":\"claude-sonnet-4-6\",\"id\":\"msg_0143MHUne1az3Tt69EoLjyZd\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"Here
is the complete content returned from all three tool calls:\\n\\n- **parallel_local_search_one**
result: `[one] latest OpenAI model release notes`\\n- **parallel_local_search_two**
result: `[two] latest Anthropic model release notes`\\n- **parallel_local_search_three**
result: `[three] latest Gemini model release notes`\\n\\nAll three parallel
tool calls were executed successfully in the same response turn, returning
their respective outputs: the first tool searched for the latest OpenAI model
release notes, the second tool searched for the latest Anthropic model release
notes, and the third tool searched for the latest Gemini model release notes
\u2014 confirming that all search queries were dispatched concurrently and
their results retrieved as expected.\"}],\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"usage\":{\"input_tokens\":1272,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":172,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}}"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Security-Policy:
- CSP-FILTERED
Content-Type:
- application/json
Date:
- Wed, 18 Feb 2026 23:54:55 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '20000'
anthropic-ratelimit-requests-remaining:
- '19999'
anthropic-ratelimit-requests-reset:
- '2026-02-18T23:54:52Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '3144'
status:
code: 200
message: OK
version: 1

View File

@@ -5,19 +5,20 @@ interactions:
calculations"}, {"role": "user", "content": "\nCurrent Task: Calculate what
is 15 * 8\n\nThis is the expected criteria for your final answer: The result
of the calculation\nyou MUST return the actual complete content as the final
answer, not a summary."}], "stream": false, "tool_choice": "auto", "tools":
[{"function": {"name": "calculator", "description": "Perform mathematical calculations.
Use this for any math operations.", "parameters": {"properties": {"expression":
{"description": "Mathematical expression to evaluate", "title": "Expression",
"type": "string"}}, "required": ["expression"], "type": "object", "additionalProperties":
false}}, "type": "function"}]}'
answer, not a summary.\n\nThis is VERY important to you, your job depends on
it!"}], "stream": false, "stop": ["\nObservation:"], "tool_choice": "auto",
"tools": [{"function": {"name": "calculator", "description": "Perform mathematical
calculations. Use this for any math operations.", "parameters": {"properties":
{"expression": {"description": "Mathematical expression to evaluate", "title":
"Expression", "type": "string"}}, "required": ["expression"], "type": "object"}},
"type": "function"}]}'
headers:
Accept:
- application/json
Connection:
- keep-alive
Content-Length:
- '828'
- '883'
Content-Type:
- application/json
User-Agent:
@@ -31,20 +32,20 @@ interactions:
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2024-12-01-preview
response:
body:
string: '{"choices":[{"content_filter_results":{},"finish_reason":"tool_calls","index":0,"logprobs":null,"message":{"annotations":[],"content":null,"refusal":null,"role":"assistant","tool_calls":[{"function":{"arguments":"{\"expression\":\"15
* 8\"}","name":"calculator"},"id":"call_Cow46pNllpDx0pxUgZFeqlh1","type":"function"}]}}],"created":1771459544,"id":"chatcmpl-DAlq4osCP9ABJ1HyXFBoYWylMg0bi","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":219,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":192,"rejected_prediction_tokens":0},"prompt_tokens":208,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":427}}
* 8\"}","name":"calculator"},"id":"call_cJWzKh5LdBpY3Sk8GATS3eRe","type":"function"}]}}],"created":1769122114,"id":"chatcmpl-D0xlavS0V3m00B9Fsjyv39xQWUGFV","model":"gpt-4o-mini-2024-07-18","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":"fp_f97eff32c5","usage":{"completion_tokens":18,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens":137,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":155}}
'
headers:
Content-Length:
- '1049'
- '1058'
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 00:05:45 GMT
- Thu, 22 Jan 2026 22:48:34 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
@@ -58,7 +59,7 @@ interactions:
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-deployment-name:
- gpt-5-nano
- gpt-4o-mini
x-ms-rai-invoked:
- 'true'
x-ms-region:
@@ -82,25 +83,26 @@ interactions:
calculations"}, {"role": "user", "content": "\nCurrent Task: Calculate what
is 15 * 8\n\nThis is the expected criteria for your final answer: The result
of the calculation\nyou MUST return the actual complete content as the final
answer, not a summary."}, {"role": "assistant", "content": "", "tool_calls":
[{"id": "call_Cow46pNllpDx0pxUgZFeqlh1", "type": "function", "function": {"name":
"calculator", "arguments": "{\"expression\":\"15 * 8\"}"}}]}, {"role": "tool",
"tool_call_id": "call_Cow46pNllpDx0pxUgZFeqlh1", "content": "The result of 15
* 8 is 120"}, {"role": "user", "content": "Analyze the tool result. If requirements
are met, provide the Final Answer. Otherwise, call the next tool. Deliver only
the answer without meta-commentary."}], "stream": false, "tool_choice": "auto",
"tools": [{"function": {"name": "calculator", "description": "Perform mathematical
calculations. Use this for any math operations.", "parameters": {"properties":
{"expression": {"description": "Mathematical expression to evaluate", "title":
"Expression", "type": "string"}}, "required": ["expression"], "type": "object",
"additionalProperties": false}}, "type": "function"}]}'
answer, not a summary.\n\nThis is VERY important to you, your job depends on
it!"}, {"role": "assistant", "content": "", "tool_calls": [{"id": "call_cJWzKh5LdBpY3Sk8GATS3eRe",
"type": "function", "function": {"name": "calculator", "arguments": "{\"expression\":\"15
* 8\"}"}}]}, {"role": "tool", "tool_call_id": "call_cJWzKh5LdBpY3Sk8GATS3eRe",
"content": "The result of 15 * 8 is 120"}, {"role": "user", "content": "Analyze
the tool result. If requirements are met, provide the Final Answer. Otherwise,
call the next tool. Deliver only the answer without meta-commentary."}], "stream":
false, "stop": ["\nObservation:"], "tool_choice": "auto", "tools": [{"function":
{"name": "calculator", "description": "Perform mathematical calculations. Use
this for any math operations.", "parameters": {"properties": {"expression":
{"description": "Mathematical expression to evaluate", "title": "Expression",
"type": "string"}}, "required": ["expression"], "type": "object"}}, "type":
"function"}]}'
headers:
Accept:
- application/json
Connection:
- keep-alive
Content-Length:
- '1320'
- '1375'
Content-Type:
- application/json
User-Agent:
@@ -114,19 +116,20 @@ interactions:
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2024-12-01-preview
response:
body:
string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"120","refusal":null,"role":"assistant"}}],"created":1771459547,"id":"chatcmpl-DAlq7zJimnIMoXieNww8jY5f2pIPd","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":203,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":192,"rejected_prediction_tokens":0},"prompt_tokens":284,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":487}}
string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The
result of the calculation is 120.","refusal":null,"role":"assistant"}}],"created":1769122115,"id":"chatcmpl-D0xlbUNVA7RVkn0GsuBGoNhgQTtac","model":"gpt-4o-mini-2024-07-18","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":"fp_f97eff32c5","usage":{"completion_tokens":11,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens":207,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":218}}
'
headers:
Content-Length:
- '1207'
- '1250'
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 00:05:49 GMT
- Thu, 22 Jan 2026 22:48:34 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
@@ -140,7 +143,7 @@ interactions:
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-deployment-name:
- gpt-5-nano
- gpt-4o-mini
x-ms-rai-invoked:
- 'true'
x-ms-region:

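For context on the hunks above: the `calculator` function whose JSON schema appears in both request bodies corresponds to a plain CrewAI tool. A minimal sketch, assuming the `@tool` decorator import path from recent crewai releases; the body is illustrative, not the repository's actual fixture code:

```python
from crewai.tools import tool


@tool("calculator")
def calculator(expression: str) -> str:
    """Perform mathematical calculations. Use this for any math operations."""
    # eval() keeps the sketch short; a real tool should use a safe
    # expression parser instead of evaluating arbitrary strings.
    return f"The result of {expression} is {eval(expression)}"
```

Serialized for the Chat Completions API this produces a function entry like the one recorded above, and the tool message `"The result of 15 * 8 is 120"` is simply this function's return value for `expression="15 * 8"`.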
View File

@@ -1,198 +0,0 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
You follow tool instructions precisely.\nYour personal goal is: Use both tools
exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
a tool-calling compliance test. In your next assistant turn, emit exactly 3
tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."}], "stream": false, "tool_choice": "auto", "tools": [{"function":
{"name": "parallel_local_search_one", "description": "Local search tool #1 for
concurrency testing.", "parameters": {"properties": {"query": {"description":
"Search query", "title": "Query", "type": "string"}}, "required": ["query"],
"type": "object", "additionalProperties": false}}, "type": "function"}, {"function":
{"name": "parallel_local_search_two", "description": "Local search tool #2 for
concurrency testing.", "parameters": {"properties": {"query": {"description":
"Search query", "title": "Query", "type": "string"}}, "required": ["query"],
"type": "object", "additionalProperties": false}}, "type": "function"}, {"function":
{"name": "parallel_local_search_three", "description": "Local search tool #3
for concurrency testing.", "parameters": {"properties": {"query": {"description":
"Search query", "title": "Query", "type": "string"}}, "required": ["query"],
"type": "object", "additionalProperties": false}}, "type": "function"}]}'
headers:
Accept:
- application/json
Connection:
- keep-alive
Content-Length:
- '1763'
Content-Type:
- application/json
User-Agent:
- X-USER-AGENT-XXX
accept-encoding:
- ACCEPT-ENCODING-XXX
api-key:
- X-API-KEY-XXX
authorization:
- AUTHORIZATION-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
response:
body:
string: '{"choices":[{"content_filter_results":{},"finish_reason":"tool_calls","index":0,"logprobs":null,"message":{"annotations":[],"content":null,"refusal":null,"role":"assistant","tool_calls":[{"function":{"arguments":"{\"query\":
\"latest OpenAI model release notes\"}","name":"parallel_local_search_one"},"id":"call_emQmocGydKuxvESfQopNngdm","type":"function"},{"function":{"arguments":"{\"query\":
\"latest Anthropic model release notes\"}","name":"parallel_local_search_two"},"id":"call_eNpK9WUYFCX2ZEUPhYCKvdMs","type":"function"},{"function":{"arguments":"{\"query\":
\"latest Gemini model release notes\"}","name":"parallel_local_search_three"},"id":"call_Wdtl6jFxGehSUMn5I1O4Mrdx","type":"function"}]}}],"created":1771459550,"id":"chatcmpl-DAlqAyJGnQKDkNCaTcjU2T8BeJaXM","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":666,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":576,"rejected_prediction_tokens":0},"prompt_tokens":343,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":1009}}
'
headers:
Content-Length:
- '1433'
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 00:05:55 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
- APIM-REQUEST-ID-XXX
azureml-model-session:
- AZUREML-MODEL-SESSION-XXX
x-accel-buffering:
- 'no'
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-deployment-name:
- gpt-5-nano
x-ms-rai-invoked:
- 'true'
x-ms-region:
- X-MS-REGION-XXX
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
You follow tool instructions precisely.\nYour personal goal is: Use both tools
exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
a tool-calling compliance test. In your next assistant turn, emit exactly 3
tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."}, {"role": "assistant", "content": "", "tool_calls": [{"id":
"call_emQmocGydKuxvESfQopNngdm", "type": "function", "function": {"name": "parallel_local_search_one",
"arguments": "{\"query\": \"latest OpenAI model release notes\"}"}}, {"id":
"call_eNpK9WUYFCX2ZEUPhYCKvdMs", "type": "function", "function": {"name": "parallel_local_search_two",
"arguments": "{\"query\": \"latest Anthropic model release notes\"}"}}, {"id":
"call_Wdtl6jFxGehSUMn5I1O4Mrdx", "type": "function", "function": {"name": "parallel_local_search_three",
"arguments": "{\"query\": \"latest Gemini model release notes\"}"}}]}, {"role":
"tool", "tool_call_id": "call_emQmocGydKuxvESfQopNngdm", "content": "[one] latest
OpenAI model release notes"}, {"role": "tool", "tool_call_id": "call_eNpK9WUYFCX2ZEUPhYCKvdMs",
"content": "[two] latest Anthropic model release notes"}, {"role": "tool", "tool_call_id":
"call_Wdtl6jFxGehSUMn5I1O4Mrdx", "content": "[three] latest Gemini model release
notes"}], "stream": false, "tool_choice": "auto", "tools": [{"function": {"name":
"parallel_local_search_one", "description": "Local search tool #1 for concurrency
testing.", "parameters": {"properties": {"query": {"description": "Search query",
"title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
"additionalProperties": false}}, "type": "function"}, {"function": {"name":
"parallel_local_search_two", "description": "Local search tool #2 for concurrency
testing.", "parameters": {"properties": {"query": {"description": "Search query",
"title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
"additionalProperties": false}}, "type": "function"}, {"function": {"name":
"parallel_local_search_three", "description": "Local search tool #3 for concurrency
testing.", "parameters": {"properties": {"query": {"description": "Search query",
"title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
"additionalProperties": false}}, "type": "function"}]}'
headers:
Accept:
- application/json
Connection:
- keep-alive
Content-Length:
- '2727'
Content-Type:
- application/json
User-Agent:
- X-USER-AGENT-XXX
accept-encoding:
- ACCEPT-ENCODING-XXX
api-key:
- X-API-KEY-XXX
authorization:
- AUTHORIZATION-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
response:
body:
string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The
latest release notes have been published for the OpenAI, Anthropic, and Gemini
models, signaling concurrent updates across the leading AI model families.
Each set outlines new capabilities and performance improvements, along with
changes to APIs, tooling, and deployment guidelines. Users should review the
individual notes to understand new features, adjustments to tokenization,
latency or throughput, safety and alignment enhancements, pricing or access
changes, and any breaking changes or migration steps required to adopt the
updated models in existing workflows.","refusal":null,"role":"assistant"}}],"created":1771459556,"id":"chatcmpl-DAlqGKWXfGNlTIbDY9F6oHQp6hbxM","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":747,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":640,"rejected_prediction_tokens":0},"prompt_tokens":467,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":1214}}
'
headers:
Content-Length:
- '1778'
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 00:06:02 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
- APIM-REQUEST-ID-XXX
azureml-model-session:
- AZUREML-MODEL-SESSION-XXX
x-accel-buffering:
- 'no'
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-deployment-name:
- gpt-5-nano
x-ms-rai-invoked:
- 'true'
x-ms-region:
- X-MS-REGION-XXX
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

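The cassette deleted above recorded a parallel tool-call compliance test against three stub search tools. A hedged sketch of tools matching the recorded schemas; the decorator import is assumed, and the return values mirror the tool results stored in the fixture (`[one] ...`, `[two] ...`, `[three] ...`):

```python
from crewai.tools import tool


@tool("parallel_local_search_one")
def parallel_local_search_one(query: str) -> str:
    """Local search tool #1 for concurrency testing."""
    return f"[one] {query}"


@tool("parallel_local_search_two")
def parallel_local_search_two(query: str) -> str:
    """Local search tool #2 for concurrency testing."""
    return f"[two] {query}"


@tool("parallel_local_search_three")
def parallel_local_search_three(query: str) -> str:
    """Local search tool #3 for concurrency testing."""
    return f"[three] {query}"
```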
View File

@@ -1,201 +0,0 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
You follow tool instructions precisely.\nYour personal goal is: Use both tools
exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
a tool-calling compliance test. In your next assistant turn, emit exactly 3
tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."}], "stream": false, "tool_choice":
"auto", "tools": [{"function": {"name": "parallel_local_search_one", "description":
"Local search tool #1 for concurrency testing.", "parameters": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}, "type":
"function"}, {"function": {"name": "parallel_local_search_two", "description":
"Local search tool #2 for concurrency testing.", "parameters": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}, "type":
"function"}, {"function": {"name": "parallel_local_search_three", "description":
"Local search tool #3 for concurrency testing.", "parameters": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}, "type":
"function"}]}'
headers:
Accept:
- application/json
Connection:
- keep-alive
Content-Length:
- '1944'
Content-Type:
- application/json
User-Agent:
- X-USER-AGENT-XXX
accept-encoding:
- ACCEPT-ENCODING-XXX
api-key:
- X-API-KEY-XXX
authorization:
- AUTHORIZATION-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
response:
body:
string: '{"choices":[{"content_filter_results":{},"finish_reason":"tool_calls","index":0,"logprobs":null,"message":{"annotations":[],"content":null,"refusal":null,"role":"assistant","tool_calls":[{"function":{"arguments":"{\"query\":
\"latest OpenAI model release notes\"}","name":"parallel_local_search_one"},"id":"call_NEvGoF86nhPQfXRoJd5SOyLd","type":"function"},{"function":{"arguments":"{\"query\":
\"latest Anthropic model release notes\"}","name":"parallel_local_search_two"},"id":"call_q8Q2du4gAMQLrGTgWgfwfbDZ","type":"function"},{"function":{"arguments":"{\"query\":
\"latest Gemini model release notes\"}","name":"parallel_local_search_three"},"id":"call_yTBal9ofZzuo10j0pWqhHCSj","type":"function"}]}}],"created":1771459563,"id":"chatcmpl-DAlqN7kyC5ACI5Yl1Pj63rOH5HIvI","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":2457,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":2368,"rejected_prediction_tokens":0},"prompt_tokens":378,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":2835}}
'
headers:
Content-Length:
- '1435'
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 00:06:17 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
- APIM-REQUEST-ID-XXX
azureml-model-session:
- AZUREML-MODEL-SESSION-XXX
x-accel-buffering:
- 'no'
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-deployment-name:
- gpt-5-nano
x-ms-rai-invoked:
- 'true'
x-ms-region:
- X-MS-REGION-XXX
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
You follow tool instructions precisely.\nYour personal goal is: Use both tools
exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
a tool-calling compliance test. In your next assistant turn, emit exactly 3
tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."}, {"role": "assistant", "content":
"", "tool_calls": [{"id": "call_NEvGoF86nhPQfXRoJd5SOyLd", "type": "function",
"function": {"name": "parallel_local_search_one", "arguments": "{\"query\":
\"latest OpenAI model release notes\"}"}}, {"id": "call_q8Q2du4gAMQLrGTgWgfwfbDZ",
"type": "function", "function": {"name": "parallel_local_search_two", "arguments":
"{\"query\": \"latest Anthropic model release notes\"}"}}, {"id": "call_yTBal9ofZzuo10j0pWqhHCSj",
"type": "function", "function": {"name": "parallel_local_search_three", "arguments":
"{\"query\": \"latest Gemini model release notes\"}"}}]}, {"role": "tool", "tool_call_id":
"call_NEvGoF86nhPQfXRoJd5SOyLd", "content": "[one] latest OpenAI model release
notes"}, {"role": "tool", "tool_call_id": "call_q8Q2du4gAMQLrGTgWgfwfbDZ", "content":
"[two] latest Anthropic model release notes"}, {"role": "tool", "tool_call_id":
"call_yTBal9ofZzuo10j0pWqhHCSj", "content": "[three] latest Gemini model release
notes"}, {"role": "user", "content": "Analyze the tool result. If requirements
are met, provide the Final Answer. Otherwise, call the next tool. Deliver only
the answer without meta-commentary."}], "stream": false, "tool_choice": "auto",
"tools": [{"function": {"name": "parallel_local_search_one", "description":
"Local search tool #1 for concurrency testing.", "parameters": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}, "type":
"function"}, {"function": {"name": "parallel_local_search_two", "description":
"Local search tool #2 for concurrency testing.", "parameters": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}, "type":
"function"}, {"function": {"name": "parallel_local_search_three", "description":
"Local search tool #3 for concurrency testing.", "parameters": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}, "type":
"function"}]}'
headers:
Accept:
- application/json
Connection:
- keep-alive
Content-Length:
- '3096'
Content-Type:
- application/json
User-Agent:
- X-USER-AGENT-XXX
accept-encoding:
- ACCEPT-ENCODING-XXX
api-key:
- X-API-KEY-XXX
authorization:
- AUTHORIZATION-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
response:
body:
string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The
three tool results indicate the latest release notes are available for OpenAI
models, Anthropic models, and Gemini models.","refusal":null,"role":"assistant"}}],"created":1771459579,"id":"chatcmpl-DAlqdRtr8EefmFfazuh4jm7KvVxim","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":1826,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":1792,"rejected_prediction_tokens":0},"prompt_tokens":537,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":2363}}
'
headers:
Content-Length:
- '1333'
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 00:06:31 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
- APIM-REQUEST-ID-XXX
azureml-model-session:
- AZUREML-MODEL-SESSION-XXX
x-accel-buffering:
- 'no'
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-deployment-name:
- gpt-5-nano
x-ms-rai-invoked:
- 'true'
x-ms-region:
- X-MS-REGION-XXX
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

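The removed parallel-tool cassettes are ordinary vcrpy fixtures. As a rough sketch of how such a file is consumed in a test, assuming vcrpy's stock `VCR`/`use_cassette` API; the cassette directory and filename here are hypothetical:

```python
import vcr

# Replay-only mode: never hit the network, and fail loudly if a request
# has no matching recorded interaction.
replay_vcr = vcr.VCR(
    cassette_library_dir="tests/cassettes",  # hypothetical location
    record_mode="none",
)


@replay_vcr.use_cassette("parallel_tool_calls.yaml")  # hypothetical name
def test_parallel_tool_calls_replay():
    # Any HTTP request made while this test runs is matched against the
    # recorded request bodies and answered from the stored responses.
    ...
```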
View File

@@ -1,63 +0,0 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
is a tool-calling compliance test. In your next assistant turn, emit exactly
3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."}]}], "inferenceConfig": {"stopSequences": ["\nObservation:"]},
"system": [{"text": "You are Parallel Tool Agent. You follow tool instructions
precisely.\nYour personal goal is: Use both tools exactly as instructed"}],
"toolConfig": {"tools": [{"toolSpec": {"name": "parallel_local_search_one",
"description": "Local search tool #1 for concurrency testing.", "inputSchema":
{"json": {"properties": {"query": {"description": "Search query", "title": "Query",
"type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
false}}}}, {"toolSpec": {"name": "parallel_local_search_two", "description":
"Local search tool #2 for concurrency testing.", "inputSchema": {"json": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}}},
{"toolSpec": {"name": "parallel_local_search_three", "description": "Local search
tool #3 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
{"description": "Search query", "title": "Query", "type": "string"}}, "required":
["query"], "type": "object", "additionalProperties": false}}}}]}}'
headers:
Content-Length:
- '1773'
Content-Type:
- !!binary |
YXBwbGljYXRpb24vanNvbg==
User-Agent:
- X-USER-AGENT-XXX
amz-sdk-invocation-id:
- AMZ-SDK-INVOCATION-ID-XXX
amz-sdk-request:
- !!binary |
YXR0ZW1wdD0x
authorization:
- AUTHORIZATION-XXX
x-amz-date:
- X-AMZ-DATE-XXX
method: POST
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
response:
body:
string: '{"message":"The security token included in the request is invalid."}'
headers:
Connection:
- keep-alive
Content-Length:
- '68'
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 00:00:08 GMT
x-amzn-ErrorType:
- UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
x-amzn-RequestId:
- X-AMZN-REQUESTID-XXX
status:
code: 403
message: Forbidden
version: 1

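The 403 recorded above (`UnrecognizedClientException`, "The security token included in the request is invalid") is AWS's standard invalid-credentials error, consistent with a cassette recorded against placeholder credentials. The request itself is the ordinary Bedrock Converse shape; a minimal boto3 sketch of the recorded call, with the prompt text elided:

```python
import boto3

client = boto3.client("bedrock-runtime", region_name="us-east-1")

# One of the three toolSpec entries sent in the recorded request.
search_tool = {
    "toolSpec": {
        "name": "parallel_local_search_one",
        "description": "Local search tool #1 for concurrency testing.",
        "inputSchema": {
            "json": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search query"}
                },
                "required": ["query"],
            }
        },
    }
}

response = client.converse(
    modelId="anthropic.claude-3-haiku-20240307-v1:0",
    messages=[{"role": "user", "content": [{"text": "..."}]}],  # task text elided
    system=[{"text": "You are Parallel Tool Agent. ..."}],  # system text elided
    inferenceConfig={"stopSequences": ["\nObservation:"]},
    toolConfig={"tools": [search_tool]},  # the cassette sends all three tools
)
```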
View File

@@ -1,226 +0,0 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
is a tool-calling compliance test. In your next assistant turn, emit exactly
3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."}]}], "inferenceConfig": {"stopSequences":
["\nObservation:"]}, "system": [{"text": "You are Parallel Tool Agent. You follow
tool instructions precisely.\nYour personal goal is: Use both tools exactly
as instructed"}], "toolConfig": {"tools": [{"toolSpec": {"name": "parallel_local_search_one",
"description": "Local search tool #1 for concurrency testing.", "inputSchema":
{"json": {"properties": {"query": {"description": "Search query", "title": "Query",
"type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
false}}}}, {"toolSpec": {"name": "parallel_local_search_two", "description":
"Local search tool #2 for concurrency testing.", "inputSchema": {"json": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}}},
{"toolSpec": {"name": "parallel_local_search_three", "description": "Local search
tool #3 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
{"description": "Search query", "title": "Query", "type": "string"}}, "required":
["query"], "type": "object", "additionalProperties": false}}}}]}}'
headers:
Content-Length:
- '1954'
Content-Type:
- !!binary |
YXBwbGljYXRpb24vanNvbg==
User-Agent:
- X-USER-AGENT-XXX
amz-sdk-invocation-id:
- AMZ-SDK-INVOCATION-ID-XXX
amz-sdk-request:
- !!binary |
YXR0ZW1wdD0x
authorization:
- AUTHORIZATION-XXX
x-amz-date:
- X-AMZ-DATE-XXX
method: POST
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
response:
body:
string: '{"message":"The security token included in the request is invalid."}'
headers:
Connection:
- keep-alive
Content-Length:
- '68'
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 00:00:07 GMT
x-amzn-ErrorType:
- UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
x-amzn-RequestId:
- X-AMZN-REQUESTID-XXX
status:
code: 403
message: Forbidden
- request:
body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
is a tool-calling compliance test. In your next assistant turn, emit exactly
3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."}]}, {"role": "user", "content":
[{"text": "\nCurrent Task: This is a tool-calling compliance test. In your next
assistant turn, emit exactly 3 tool calls in the same response (parallel tool
calls), in this order: 1) parallel_local_search_one(query=''latest OpenAI model
release notes''), 2) parallel_local_search_two(query=''latest Anthropic model
release notes''), 3) parallel_local_search_three(query=''latest Gemini model
release notes''). Do not call any other tools and do not answer before those
3 tool calls are emitted. After the tool results return, provide a one paragraph
summary.\n\nThis is the expected criteria for your final answer: A one sentence
summary of both tool outputs\nyou MUST return the actual complete content as
the final answer, not a summary."}]}], "inferenceConfig": {"stopSequences":
["\nObservation:"]}, "system": [{"text": "You are Parallel Tool Agent. You follow
tool instructions precisely.\nYour personal goal is: Use both tools exactly
as instructed\n\nYou are Parallel Tool Agent. You follow tool instructions precisely.\nYour
personal goal is: Use both tools exactly as instructed"}], "toolConfig": {"tools":
[{"toolSpec": {"name": "parallel_local_search_one", "description": "Local search
tool #1 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
{"description": "Search query", "title": "Query", "type": "string"}}, "required":
["query"], "type": "object", "additionalProperties": false}}}}, {"toolSpec":
{"name": "parallel_local_search_two", "description": "Local search tool #2 for
concurrency testing.", "inputSchema": {"json": {"properties": {"query": {"description":
"Search query", "title": "Query", "type": "string"}}, "required": ["query"],
"type": "object", "additionalProperties": false}}}}, {"toolSpec": {"name": "parallel_local_search_three",
"description": "Local search tool #3 for concurrency testing.", "inputSchema":
{"json": {"properties": {"query": {"description": "Search query", "title": "Query",
"type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
false}}}}]}}'
headers:
Content-Length:
- '2855'
Content-Type:
- !!binary |
YXBwbGljYXRpb24vanNvbg==
User-Agent:
- X-USER-AGENT-XXX
amz-sdk-invocation-id:
- AMZ-SDK-INVOCATION-ID-XXX
amz-sdk-request:
- !!binary |
YXR0ZW1wdD0x
authorization:
- AUTHORIZATION-XXX
x-amz-date:
- X-AMZ-DATE-XXX
method: POST
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
response:
body:
string: '{"message":"The security token included in the request is invalid."}'
headers:
Connection:
- keep-alive
Content-Length:
- '68'
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 00:00:07 GMT
x-amzn-ErrorType:
- UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
x-amzn-RequestId:
- X-AMZN-REQUESTID-XXX
status:
code: 403
message: Forbidden
- request:
body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
is a tool-calling compliance test. In your next assistant turn, emit exactly
3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."}]}, {"role": "user", "content":
[{"text": "\nCurrent Task: This is a tool-calling compliance test. In your next
assistant turn, emit exactly 3 tool calls in the same response (parallel tool
calls), in this order: 1) parallel_local_search_one(query=''latest OpenAI model
release notes''), 2) parallel_local_search_two(query=''latest Anthropic model
release notes''), 3) parallel_local_search_three(query=''latest Gemini model
release notes''). Do not call any other tools and do not answer before those
3 tool calls are emitted. After the tool results return, provide a one paragraph
summary.\n\nThis is the expected criteria for your final answer: A one sentence
summary of both tool outputs\nyou MUST return the actual complete content as
the final answer, not a summary."}]}, {"role": "user", "content": [{"text":
"\nCurrent Task: This is a tool-calling compliance test. In your next assistant
turn, emit exactly 3 tool calls in the same response (parallel tool calls),
in this order: 1) parallel_local_search_one(query=''latest OpenAI model release
notes''), 2) parallel_local_search_two(query=''latest Anthropic model release
notes''), 3) parallel_local_search_three(query=''latest Gemini model release
notes''). Do not call any other tools and do not answer before those 3 tool
calls are emitted. After the tool results return, provide a one paragraph summary.\n\nThis
is the expected criteria for your final answer: A one sentence summary of both
tool outputs\nyou MUST return the actual complete content as the final answer,
not a summary."}]}], "inferenceConfig": {"stopSequences": ["\nObservation:"]},
"system": [{"text": "You are Parallel Tool Agent. You follow tool instructions
precisely.\nYour personal goal is: Use both tools exactly as instructed\n\nYou
are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
goal is: Use both tools exactly as instructed\n\nYou are Parallel Tool Agent.
You follow tool instructions precisely.\nYour personal goal is: Use both tools
exactly as instructed"}], "toolConfig": {"tools": [{"toolSpec": {"name": "parallel_local_search_one",
"description": "Local search tool #1 for concurrency testing.", "inputSchema":
{"json": {"properties": {"query": {"description": "Search query", "title": "Query",
"type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
false}}}}, {"toolSpec": {"name": "parallel_local_search_two", "description":
"Local search tool #2 for concurrency testing.", "inputSchema": {"json": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}}},
{"toolSpec": {"name": "parallel_local_search_three", "description": "Local search
tool #3 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
{"description": "Search query", "title": "Query", "type": "string"}}, "required":
["query"], "type": "object", "additionalProperties": false}}}}]}}'
headers:
Content-Length:
- '3756'
Content-Type:
- !!binary |
YXBwbGljYXRpb24vanNvbg==
User-Agent:
- X-USER-AGENT-XXX
amz-sdk-invocation-id:
- AMZ-SDK-INVOCATION-ID-XXX
amz-sdk-request:
- !!binary |
YXR0ZW1wdD0x
authorization:
- AUTHORIZATION-XXX
x-amz-date:
- X-AMZ-DATE-XXX
method: POST
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
response:
body:
string: '{"message":"The security token included in the request is invalid."}'
headers:
Connection:
- keep-alive
Content-Length:
- '68'
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 00:00:07 GMT
x-amzn-ErrorType:
- UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
x-amzn-RequestId:
- X-AMZN-REQUESTID-XXX
status:
code: 403
message: Forbidden
version: 1

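The Gemini cassette that follows records calls made through the google-genai SDK (the new hunks pin `google-genai-sdk/1.49.0` in `x-goog-api-client`). A minimal sketch of the call shape behind these request bodies; the key handling and prompt literals are abbreviated:

```python
from google import genai
from google.genai import types

client = genai.Client(api_key="...")  # sent as the x-goog-api-key header

response = client.models.generate_content(
    model="gemini-2.0-flash-exp",
    contents="\nCurrent Task: Calculate what is 15 * 8 ...",  # prompt abbreviated
    config=types.GenerateContentConfig(
        system_instruction=(
            "You are Math Assistant. You are a helpful math assistant.\n"
            "Your personal goal is: Help users with mathematical calculations"
        ),
        stop_sequences=["\nObservation:"],
    ),
)
```

With a `tools=` list in the config, the SDK emits the `functionDeclarations` block seen in the recorded bodies.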
View File

@@ -3,14 +3,14 @@ interactions:
body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
* 8\n\nThis is the expected criteria for your final answer: The result of the
calculation\nyou MUST return the actual complete content as the final answer,
not a summary."}], "role": "user"}], "systemInstruction": {"parts": [{"text":
"You are Math Assistant. You are a helpful math assistant.\nYour personal goal
is: Help users with mathematical calculations"}], "role": "user"}, "tools":
[{"functionDeclarations": [{"description": "Perform mathematical calculations.
Use this for any math operations.", "name": "calculator", "parameters_json_schema":
{"properties": {"expression": {"description": "Mathematical expression to evaluate",
"title": "Expression", "type": "string"}}, "required": ["expression"], "type":
"object", "additionalProperties": false}}]}], "generationConfig": {"stopSequences":
not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
"role": "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
"Perform mathematical calculations. Use this for any math operations.", "name":
"calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
["\nObservation:"]}}'
headers:
User-Agent:
@@ -22,7 +22,7 @@ interactions:
connection:
- keep-alive
content-length:
- '892'
- '907'
content-type:
- application/json
host:
@@ -32,31 +32,31 @@ interactions:
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"functionCall\": {\n \"name\": \"calculator\",\n
\ \"args\": {\n \"expression\": \"15 * 8\"\n }\n
\ },\n \"thoughtSignature\": \"Cp8DAb4+9vu74rJ0QQNTa6oMMh3QAlvx3cS4TL0I1od7EdQZtMBbsr5viQiTUR/LKj8nwPvtLjZxib5SXqmV0t2B2ZMdq1nqD62vLPD3i7tmUeRoysODfxomRGRhy/CPysMhobt5HWF1W/n6tNiQz3V36f0/dRx5yJeyN4tJL/RZePv77FUqywOfFlYOkOIyAkrE5LT6FicOjhHm/B9bGV/y7TNmN6TtwQDxoE9nU92Q/UNZ7rNyZE7aSR7KPJZuRXrrBBh+akt5dX5n6N9kGWkyRpWVgUox01+b22RSj4S/QY45IvadtmmkFk8DMVAtAnEiK0WazltC+TOdUJHwVgBD494fngoVcHU+R1yIJrVe7h6Ce3Ts5IYLrRCedDU3wW1ghn/hXx1nvTqQumpsGTGtE2v3KjF/7DmQA96WzB1X7+QUOF2J3pK9HemiKxAQl4U9fP2eNN8shvy2YykBlahWDujEwye7ji4wIWtNHbf0t+uFwGTQ3QruAKXvWB04ExjHM2I/8O9U5tOsH0cwPqnpFR2EaTqaPXXUllZ2K+DaaA==\"\n
\ }\n ],\n \"role\": \"model\"\n },\n \"finishReason\":
\"STOP\",\n \"index\": 0,\n \"finishMessage\": \"Model generated
function call(s).\"\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
115,\n \"candidatesTokenCount\": 17,\n \"totalTokenCount\": 227,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 115\n
\ }\n ],\n \"thoughtsTokenCount\": 95\n },\n \"modelVersion\":
\"gemini-2.5-flash\",\n \"responseId\": \"Y1KWadvNMKz1jMcPiJeJmAI\"\n}\n"
\ }\n }\n ],\n \"role\": \"model\"\n },\n
\ \"finishReason\": \"STOP\",\n \"avgLogprobs\": -0.00062879999833447594\n
\ }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": 103,\n \"candidatesTokenCount\":
7,\n \"totalTokenCount\": 110,\n \"promptTokensDetails\": [\n {\n
\ \"modality\": \"TEXT\",\n \"tokenCount\": 103\n }\n ],\n
\ \"candidatesTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
\ \"tokenCount\": 7\n }\n ]\n },\n \"modelVersion\": \"gemini-2.0-flash-exp\",\n
\ \"responseId\": \"PpByabfUHsih_uMPlu2ysAM\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Wed, 18 Feb 2026 23:59:32 GMT
- Thu, 22 Jan 2026 21:01:50 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=956
- gfet4t7; dur=521
Transfer-Encoding:
- chunked
Vary:
@@ -76,19 +76,18 @@ interactions:
body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
* 8\n\nThis is the expected criteria for your final answer: The result of the
calculation\nyou MUST return the actual complete content as the final answer,
not a summary."}], "role": "user"}, {"parts": [{"functionCall": {"args": {"expression":
"15 * 8"}, "name": "calculator"}}], "role": "model"}, {"parts": [{"functionResponse":
{"name": "calculator", "response": {"result": "The result of 15 * 8 is 120"}}}],
"role": "user"}, {"parts": [{"text": "Analyze the tool result. If requirements
are met, provide the Final Answer. Otherwise, call the next tool. Deliver only
the answer without meta-commentary."}], "role": "user"}], "systemInstruction":
{"parts": [{"text": "You are Math Assistant. You are a helpful math assistant.\nYour
personal goal is: Help users with mathematical calculations"}], "role": "user"},
"tools": [{"functionDeclarations": [{"description": "Perform mathematical calculations.
Use this for any math operations.", "name": "calculator", "parameters_json_schema":
{"properties": {"expression": {"description": "Mathematical expression to evaluate",
"title": "Expression", "type": "string"}}, "required": ["expression"], "type":
"object", "additionalProperties": false}}]}], "generationConfig": {"stopSequences":
not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
"role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
"The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
the tool result. If requirements are met, provide the Final Answer. Otherwise,
call the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
"Perform mathematical calculations. Use this for any math operations.", "name":
"calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
["\nObservation:"]}}'
headers:
User-Agent:
@@ -100,7 +99,7 @@ interactions:
connection:
- keep-alive
content-length:
- '1326'
- '1219'
content-type:
- application/json
host:
@@ -110,28 +109,378 @@ interactions:
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"The result of 15 * 8 is 120\"\n }\n
\ ],\n \"role\": \"model\"\n },\n \"finishReason\":
\"STOP\",\n \"index\": 0\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
191,\n \"candidatesTokenCount\": 14,\n \"totalTokenCount\": 205,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 191\n
\ }\n ]\n },\n \"modelVersion\": \"gemini-2.5-flash\",\n \"responseId\":
\"ZFKWaf2BMM6MjMcP6P--kQM\"\n}\n"
[\n {\n \"functionCall\": {\n \"name\": \"calculator\",\n
\ \"args\": {\n \"expression\": \"15 * 8\"\n }\n
\ }\n }\n ],\n \"role\": \"model\"\n },\n
\ \"finishReason\": \"STOP\",\n \"avgLogprobs\": -0.013549212898526872\n
\ }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": 149,\n \"candidatesTokenCount\":
7,\n \"totalTokenCount\": 156,\n \"promptTokensDetails\": [\n {\n
\ \"modality\": \"TEXT\",\n \"tokenCount\": 149\n }\n ],\n
\ \"candidatesTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
\ \"tokenCount\": 7\n }\n ]\n },\n \"modelVersion\": \"gemini-2.0-flash-exp\",\n
\ \"responseId\": \"P5Byadc8kJT-4w_p99XQAQ\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Wed, 18 Feb 2026 23:59:33 GMT
- Thu, 22 Jan 2026 21:01:51 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=421
- gfet4t7; dur=444
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
- request:
body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
* 8\n\nThis is the expected criteria for your final answer: The result of the
calculation\nyou MUST return the actual complete content as the final answer,
not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
"role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
"The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
the tool result. If requirements are met, provide the Final Answer. Otherwise,
call the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
"Perform mathematical calculations. Use this for any math operations.", "name":
"calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
["\nObservation:"]}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '1531'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.13.3
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"functionCall\": {\n \"name\": \"calculator\",\n
\ \"args\": {\n \"expression\": \"15 * 8\"\n }\n
\ }\n }\n ],\n \"role\": \"model\"\n },\n
\ \"finishReason\": \"STOP\",\n \"avgLogprobs\": -0.0409286447933742\n
\ }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": 195,\n \"candidatesTokenCount\":
7,\n \"totalTokenCount\": 202,\n \"promptTokensDetails\": [\n {\n
\ \"modality\": \"TEXT\",\n \"tokenCount\": 195\n }\n ],\n
\ \"candidatesTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
\ \"tokenCount\": 7\n }\n ]\n },\n \"modelVersion\": \"gemini-2.0-flash-exp\",\n
\ \"responseId\": \"P5Byadn5HOK6_uMPnvmXwAk\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Thu, 22 Jan 2026 21:01:51 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=503
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
- request:
body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
* 8\n\nThis is the expected criteria for your final answer: The result of the
calculation\nyou MUST return the actual complete content as the final answer,
not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
"role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
"The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
the tool result. If requirements are met, provide the Final Answer. Otherwise,
call the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
"Perform mathematical calculations. Use this for any math operations.", "name":
"calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
["\nObservation:"]}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '1843'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.13.3
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"functionCall\": {\n \"name\": \"calculator\",\n
\ \"args\": {\n \"expression\": \"15 * 8\"\n }\n
\ }\n }\n ],\n \"role\": \"model\"\n },\n
\ \"finishReason\": \"STOP\",\n \"avgLogprobs\": -0.018002046006066457\n
\ }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": 241,\n \"candidatesTokenCount\":
7,\n \"totalTokenCount\": 248,\n \"promptTokensDetails\": [\n {\n
\ \"modality\": \"TEXT\",\n \"tokenCount\": 241\n }\n ],\n
\ \"candidatesTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
\ \"tokenCount\": 7\n }\n ]\n },\n \"modelVersion\": \"gemini-2.0-flash-exp\",\n
\ \"responseId\": \"P5Byafi2PKbn_uMPtIbfuQI\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Thu, 22 Jan 2026 21:01:52 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=482
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
- request:
body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
* 8\n\nThis is the expected criteria for your final answer: The result of the
calculation\nyou MUST return the actual complete content as the final answer,
not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
"role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
"The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
the tool result. If requirements are met, provide the Final Answer. Otherwise,
call the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
"Perform mathematical calculations. Use this for any math operations.", "name":
"calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
["\nObservation:"]}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '2155'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.13.3
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"functionCall\": {\n \"name\": \"calculator\",\n
\ \"args\": {\n \"expression\": \"15 * 8\"\n }\n
\ }\n }\n ],\n \"role\": \"model\"\n },\n
\ \"finishReason\": \"STOP\",\n \"avgLogprobs\": -0.10329001290457589\n
\ }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": 287,\n \"candidatesTokenCount\":
7,\n \"totalTokenCount\": 294,\n \"promptTokensDetails\": [\n {\n
\ \"modality\": \"TEXT\",\n \"tokenCount\": 287\n }\n ],\n
\ \"candidatesTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
\ \"tokenCount\": 7\n }\n ]\n },\n \"modelVersion\": \"gemini-2.0-flash-exp\",\n
\ \"responseId\": \"QJByaamVIP_g_uMPt6mI0Qg\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Thu, 22 Jan 2026 21:01:52 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=534
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
- request:
body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
* 8\n\nThis is the expected criteria for your final answer: The result of the
calculation\nyou MUST return the actual complete content as the final answer,
not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
"role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
"The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
the tool result. If requirements are met, provide the Final Answer. Otherwise,
call the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
tool result. If requirements are met, provide the Final Answer. Otherwise, call
the next tool. Deliver only the answer without meta-commentary."}], "role":
"user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
"Perform mathematical calculations. Use this for any math operations.", "name":
"calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
["\nObservation:"]}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '2467'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.13.3
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"120\\n\"\n }\n ],\n
\ \"role\": \"model\"\n },\n \"finishReason\": \"STOP\",\n
\ \"avgLogprobs\": -0.0097615998238325119\n }\n ],\n \"usageMetadata\":
{\n \"promptTokenCount\": 333,\n \"candidatesTokenCount\": 4,\n \"totalTokenCount\":
337,\n \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
\ \"tokenCount\": 333\n }\n ],\n \"candidatesTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 4\n }\n
\ ]\n },\n \"modelVersion\": \"gemini-2.0-flash-exp\",\n \"responseId\":
\"QZByaZHABO-i_uMP58aYqAk\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Thu, 22 Jan 2026 21:01:53 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=412
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK

View File

@@ -1,188 +0,0 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."}], "role": "user"}], "systemInstruction": {"parts": [{"text":
"You are Parallel Tool Agent. You follow tool instructions precisely.\nYour
personal goal is: Use both tools exactly as instructed"}], "role": "user"},
"tools": [{"functionDeclarations": [{"description": "Local search tool #1 for
concurrency testing.", "name": "parallel_local_search_one", "parameters_json_schema":
{"properties": {"query": {"description": "Search query", "title": "Query", "type":
"string"}}, "required": ["query"], "type": "object", "additionalProperties":
false}}, {"description": "Local search tool #2 for concurrency testing.", "name":
"parallel_local_search_two", "parameters_json_schema": {"properties": {"query":
{"description": "Search query", "title": "Query", "type": "string"}}, "required":
["query"], "type": "object", "additionalProperties": false}}, {"description":
"Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
"parameters_json_schema": {"properties": {"query": {"description": "Search query",
"title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
"additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '1783'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.13.3
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"functionCall\": {\n \"name\": \"parallel_local_search_one\",\n
\ \"args\": {\n \"query\": \"latest OpenAI model
release notes\"\n }\n },\n \"thoughtSignature\":
\"CrICAb4+9vtrrkiSatPyOs7fssb9akcgCIiQdJKp/k+hcEZVNFvU/H0e4FFmLIhTCPRyHxmU+AQPtBZ5vg6y9ZCcv11RdcWgYW8rPQzCnC+YTUxPAfDzaObky1QsL5pl9+yglQqVoVM31ZcnoiH02z85pwAv6TSJxdJZEekW6XwcIrCoHNCgY3ghHFEd3y3wLJ5JWL7wmiRNTC9TCT8aJHXKFohYrb+4JMULCx8BqKVxOucZPiDHA8GsoqSlzkYEe2xCh9oSdaZpCFrxhZ9bwoVDbVmPrjaq2hj5BoJ5hNxscHJ/E0EOl4ogeKZW+hIVfdzpjAFZW9Oejkb9G4ZSLbxXsoO7x8bi4LHFRABniGrWvNuOOH0Udh4t57oXHXZO4u5NNTood/GkJGcP+aHqUAH1fwqL\"\n
\ },\n {\n \"functionCall\": {\n \"name\":
\"parallel_local_search_two\",\n \"args\": {\n \"query\":
\"latest Anthropic model release notes\"\n }\n }\n
\ },\n {\n \"functionCall\": {\n \"name\":
\"parallel_local_search_three\",\n \"args\": {\n \"query\":
\"latest Gemini model release notes\"\n }\n }\n }\n
\ ],\n \"role\": \"model\"\n },\n \"finishReason\":
\"STOP\",\n \"index\": 0,\n \"finishMessage\": \"Model generated
function call(s).\"\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
291,\n \"candidatesTokenCount\": 70,\n \"totalTokenCount\": 428,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 291\n
\ }\n ],\n \"thoughtsTokenCount\": 67\n },\n \"modelVersion\":
\"gemini-2.5-flash\",\n \"responseId\": \"alKWacytCLi5jMcPhISaoAI\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Wed, 18 Feb 2026 23:59:39 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=999
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
- request:
body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."}], "role": "user"}, {"parts": [{"functionCall": {"args":
{"query": "latest OpenAI model release notes"}, "name": "parallel_local_search_one"},
"thoughtSignature": "CrICAb4-9vtrrkiSatPyOs7fssb9akcgCIiQdJKp_k-hcEZVNFvU_H0e4FFmLIhTCPRyHxmU-AQPtBZ5vg6y9ZCcv11RdcWgYW8rPQzCnC-YTUxPAfDzaObky1QsL5pl9-yglQqVoVM31ZcnoiH02z85pwAv6TSJxdJZEekW6XwcIrCoHNCgY3ghHFEd3y3wLJ5JWL7wmiRNTC9TCT8aJHXKFohYrb-4JMULCx8BqKVxOucZPiDHA8GsoqSlzkYEe2xCh9oSdaZpCFrxhZ9bwoVDbVmPrjaq2hj5BoJ5hNxscHJ_E0EOl4ogeKZW-hIVfdzpjAFZW9Oejkb9G4ZSLbxXsoO7x8bi4LHFRABniGrWvNuOOH0Udh4t57oXHXZO4u5NNTood_GkJGcP-aHqUAH1fwqL"},
{"functionCall": {"args": {"query": "latest Anthropic model release notes"},
"name": "parallel_local_search_two"}}, {"functionCall": {"args": {"query": "latest
Gemini model release notes"}, "name": "parallel_local_search_three"}}], "role":
"model"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_one",
"response": {"result": "[one] latest OpenAI model release notes"}}}], "role":
"user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_two",
"response": {"result": "[two] latest Anthropic model release notes"}}}], "role":
"user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_three",
"response": {"result": "[three] latest Gemini model release notes"}}}], "role":
"user"}], "systemInstruction": {"parts": [{"text": "You are Parallel Tool Agent.
You follow tool instructions precisely.\nYour personal goal is: Use both tools
exactly as instructed"}], "role": "user"}, "tools": [{"functionDeclarations":
[{"description": "Local search tool #1 for concurrency testing.", "name": "parallel_local_search_one",
"parameters_json_schema": {"properties": {"query": {"description": "Search query",
"title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
"additionalProperties": false}}, {"description": "Local search tool #2 for concurrency
testing.", "name": "parallel_local_search_two", "parameters_json_schema": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}, {"description":
"Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
"parameters_json_schema": {"properties": {"query": {"description": "Search query",
"title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
"additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '3071'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.13.3
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"Here is a summary of the latest model
release notes: I have retrieved information regarding the latest OpenAI model
release notes, the latest Anthropic model release notes, and the latest Gemini
model release notes. The specific details of these release notes are available
through the respective tool outputs.\",\n \"thoughtSignature\":
\"CsoBAb4+9vtPvWFM08lR1S4QrLN+Z1+Zpf04Y/bC8tjOpnxz3EEvHyRNEwkslUX5pftBi8J78Xk4/FUER0xjJZc8clUObTvayxLNup4h1JwJ5ZdatulInNGTEieFnF4w8KjSFB/vqNCZvXWZbiLkpzqAnsoAIf0x4VmMN11V0Ozo+3f2QftD+iBrfu3g21UI5tbG0Z+0QHxjRVKXrQOp7dmoZPzaxI0zalfDEI+A2jGpVl/VvauVNv0jQn0yItcA5tkVeWLq6717CjNoig==\"\n
\ }\n ],\n \"role\": \"model\"\n },\n \"finishReason\":
\"STOP\",\n \"index\": 0\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
435,\n \"candidatesTokenCount\": 54,\n \"totalTokenCount\": 524,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 435\n
\ }\n ],\n \"thoughtsTokenCount\": 35\n },\n \"modelVersion\":
\"gemini-2.5-flash\",\n \"responseId\": \"bFKWaZOZCqCvjMcPvvGNgAc\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Wed, 18 Feb 2026 23:59:41 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=967
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1

View File

@@ -1,192 +0,0 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."}], "role": "user"}], "systemInstruction":
{"parts": [{"text": "You are Parallel Tool Agent. You follow tool instructions
precisely.\nYour personal goal is: Use both tools exactly as instructed"}],
"role": "user"}, "tools": [{"functionDeclarations": [{"description": "Local
search tool #1 for concurrency testing.", "name": "parallel_local_search_one",
"parameters_json_schema": {"properties": {"query": {"description": "Search query",
"title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
"additionalProperties": false}}, {"description": "Local search tool #2 for concurrency
testing.", "name": "parallel_local_search_two", "parameters_json_schema": {"properties":
{"query": {"description": "Search query", "title": "Query", "type": "string"}},
"required": ["query"], "type": "object", "additionalProperties": false}}, {"description":
"Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
"parameters_json_schema": {"properties": {"query": {"description": "Search query",
"title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
"additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '1964'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.13.3
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"functionCall\": {\n \"name\": \"parallel_local_search_one\",\n
\ \"args\": {\n \"query\": \"latest OpenAI model
release notes\"\n }\n },\n \"thoughtSignature\":
\"CuMEAb4+9vu1V1iOC9o/a8+jQqow8F4RTrjlnjnDCwsisMHLLJ+Wj3pZxbFDeIjCJe9pa6+14InyYHh/ezgHrv+xPGIJtX9pJQatDCBAfCmcZ3fDipVIMAHLcl0Q660EVuZ+vRgvNhPSau+uSN9u303wJsaKvdzOQnfww2LfLtJMNtOhSHfkfhfw2bkBOtMa5/FuLqKSr6m94dSdE7HShR6+jLMLbiSXkBLWsRp0jGl85Wvd0hoA7dUyq+uIuyOBr5Myo9uMrLbxfnrRRbPMorOpYTCmHK0HE8mEBRjzh1hNwcBcfRL0VcgA2UnBIurStIeVbq51BJQ1UOq6r1wVi50Wdh1GjIQ/iN9C15T1Ql3adjom5QbmY+XY08RJOiNyVplh1YQ0qlWCVHEpueEfdzcIB+BUauVrLNqBcBr5g6ekO5QZCAdt7PLerQU8jhKjDQy367jCKQyaHir0GmAISS8RlZ8tkLKNZlZhd11D76ui6X8ep9yznViBbqH0AS1R2hMm+ielMVFjhidglTMjqB0X+yk1K2eZXkc+R/xsXRPlnlZWRygnV+IbU8RAnZWtneM464Wccmc1scfF45GKiji5bLYO7Zx+ZF8mSLcQaC8M3z121D6VbFonhaIdkJ3Wb7nI2vEyxFjdinVk3/P0zL8nu3nHeqQviTrQIoHMsZk0yPyqu9NWxg3wGJL5pbcaQh87ROQuTsInkuzzEr0QMzjw9W5iquhMh4/Wy/OKXAgf3maQB9Jb4HoHZlc0io+KYqewFSVx2BvqXbqJbIrTkTo6XRTbK7dkwlCbMmE1wKIwjrrzZQI=\"\n
\ },\n {\n \"functionCall\": {\n \"name\":
\"parallel_local_search_two\",\n \"args\": {\n \"query\":
\"latest Anthropic model release notes\"\n }\n }\n
\ },\n {\n \"functionCall\": {\n \"name\":
\"parallel_local_search_three\",\n \"args\": {\n \"query\":
\"latest Gemini model release notes\"\n }\n }\n }\n
\ ],\n \"role\": \"model\"\n },\n \"finishReason\":
\"STOP\",\n \"index\": 0,\n \"finishMessage\": \"Model generated
function call(s).\"\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
327,\n \"candidatesTokenCount\": 70,\n \"totalTokenCount\": 536,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 327\n
\ }\n ],\n \"thoughtsTokenCount\": 139\n },\n \"modelVersion\":
\"gemini-2.5-flash\",\n \"responseId\": \"ZVKWabziF7bcjMcP3r2SuAg\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Wed, 18 Feb 2026 23:59:34 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=1262
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
- request:
body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."}], "role": "user"}, {"parts": [{"functionCall":
{"args": {"query": "latest OpenAI model release notes"}, "name": "parallel_local_search_one"}},
{"functionCall": {"args": {"query": "latest Anthropic model release notes"},
"name": "parallel_local_search_two"}}, {"functionCall": {"args": {"query": "latest
Gemini model release notes"}, "name": "parallel_local_search_three"}}], "role":
"model"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_one",
"response": {"result": "[one] latest OpenAI model release notes"}}}], "role":
"user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_two",
"response": {"result": "[two] latest Anthropic model release notes"}}}], "role":
"user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_three",
"response": {"result": "[three] latest Gemini model release notes"}}}], "role":
"user"}, {"parts": [{"text": "Analyze the tool result. If requirements are met,
provide the Final Answer. Otherwise, call the next tool. Deliver only the answer
without meta-commentary."}], "role": "user"}], "systemInstruction": {"parts":
[{"text": "You are Parallel Tool Agent. You follow tool instructions precisely.\nYour
personal goal is: Use both tools exactly as instructed"}], "role": "user"},
"tools": [{"functionDeclarations": [{"description": "Local search tool #1 for
concurrency testing.", "name": "parallel_local_search_one", "parameters_json_schema":
{"properties": {"query": {"description": "Search query", "title": "Query", "type":
"string"}}, "required": ["query"], "type": "object", "additionalProperties":
false}}, {"description": "Local search tool #2 for concurrency testing.", "name":
"parallel_local_search_two", "parameters_json_schema": {"properties": {"query":
{"description": "Search query", "title": "Query", "type": "string"}}, "required":
["query"], "type": "object", "additionalProperties": false}}, {"description":
"Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
"parameters_json_schema": {"properties": {"query": {"description": "Search query",
"title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
"additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '3014'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.13.3
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"The search results indicate the latest
model release notes for OpenAI, Anthropic, and Gemini are: [one] latest OpenAI
model release notes[two] latest Anthropic model release notes[three] latest
Gemini model release notes.\",\n \"thoughtSignature\": \"CsUPAb4+9vs4hkuatQAakl1FSHx5DIde9nHYobJdlWs2HEzES9gHn7uwjMIlFPTzJUbnZqxpAK93hqsCofdfGANr8dwK+/IbZAiMSikpAq2ZjEbWADjfalU3ke4LcQMh6TEYFVGz1QCinjne3jZx5jOVaL8YdAtjOYnBZWA6KqdvfKjD7+Ct/BLoEqvu4LW6kxhXQgcV+D3M1QxGlr1dxpajj4wyYFI9LXchE2vCdAMPYTkPQ4WPbS3xjz0jJb6qFAwwg+BY5kGemkWWVHsvq28t09pd7FEH0bod5cEpR65qEefpJfhHsXYqmOwHDkfNePYnYC+5qmn7kvkN+fhF41SoMRZahMZGDjIo+q6vvru3eXKmZiuLsrh8AqQIks/4S3sSuxt16ogYKE+LlFxml2ygXFPww59nRAtc+xK6VW8jB2vyv9Eo5cpnG9ZBv1dOznJnmj4AWA1ddMlp+yq8AdaboTSo5dysYMwFcSXS3kuU+xi92dC+7GqZZbDr5frvnc+MnSuzYwHhNjSQqvTo5DKGit53zDwlFJT74kLBXk36BOFQp4xlfs+BpKkw11bow6qQoTvC68D023ZHami+McO1WYBDoO5CrDoosU8fAYljqaGArBoMlssF4O7VKHEaEbEZnYCr0Wxo6XP/mtPIpHQE4OyCz/GAJSJtQv1hO7DNCMzpSpkLyuemB1SOZGl3mlLQhosh3TAGP0xgqmHpKccdCSWoXGWjO48VluFuV9E1FwW1Xi++XhMRcUaljJXPZaNVjGcAG1uAxeVkUMsY8tBvQ0vaumUK2jkzbyQTWeStEWwl1yKmklI8JDXske/k6tYJOyF+8t0mF7oCEqNHSNicj7TomihpPlVjNl1Mm4l5fvwlKtAPJwiKrchCunlZB3uGN1AR0h0Hvznffutc/lV/FWFbNgFAaNJZKRs40vMk1xmRZyH2rs+Ob2fZriQ3BSwzzNeiwDLXxm0m/ytOai+K9ObFuC/IEh5fJfvQbNeo3TmiCAMCZPNXMDtlOyLqQzzKwmMFH4c53Ol+kkTiuAKECNQR1dOCufAL0U5lzEUFRxFvOq67lp6xqG8m+WzCIkbnF8QyJHfujtXVMJACaevUkM7+kAVyTwETEKQsanp0tBwzV42ieChp/h7pivcC++cFXdSG5dvR94BgkHmtpC9+jfNH32RREPLuyWfU5aBXiOkxjRs9fDexAFjrkGjM18I+jqHZNeuUR20BKe2jFsU8xJS3Fa4eXabm/YPL1t8R5jr572Ch/r4bspFp8MQ5RcFo8Nn/HiBmW8uZ2BcLEY1RPWUBvxVhfvh/hNxaRKu21x8vGz72RoiNuOjNbeADYAaBJqBGLp0MALxZ/rnXPzDLQUt6Mv07fWHAZr5p3r/skleot25lr2Tcl4qJCPM4/cfs6U0x4CY26ktBiCs4bWKqSEV1Q05nf5kpxVOIRSTgxqFOj/rWIAF3uw7mvsuRKd3YXILV5OrvEoETdQvf7BdYPbQbIQYDf7DBKhf51O8RKQgcfl6mVQswamdJ+PyqLbozTkFCjXMKI0PwJdy8tfKfCeeEe0TbOXSfeTczKQkL8WyWkBg4tS81JnWAVzfVlNjbvo/fk+wv7FyfJJS1HJGlxZ0kUlWi1369rSlldYPoSqopuekOxtYnpYpz92y/jVLNQXE1IVLqWYh9o3gTwjeyaHG7fCaWF2QRGrCUvejT8eJjevhj/sgadjPVcEP5o7Zcw5yTBCgc0+FX1j5KpCmfZ/dVvT4iIX8bOkhxjHQ8ifOx39BMM4EObgCA+g+BFN+Ra7kOf4hJ6tPNhqvJa4E4fyISlVrRiBqSt59ZkuLyWuY9SYy0nvbklP30WDUHSAvcuEwVMSuT524afHISfO/+tSgE7JAKzEPSOoVO3Z5NS9kcAqHuBSe/LL4XJbCKF9Oggm9/gwdAulnBANd4ydQ/raTPE/QUu/CGqqGhBd+wo8x0Jg/BMZWkwhz0fEzsh+OjnrEkHv4QIqZ9v/j1Rv9uc+cDeK7eGi62okGLrPFX2pNQtsZRdUM9aBSlTBUVSdCDpkvieENzLnR257EDZy1EV2HxGRfOFZVVdaW1n8XvL73pcFoQ5XABpfYuigOS8i4S8g43Qfe77GosnuXR5rcJCrL03q3hptb97K5ysKFLgumsaaWo92MBhZYKvQ6SwStgyWRlb22uQGQJYsS8OTD/uVNiQzFjOMsR/l71c9RI1Eb7SQJT6WWvL1YhA7sQw/lQf8soLKfWshoky6mMrGopjRak8xHpJe5VWbqK8PK6iXDd403JrHICyh4M3FpEja3eX2V3SN6U+EgIWKIE8lE/iQZakhLtG2KL7nNQy/cksxzIh5ElQCe5NkrQZO0fai6ek8qwbmz07RVg2FknD7F2hvmxZBqoJSXhsFVn/9+fnkcsZekEtUevFmlQQNspPc63XgO0XmpTye9uM/BbTEsNEWeHSFZTEQLLx1l+pgwsYO3NlNSIUN24/GIR7JrZFG4fAoljkDKjhrYQzr1Fiy3t5G+CmadZ0TcjRQQdDw36ETlf7cizcrQc4FNtnx5rNWEaf54vUvlsd2DD19UIkzP9omITsiuNPPcUNq0A6v1TkgnSNYfhb26nxJIg34r8MmCAhWzB2eCy54gvOHDGLFAwfFZrQdvl\"\n
\ }\n ],\n \"role\": \"model\"\n },\n \"finishReason\":
\"STOP\",\n \"index\": 0\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
504,\n \"candidatesTokenCount\": 45,\n \"totalTokenCount\": 973,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 504\n
\ }\n ],\n \"thoughtsTokenCount\": 424\n },\n \"modelVersion\":
\"gemini-2.5-flash\",\n \"responseId\": \"Z1KWaYbTKZvnjMcP7piEoAg\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Wed, 18 Feb 2026 23:59:37 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=2283
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1

View File

@@ -5,9 +5,9 @@ interactions:
calculations"},{"role":"user","content":"\nCurrent Task: Calculate what is 15
* 8\n\nThis is the expected criteria for your final answer: The result of the
calculation\nyou MUST return the actual complete content as the final answer,
not a summary."}],"model":"gpt-5-nano","tool_choice":"auto","tools":[{"type":"function","function":{"name":"calculator","description":"Perform
mathematical calculations. Use this for any math operations.","strict":true,"parameters":{"properties":{"expression":{"description":"Mathematical
expression to evaluate","title":"Expression","type":"string"}},"required":["expression"],"type":"object","additionalProperties":false}}}]}'
not a summary.\n\nThis is VERY important to you, your job depends on it!"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"calculator","description":"Perform
mathematical calculations. Use this for any math operations.","parameters":{"properties":{"expression":{"description":"Mathematical
expression to evaluate","title":"Expression","type":"string"}},"required":["expression"],"type":"object"}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -20,7 +20,7 @@ interactions:
connection:
- keep-alive
content-length:
- '813'
- '829'
content-type:
- application/json
host:
@@ -47,17 +47,21 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DAlG9W2mJYuOgpf3FwCRgbqaiHWf3\",\n \"object\":
\"chat.completion\",\n \"created\": 1771457317,\n \"model\": \"gpt-5-nano-2025-08-07\",\n
string: "{\n \"id\": \"chatcmpl-D0vm7joOuDBPcMpfmOnftOoTCPtc8\",\n \"object\":
\"chat.completion\",\n \"created\": 1769114459,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"120\",\n \"refusal\": null,\n
\ \"annotations\": []\n },\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 208,\n \"completion_tokens\":
138,\n \"total_tokens\": 346,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_G73UZDvL4wC9EEdvm1UcRIRM\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"calculator\",\n
\ \"arguments\": \"{\\\"expression\\\":\\\"15 * 8\\\"}\"\n }\n
\ }\n ],\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 137,\n \"completion_tokens\":
17,\n \"total_tokens\": 154,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 128,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": null\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_c4585b5b9c\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -66,7 +70,126 @@ interactions:
Content-Type:
- application/json
Date:
- Wed, 18 Feb 2026 23:28:39 GMT
- Thu, 22 Jan 2026 20:40:59 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '761'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '1080'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. You are
a helpful math assistant.\nYour personal goal is: Help users with mathematical
calculations"},{"role":"user","content":"\nCurrent Task: Calculate what is 15
* 8\n\nThis is the expected criteria for your final answer: The result of the
calculation\nyou MUST return the actual complete content as the final answer,
not a summary.\n\nThis is VERY important to you, your job depends on it!"},{"role":"assistant","content":null,"tool_calls":[{"id":"call_G73UZDvL4wC9EEdvm1UcRIRM","type":"function","function":{"name":"calculator","arguments":"{\"expression\":\"15
* 8\"}"}}]},{"role":"tool","tool_call_id":"call_G73UZDvL4wC9EEdvm1UcRIRM","content":"The
result of 15 * 8 is 120"},{"role":"user","content":"Analyze the tool result.
If requirements are met, provide the Final Answer. Otherwise, call the next
tool. Deliver only the answer without meta-commentary."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"calculator","description":"Perform
mathematical calculations. Use this for any math operations.","parameters":{"properties":{"expression":{"description":"Mathematical
expression to evaluate","title":"Expression","type":"string"}},"required":["expression"],"type":"object"}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1299'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D0vm8mUnzLxu9pf1rc7MODkrMsCmf\",\n \"object\":
\"chat.completion\",\n \"created\": 1769114460,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"120\",\n \"refusal\": null,\n
\ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\":
\"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 207,\n \"completion_tokens\":
2,\n \"total_tokens\": 209,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_c4585b5b9c\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 22 Jan 2026 20:41:00 GMT
Server:
- cloudflare
Strict-Transport-Security:
@@ -84,13 +207,13 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1869'
- '262'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-envoy-upstream-service-time:
- '496'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:

View File

@@ -1,265 +0,0 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1733'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DAldZHfQGVcV3FNwAJAtNooU3PAU7\",\n \"object\":
\"chat.completion\",\n \"created\": 1771458769,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_kz1qLLRsugXwWiQMeH9oFAep\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"parallel_local_search_one\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest OpenAI model release
notes\\\"}\"\n }\n },\n {\n \"id\":
\"call_yNouGq1Kv6P5W9fhTng6acZi\",\n \"type\": \"function\",\n
\ \"function\": {\n \"name\": \"parallel_local_search_two\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest Anthropic model
release notes\\\"}\"\n }\n },\n {\n \"id\":
\"call_O7MqnuniDmyT6a0BS31GTunB\",\n \"type\": \"function\",\n
\ \"function\": {\n \"name\": \"parallel_local_search_three\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest Gemini model release
notes\\\"}\"\n }\n }\n ],\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"tool_calls\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
259,\n \"completion_tokens\": 78,\n \"total_tokens\": 337,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_414ba99a04\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Wed, 18 Feb 2026 23:52:50 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1418'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."},{"role":"assistant","content":null,"tool_calls":[{"id":"call_kz1qLLRsugXwWiQMeH9oFAep","type":"function","function":{"name":"parallel_local_search_one","arguments":"{\"query\":
\"latest OpenAI model release notes\"}"}},{"id":"call_yNouGq1Kv6P5W9fhTng6acZi","type":"function","function":{"name":"parallel_local_search_two","arguments":"{\"query\":
\"latest Anthropic model release notes\"}"}},{"id":"call_O7MqnuniDmyT6a0BS31GTunB","type":"function","function":{"name":"parallel_local_search_three","arguments":"{\"query\":
\"latest Gemini model release notes\"}"}}]},{"role":"tool","tool_call_id":"call_kz1qLLRsugXwWiQMeH9oFAep","name":"parallel_local_search_one","content":"[one]
latest OpenAI model release notes"},{"role":"tool","tool_call_id":"call_yNouGq1Kv6P5W9fhTng6acZi","name":"parallel_local_search_two","content":"[two]
latest Anthropic model release notes"},{"role":"tool","tool_call_id":"call_O7MqnuniDmyT6a0BS31GTunB","name":"parallel_local_search_three","content":"[three]
latest Gemini model release notes"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '2756'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DAldbawkFNpOeXbaJTkTlsSi7OiII\",\n \"object\":
\"chat.completion\",\n \"created\": 1771458771,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The latest release notes for OpenAI,
Anthropic, and Gemini models highlight significant updates and improvements
in each respective technology. OpenAI's notes detail new features and optimizations
that enhance user interaction and performance. Anthropic's release emphasizes
their focus on safety and alignment in AI development, showcasing advancements
in responsible AI practices. Gemini's notes underline their innovative approaches
and cutting-edge functionalities designed to push the boundaries of current
AI capabilities.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 377,\n \"completion_tokens\":
85,\n \"total_tokens\": 462,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_414ba99a04\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Wed, 18 Feb 2026 23:52:53 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1755'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -1,265 +0,0 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1929'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DAlddfEozIpgleBufPaffZMQWK0Hj\",\n \"object\":
\"chat.completion\",\n \"created\": 1771458773,\n \"model\": \"gpt-5-nano-2025-08-07\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_Putc2jV5GhiIZMwx8mDcI61Q\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"parallel_local_search_one\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest OpenAI model release
notes\\\"}\"\n }\n },\n {\n \"id\":
\"call_iyjwcvkL3PdoOddxsqkHCT9T\",\n \"type\": \"function\",\n
\ \"function\": {\n \"name\": \"parallel_local_search_two\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest Anthropic model
release notes\\\"}\"\n }\n },\n {\n \"id\":
\"call_G728RseEU7SbGk5YTiyyp9IH\",\n \"type\": \"function\",\n
\ \"function\": {\n \"name\": \"parallel_local_search_three\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest Gemini model release
notes\\\"}\"\n }\n }\n ],\n \"refusal\":
null,\n \"annotations\": []\n },\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 378,\n \"completion_tokens\":
1497,\n \"total_tokens\": 1875,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 1408,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": null\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Wed, 18 Feb 2026 23:53:08 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '14853'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."},{"role":"assistant","content":null,"tool_calls":[{"id":"call_Putc2jV5GhiIZMwx8mDcI61Q","type":"function","function":{"name":"parallel_local_search_one","arguments":"{\"query\":
\"latest OpenAI model release notes\"}"}},{"id":"call_iyjwcvkL3PdoOddxsqkHCT9T","type":"function","function":{"name":"parallel_local_search_two","arguments":"{\"query\":
\"latest Anthropic model release notes\"}"}},{"id":"call_G728RseEU7SbGk5YTiyyp9IH","type":"function","function":{"name":"parallel_local_search_three","arguments":"{\"query\":
\"latest Gemini model release notes\"}"}}]},{"role":"tool","tool_call_id":"call_Putc2jV5GhiIZMwx8mDcI61Q","name":"parallel_local_search_one","content":"[one]
latest OpenAI model release notes"},{"role":"tool","tool_call_id":"call_iyjwcvkL3PdoOddxsqkHCT9T","name":"parallel_local_search_two","content":"[two]
latest Anthropic model release notes"},{"role":"tool","tool_call_id":"call_G728RseEU7SbGk5YTiyyp9IH","name":"parallel_local_search_three","content":"[three]
latest Gemini model release notes"},{"role":"user","content":"Analyze the tool
result. If requirements are met, provide the Final Answer. Otherwise, call the
next tool. Deliver only the answer without meta-commentary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '3136'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DAldt2BXNqiYYLPgInjHCpYKfk2VK\",\n \"object\":
\"chat.completion\",\n \"created\": 1771458789,\n \"model\": \"gpt-5-nano-2025-08-07\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The results show the latest model release
notes for OpenAI, Anthropic, and Gemini.\",\n \"refusal\": null,\n
\ \"annotations\": []\n },\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 537,\n \"completion_tokens\":
2011,\n \"total_tokens\": 2548,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 1984,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": null\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Wed, 18 Feb 2026 23:53:25 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '15368'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
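
The cassette above captures the second half of a parallel tool-call round trip: an assistant message carrying three `tool_calls`, one `tool` message per `tool_call_id`, and a final text answer. Below is a minimal sketch of the client-side loop that produces this traffic, assuming the `openai` Python SDK (1.x); the tool bodies are hypothetical placeholders that echo their query the way the fixtures do, not CrewAI internals.

```python
# Sketch of the request/response loop recorded in the cassette above.
# Assumes the openai Python SDK (>=1.x); tool implementations are stubs.
import json

from openai import OpenAI

client = OpenAI()

# Tool schemas mirroring the three search tools in the fixture.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": f"parallel_local_search_{word}",
            "description": f"Local search tool #{i} for concurrency testing.",
            "strict": True,
            "parameters": {
                "type": "object",
                "properties": {"query": {"type": "string", "description": "Search query"}},
                "required": ["query"],
                "additionalProperties": False,
            },
        },
    }
    for i, word in enumerate(("one", "two", "three"), start=1)
]


def run_tool(name: str, query: str) -> str:
    # Stand-in for the local search tools: the fixture shows them echoing
    # the query prefixed with their ordinal, e.g. "[one] ...".
    return f"[{name.rsplit('_', 1)[-1]}] {query}"


messages = [
    {"role": "system", "content": "You are Parallel Tool Agent. ..."},
    {"role": "user", "content": "Emit exactly 3 tool calls in the same response ..."},
]

response = client.chat.completions.create(
    model="gpt-5-nano", messages=messages, tools=TOOLS, tool_choice="auto"
)
msg = response.choices[0].message

if msg.tool_calls:  # finish_reason == "tool_calls" in the recording
    messages.append(
        {"role": "assistant", "content": None,
         "tool_calls": [tc.model_dump() for tc in msg.tool_calls]}
    )
    for tc in msg.tool_calls:
        args = json.loads(tc.function.arguments)
        messages.append({
            "role": "tool",
            "tool_call_id": tc.id,
            "name": tc.function.name,
            "content": run_tool(tc.function.name, args["query"]),
        })
    # The second request returns the final summary seen above.
    final = client.chat.completions.create(
        model="gpt-5-nano", messages=messages, tools=TOOLS, tool_choice="auto"
    )
    print(final.choices[0].message.content)
```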


@@ -1,264 +0,0 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1748'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DB244zBgA66fzl8TNcIPRWoE4lDIQ\",\n \"object\":
\"chat.completion\",\n \"created\": 1771521916,\n \"model\": \"gpt-5-nano-2025-08-07\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_D2ojRWqkng6krQ51vWQEU8wR\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"parallel_local_search_one\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest OpenAI model release
notes\\\"}\"\n }\n },\n {\n \"id\":
\"call_v1tpTKw1sYcI75SWG1LCkAC3\",\n \"type\": \"function\",\n
\ \"function\": {\n \"name\": \"parallel_local_search_two\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest Anthropic model
release notes\\\"}\"\n }\n },\n {\n \"id\":
\"call_RrbyZClymnngoNLhlkQLLpwM\",\n \"type\": \"function\",\n
\ \"function\": {\n \"name\": \"parallel_local_search_three\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest Gemini model release
notes\\\"}\"\n }\n }\n ],\n \"refusal\":
null,\n \"annotations\": []\n },\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 343,\n \"completion_tokens\":
855,\n \"total_tokens\": 1198,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 768,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": null\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 17:25:23 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '6669'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary."},{"role":"assistant","content":null,"tool_calls":[{"id":"call_D2ojRWqkng6krQ51vWQEU8wR","type":"function","function":{"name":"parallel_local_search_one","arguments":"{\"query\":
\"latest OpenAI model release notes\"}"}},{"id":"call_v1tpTKw1sYcI75SWG1LCkAC3","type":"function","function":{"name":"parallel_local_search_two","arguments":"{\"query\":
\"latest Anthropic model release notes\"}"}},{"id":"call_RrbyZClymnngoNLhlkQLLpwM","type":"function","function":{"name":"parallel_local_search_three","arguments":"{\"query\":
\"latest Gemini model release notes\"}"}}]},{"role":"tool","tool_call_id":"call_D2ojRWqkng6krQ51vWQEU8wR","name":"parallel_local_search_one","content":"[one]
latest OpenAI model release notes"},{"role":"tool","tool_call_id":"call_v1tpTKw1sYcI75SWG1LCkAC3","name":"parallel_local_search_two","content":"[two]
latest Anthropic model release notes"},{"role":"tool","tool_call_id":"call_RrbyZClymnngoNLhlkQLLpwM","name":"parallel_local_search_three","content":"[three]
latest Gemini model release notes"}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '2771'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DB24DjyYsIHiQJ7hHXob8tQFfeXBs\",\n \"object\":
\"chat.completion\",\n \"created\": 1771521925,\n \"model\": \"gpt-5-nano-2025-08-07\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The three latest release-note references
retrieved encompass OpenAI, Anthropic, and Gemini, indicating that all three
major model families are actively updating their offerings. These notes typically
cover improvements to capabilities, safety measures, performance enhancements,
and any new APIs or features, suggesting a trend of ongoing refinement across
providers. If you\u2019d like, I can pull the full release notes or extract
and compare the key changes across the three sources.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 467,\n \"completion_tokens\":
1437,\n \"total_tokens\": 1904,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 1344,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": null\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 17:25:35 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '10369'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
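
The deleted cassette above follows the standard vcrpy layout, with sensitive headers replaced by `-XXX` placeholders. A sketch of how such a cassette might be recorded and replayed follows; the cassette path and the exact filter configuration are assumptions, chosen only to reproduce the scrubbed values visible in the fixtures.

```python
# Sketch of recording/replaying a cassette like the one above with vcrpy.
# Paths and filter lists are illustrative, not the test suite's actual config.
import vcr

scrubbed = vcr.VCR(
    record_mode="once",                  # record on first run, replay afterwards
    match_on=["method", "uri", "body"],  # how live requests are matched to entries
    filter_headers=[
        ("authorization", "AUTHORIZATION-XXX"),
        ("cookie", "COOKIE-XXX"),
        ("user-agent", "X-USER-AGENT-XXX"),
    ],
)

with scrubbed.use_cassette("cassettes/test_parallel_tool_calls.yaml"):
    # Any HTTP traffic issued here (e.g. the OpenAI calls above) is served
    # from the cassette instead of hitting api.openai.com.
    ...
```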


@@ -1,339 +0,0 @@
interactions:
- request:
body: '{"trace_id": "e456cc10-ce7b-4e68-a2cc-ddb806a2e7b9", "execution_type":
"crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null,
"crew_name": "crew", "flow_name": null, "crewai_version": "1.9.3", "privacy_level":
"standard"}, "execution_metadata": {"expected_duration_estimate": 300, "agent_count":
0, "task_count": 0, "flow_method_count": 0, "execution_started_at": "2026-02-19T17:24:41.723158+00:00"},
"ephemeral_trace_id": "e456cc10-ce7b-4e68-a2cc-ddb806a2e7b9"}'
headers:
Accept:
- '*/*'
Connection:
- keep-alive
Content-Length:
- '488'
Content-Type:
- application/json
User-Agent:
- X-USER-AGENT-XXX
X-Crewai-Organization-Id:
- 3433f0ee-8a94-4aa4-822b-2ac71aa38b18
X-Crewai-Version:
- 1.9.3
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
method: POST
uri: https://app.crewai.com/crewai_plus/api/v1/tracing/ephemeral/batches
response:
body:
string: '{"id":"a78f2aca-0525-47c7-8f37-b3fca0ad6672","ephemeral_trace_id":"e456cc10-ce7b-4e68-a2cc-ddb806a2e7b9","execution_type":"crew","crew_name":"crew","flow_name":null,"status":"running","duration_ms":null,"crewai_version":"1.9.3","total_events":0,"execution_context":{"crew_fingerprint":null,"crew_name":"crew","flow_name":null,"crewai_version":"1.9.3","privacy_level":"standard"},"created_at":"2026-02-19T17:24:41.989Z","updated_at":"2026-02-19T17:24:41.989Z","access_code":"TRACE-bd80d6be74","user_identifier":null}'
headers:
Connection:
- keep-alive
Content-Length:
- '515'
Content-Type:
- application/json; charset=utf-8
Date:
- Thu, 19 Feb 2026 17:24:41 GMT
cache-control:
- no-store
content-security-policy:
- CSP-FILTERED
etag:
- ETAG-XXX
expires:
- '0'
permissions-policy:
- PERMISSIONS-POLICY-XXX
pragma:
- no-cache
referrer-policy:
- REFERRER-POLICY-XXX
strict-transport-security:
- STS-XXX
vary:
- Accept
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-frame-options:
- X-FRAME-OPTIONS-XXX
x-permitted-cross-domain-policies:
- X-PERMITTED-XXX
x-request-id:
- X-REQUEST-ID-XXX
x-runtime:
- X-RUNTIME-XXX
x-xss-protection:
- X-XSS-PROTECTION-XXX
status:
code: 201
message: Created
- request:
body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1929'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DB23W8RBF6zlxweiHYGb6maVfyctt\",\n \"object\":
\"chat.completion\",\n \"created\": 1771521882,\n \"model\": \"gpt-5-nano-2025-08-07\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_sge1FXUkpmPEDe8nTOgn0tQG\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"parallel_local_search_one\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest OpenAI model release
notes\\\"}\"\n }\n },\n {\n \"id\":
\"call_z5jRPH4DQ7Wp3HdDUlZe8gGh\",\n \"type\": \"function\",\n
\ \"function\": {\n \"name\": \"parallel_local_search_two\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest Anthropic model
release notes\\\"}\"\n }\n },\n {\n \"id\":
\"call_DNlgqnadODDsyQkSuLcXZCX2\",\n \"type\": \"function\",\n
\ \"function\": {\n \"name\": \"parallel_local_search_three\",\n
\ \"arguments\": \"{\\\"query\\\": \\\"latest Gemini model release
notes\\\"}\"\n }\n }\n ],\n \"refusal\":
null,\n \"annotations\": []\n },\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 378,\n \"completion_tokens\":
2456,\n \"total_tokens\": 2834,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 2368,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": null\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 17:25:02 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '19582'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
compliance test. In your next assistant turn, emit exactly 3 tool calls in the
same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
model release notes''), 3) parallel_local_search_three(query=''latest Gemini
model release notes''). Do not call any other tools and do not answer before
those 3 tool calls are emitted. After the tool results return, provide a one
paragraph summary.\n\nThis is the expected criteria for your final answer: A
one sentence summary of both tool outputs\nyou MUST return the actual complete
content as the final answer, not a summary."},{"role":"assistant","content":null,"tool_calls":[{"id":"call_sge1FXUkpmPEDe8nTOgn0tQG","type":"function","function":{"name":"parallel_local_search_one","arguments":"{\"query\":
\"latest OpenAI model release notes\"}"}},{"id":"call_z5jRPH4DQ7Wp3HdDUlZe8gGh","type":"function","function":{"name":"parallel_local_search_two","arguments":"{\"query\":
\"latest Anthropic model release notes\"}"}},{"id":"call_DNlgqnadODDsyQkSuLcXZCX2","type":"function","function":{"name":"parallel_local_search_three","arguments":"{\"query\":
\"latest Gemini model release notes\"}"}}]},{"role":"tool","tool_call_id":"call_sge1FXUkpmPEDe8nTOgn0tQG","name":"parallel_local_search_one","content":"[one]
latest OpenAI model release notes"},{"role":"tool","tool_call_id":"call_z5jRPH4DQ7Wp3HdDUlZe8gGh","name":"parallel_local_search_two","content":"[two]
latest Anthropic model release notes"},{"role":"tool","tool_call_id":"call_DNlgqnadODDsyQkSuLcXZCX2","name":"parallel_local_search_three","content":"[three]
latest Gemini model release notes"},{"role":"user","content":"Analyze the tool
result. If requirements are met, provide the Final Answer. Otherwise, call the
next tool. Deliver only the answer without meta-commentary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '3136'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DB23sY0Ahpd1yAgLZ882KkA50Zljx\",\n \"object\":
\"chat.completion\",\n \"created\": 1771521904,\n \"model\": \"gpt-5-nano-2025-08-07\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"Results returned three items: the latest
OpenAI model release notes, the latest Anthropic model release notes, and
the latest Gemini model release notes.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\":
{\n \"prompt_tokens\": 537,\n \"completion_tokens\": 1383,\n \"total_tokens\":
1920,\n \"prompt_tokens_details\": {\n \"cached_tokens\": 0,\n \"audio_tokens\":
0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\":
1344,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n
\ \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": null\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 19 Feb 2026 17:25:16 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '12339'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
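
Besides the OpenAI traffic, this cassette opens with a POST to `app.crewai.com` registering an ephemeral trace batch. Below is a hedged reconstruction of that call from the recorded payload; the field semantics are inferred from the fixture alone, and the token and organization id are placeholders.

```python
# Reconstruction of the ephemeral-trace registration request recorded above.
# Field meanings are inferred from the captured payload; credentials are stubs.
import uuid
from datetime import datetime, timezone

import requests

trace_id = str(uuid.uuid4())
payload = {
    "trace_id": trace_id,
    "ephemeral_trace_id": trace_id,
    "execution_type": "crew",
    "user_identifier": None,
    "execution_context": {
        "crew_fingerprint": None,
        "crew_name": "crew",
        "flow_name": None,
        "crewai_version": "1.9.3",
        "privacy_level": "standard",
    },
    "execution_metadata": {
        "expected_duration_estimate": 300,
        "agent_count": 0,
        "task_count": 0,
        "flow_method_count": 0,
        "execution_started_at": datetime.now(timezone.utc).isoformat(),
    },
}

resp = requests.post(
    "https://app.crewai.com/crewai_plus/api/v1/tracing/ephemeral/batches",
    json=payload,
    headers={
        "Authorization": "Bearer <token>",                # placeholder
        "X-Crewai-Organization-Id": "<organization-id>",  # placeholder
        "X-Crewai-Version": "1.9.3",
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["access_code"])  # e.g. "TRACE-bd80d6be74" in the recording
```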

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -0,0 +1,191 @@
interactions:
- request:
body: !!binary |
Ct8MCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkStgwKEgoQY3Jld2FpLnRl
bGVtZXRyeRKcCAoQjin/Su47zAwLq3Hv6yv8GhIImRMfAPs+FOMqDENyZXcgQ3JlYXRlZDABOYCY
xbgUrDUYQVie07gUrDUYShsKDmNyZXdhaV92ZXJzaW9uEgkKBzAuMTE0LjBKGgoOcHl0aG9uX3Zl
cnNpb24SCAoGMy4xMi45Si4KCGNyZXdfa2V5EiIKIDA3YTcxNzY4Y2M0YzkzZWFiM2IzMWUzYzhk
MjgzMmM2SjEKB2NyZXdfaWQSJgokY2UyMGFlNWYtZmMyNy00YWJhLWExYWMtNzUwY2ZhZmMwMTE4
ShwKDGNyZXdfcHJvY2VzcxIMCgpzZXF1ZW50aWFsShEKC2NyZXdfbWVtb3J5EgIQAEoaChRjcmV3
X251bWJlcl9vZl90YXNrcxICGAFKGwoVY3Jld19udW1iZXJfb2ZfYWdlbnRzEgIYAUo6ChBjcmV3
X2ZpbmdlcnByaW50EiYKJDQ4NGFmZDhjLTczMmEtNGM1Ni1hZjk2LTU2MzkwMjNmYjhjOUo7Chtj
cmV3X2ZpbmdlcnByaW50X2NyZWF0ZWRfYXQSHAoaMjAyNS0wNC0xMlQxNzoyNzoxNS42NzMyMjNK
0AIKC2NyZXdfYWdlbnRzEsACCr0CW3sia2V5IjogIjAyZGYxM2UzNjcxMmFiZjUxZDIzOGZlZWJh
YjFjYTI2IiwgImlkIjogImYyYjZkYTU1LTNiMGItNDZiNy05Mzk5LWE5NDJmYjQ4YzU2OSIsICJy
b2xlIjogIlJlc2VhcmNoZXIiLCAidmVyYm9zZT8iOiB0cnVlLCAibWF4X2l0ZXIiOiAyNSwgIm1h
eF9ycG0iOiBudWxsLCAiZnVuY3Rpb25fY2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJncHQtNG8t
bWluaSIsICJkZWxlZ2F0aW9uX2VuYWJsZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4ZWN1dGlv
bj8iOiBmYWxzZSwgIm1heF9yZXRyeV9saW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFtdfV1K/wEK
CmNyZXdfdGFza3MS8AEK7QFbeyJrZXkiOiAiN2I0MmRmM2MzYzc0YzIxYzg5NDgwZTBjMDcwNTM4
NWYiLCAiaWQiOiAiYmE1MjFjNDgtYzcwNS00MDRlLWE5MDktMjkwZGM0NTlkOThkIiwgImFzeW5j
X2V4ZWN1dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6IGZhbHNlLCAiYWdlbnRfcm9sZSI6
ICJSZXNlYXJjaGVyIiwgImFnZW50X2tleSI6ICIwMmRmMTNlMzY3MTJhYmY1MWQyMzhmZWViYWIx
Y2EyNiIsICJ0b29sc19uYW1lcyI6IFtdfV16AhgBhQEAAQAAEoAEChAmCOpHN6fX3l0shQvTLjrB
EgjLTyt4A1p7wyoMVGFzayBDcmVhdGVkMAE5gN7juBSsNRhBmFfkuBSsNRhKLgoIY3Jld19rZXkS
IgogMDdhNzE3NjhjYzRjOTNlYWIzYjMxZTNjOGQyODMyYzZKMQoHY3Jld19pZBImCiRjZTIwYWU1
Zi1mYzI3LTRhYmEtYTFhYy03NTBjZmFmYzAxMThKLgoIdGFza19rZXkSIgogN2I0MmRmM2MzYzc0
YzIxYzg5NDgwZTBjMDcwNTM4NWZKMQoHdGFza19pZBImCiRiYTUyMWM0OC1jNzA1LTQwNGUtYTkw
OS0yOTBkYzQ1OWQ5OGRKOgoQY3Jld19maW5nZXJwcmludBImCiQ0ODRhZmQ4Yy03MzJhLTRjNTYt
YWY5Ni01NjM5MDIzZmI4YzlKOgoQdGFza19maW5nZXJwcmludBImCiRhMDcyNjgwNC05ZjIwLTQw
ODgtYWFmOC1iNzhkYTUyNmM3NjlKOwobdGFza19maW5nZXJwcmludF9jcmVhdGVkX2F0EhwKGjIw
MjUtMDQtMTJUMTc6Mjc6MTUuNjczMTgxSjsKEWFnZW50X2ZpbmdlcnByaW50EiYKJDNiZDE2MmNm
LWNmMWQtNGUwZi04ZmIzLTk3MDljMDkyNmM4ZHoCGAGFAQABAAA=
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '1634'
Content-Type:
- application/x-protobuf
User-Agent:
- OTel-OTLP-Exporter-Python/1.31.1
method: POST
uri: https://telemetry.crewai.com:4319/v1/traces
response:
body:
string: "\n\0"
headers:
Content-Length:
- '2'
Content-Type:
- application/x-protobuf
Date:
- Sat, 12 Apr 2025 20:27:16 GMT
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Researcher. You are
a researcher at a leading tech think tank.\nYour personal goal is: Search relevant
data and provide results\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Perform a search
on specific topics.\n\nThis is the expected criteria for your final answer:
A list of relevant URLs based on the search query.\nyou MUST return the actual
complete content as the final answer, not a summary.\n\n# Useful context: \nExternal
memories:\n\n\nBegin! This is VERY important to you, use the tools available
and give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
"gpt-4o-mini", "stop": ["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '989'
content-type:
- application/json
cookie:
- __cf_bm=nSje5Zn_Lk69BDG85XIauC2hrZjGl0pR2sel9__KWGw-1744489610-1.0.1.1-CPlAgcgTAE30uWrbi_2wiCWrbRDRWiaa.YuQMgST42DLDVg_wdNlJMDQT3Lsqk.g.BO68A66TTirWA0blQaQw.9xdBbPwKO609_ftjdwi5U;
_cfuvid=XLC52GLAWCOeWn2vI379CnSGKjPa7f.qr2vSAQ_R66M-1744489610542-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.9
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-BLbjXyMvmR8ctf0sqhp7F1ePskveM\",\n \"object\"\
: \"chat.completion\",\n \"created\": 1744489635,\n \"model\": \"gpt-4o-mini-2024-07-18\"\
,\n \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \
\ \"role\": \"assistant\",\n \"content\": \"I now can give a great\
\ answer \\nFinal Answer: Here is a list of relevant URLs based on the search\
\ query:\\n\\n1. **Artificial Intelligence in Healthcare**\\n - https://www.healthit.gov/topic/scientific-initiatives/ai-healthcare\\\
n - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7317789/\\n - https://www.forbes.com/sites/bernardmarr/2021/10/18/the-top-5-ways-ai-is-being-used-in-healthcare/?sh=3edf5df51c9c\\\
n\\n2. **Blockchain Technology in Supply Chain Management**\\n - https://www.ibm.com/blockchain/supply-chain\\\
n - https://www.gartner.com/en/newsroom/press-releases/2021-06-23-gartner-says-three-use-cases-for-blockchain-in-supply-chain-are-scaling\\\
n - https://www2.deloitte.com/us/en/insights/industry/retail-distribution/blockchain-in-supply-chain.html\\\
n\\n3. **Renewable Energy Innovations**\\n - https://www.irena.org/publications/2020/Sep/Renewable-Power-Generation-Costs-in-2020\\\
n - https://www.nrel.gov/docs/fy20osti/77021.pdf\\n - https://www.cnbc.com/2021/11/03/renewable-energy-could-get-its-first-taste-of-markets-in-2021.html\\\
n\\n4. **7G Technology Developments**\\n - https://www.sciencedirect.com/science/article/pii/S1389128619308189\\\
n - https://www.forbes.com/sites/bernardmarr/2021/11/01/what-is-7g-technology-a-beginners-guide-to-the-future-of-mobile-communications/?sh=51b8a7e1464a\\\
n - https://www.ericsson.com/en/reports-and-research/reports/7g-networks-a-powerful-future-for-connected-society\\\
n\\n5. **Impact of Quantum Computing on Cybersecurity**\\n - https://www.ibm.com/blogs/research/2021/09/quantum-computing-cybersecurity/\\\
n - https://www.sciencedirect.com/science/article/pii/S0167739X21000072\\\
n - https://www.techrepublic.com/article/how-quantum-computing-will-change-cybersecurity/\\\
n\\nThese URLs should provide comprehensive information on the topics searched,\
\ providing valuable insights and data for your research needs.\",\n \
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\"\
: null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n\
\ \"prompt_tokens\": 185,\n \"completion_tokens\": 534,\n \"total_tokens\"\
: 719,\n \"prompt_tokens_details\": {\n \"cached_tokens\": 0,\n \
\ \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\"\
: 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\"\
: \"default\",\n \"system_fingerprint\": \"fp_80cf447eee\"\n}\n"
headers:
CF-RAY:
- 92f576a01d3b7e05-GRU
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Sat, 12 Apr 2025 20:27:24 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '8805'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999788'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_7c2d313d0b5997e903553a782b2afa25
status:
code: 200
message: OK
version: 1
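
The binary request at the top of this cassette is an OTLP/HTTP trace export (note the `OTel-OTLP-Exporter-Python/1.31.1` user agent and the `crewAI-telemetry` service name visible in the decoded payload). A sketch of the exporter setup that would produce such traffic follows; the span name and attributes are taken from strings visible in the decoded protobuf, while the processor configuration is an assumption.

```python
# Sketch of the OTLP/HTTP span export captured in the binary request above.
# Endpoint and service name come from the recording; spans are illustrative.
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

provider = TracerProvider(
    resource=Resource.create({"service.name": "crewAI-telemetry"})
)
provider.add_span_processor(
    BatchSpanProcessor(
        OTLPSpanExporter(endpoint="https://telemetry.crewai.com:4319/v1/traces")
    )
)
trace.set_tracer_provider(provider)

tracer = trace.get_tracer("crewai.telemetry")
with tracer.start_as_current_span("Crew Created") as span:
    span.set_attribute("crewai_version", "0.114.0")
    span.set_attribute("crew_process", "sequential")

provider.force_flush()  # ensure the batch is POSTed before the process exits
```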

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff.