ensure configurable timeouts, max_replans and max step iterations

addressing crictical comments
regen cassette
2026-03-13 15:28:14 +00:00 · 2026-03-12 14:26:36 -07:00 · 2026-03-12 10:55:57 -07:00 · 2026-03-11 16:43:14 -07:00 · 2026-03-11 16:41:40 -07:00 · 2026-03-11 16:00:19 -07:00
204 changed files with 52416 additions and 4908 deletions
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -0,0 +1,127 @@
+name: Nightly Canary Release
+
+on:
+  schedule:
+    - cron: '0 6 * * *' # daily at 6am UTC
+  workflow_dispatch:
+
+jobs:
+  check:
+    name: Check for new commits
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    outputs:
+      has_changes: ${{ steps.check.outputs.has_changes }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Check for commits in last 24h
+        id: check
+        run: |
+          RECENT=$(git log --since="24 hours ago" --oneline | head -1)
+          if [ -n "$RECENT" ]; then
+            echo "has_changes=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "has_changes=false" >> "$GITHUB_OUTPUT"
+          fi
+
+  build:
+    name: Build nightly packages
+    needs: check
+    if: needs.check.outputs.has_changes == 'true' || github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Stamp nightly versions
+        run: |
+          DATE=$(date +%Y%m%d)
+          for init_file in \
+            lib/crewai/src/crewai/__init__.py \
+            lib/crewai-tools/src/crewai_tools/__init__.py \
+            lib/crewai-files/src/crewai_files/__init__.py; do
+            CURRENT=$(python -c "
+          import re
+          text = open('$init_file').read()
+          print(re.search(r'__version__\s*=\s*\"(.*?)\"\s*$', text, re.MULTILINE).group(1))
+          ")
+            NIGHTLY="${CURRENT}.dev${DATE}"
+            sed -i "s/__version__ = .*/__version__ = \"${NIGHTLY}\"/" "$init_file"
+            echo "$init_file: $CURRENT -> $NIGHTLY"
+          done
+
+          # Update cross-package dependency pins to nightly versions
+          sed -i "s/\"crewai-tools==[^\"]*\"/\"crewai-tools==${NIGHTLY}\"/" lib/crewai/pyproject.toml
+          sed -i "s/\"crewai==[^\"]*\"/\"crewai==${NIGHTLY}\"/" lib/crewai-tools/pyproject.toml
+          echo "Updated cross-package dependency pins to ${NIGHTLY}"
+
+      - name: Build packages
+        run: |
+          uv build --all-packages
+          rm dist/.gitignore
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+  publish:
+    name: Publish nightly to PyPI
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/crewai
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          version: "0.8.4"
+          python-version: "3.12"
+          enable-cache: false
+
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist
+
+      - name: Publish to PyPI
+        env:
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          failed=0
+          for package in dist/*; do
+            if [[ "$package" == *"crewai_devtools"* ]]; then
+              echo "Skipping private package: $package"
+              continue
+            fi
+            echo "Publishing $package"
+            if ! uv publish "$package"; then
+              echo "Failed to publish $package"
+              failed=1
+            fi
+          done
+          if [ $failed -eq 1 ]; then
+            echo "Some packages failed to publish"
+            exit 1
+          fi
--- a/conftest.py
+++ b/conftest.py
@@ -12,6 +12,7 @@ from dotenv import load_dotenv
 import pytest
 from vcr.request import Request  # type: ignore[import-untyped]

+
 try:
    import vcr.stubs.httpx_stubs as httpx_stubs  # type: ignore[import-untyped]
 except ModuleNotFoundError:
--- a/docs/docs.json
+++ b/docs/docs.json
--- a/docs/en/changelog.mdx
+++ b/docs/en/changelog.mdx
@@ -4,6 +4,71 @@ description: "Product updates, improvements, and bug fixes for CrewAI"
 icon: "clock"
 mode: "wide"
 ---
+<Update label="Mar 11, 2026">
+  ## v1.10.2a1
+
+  [View release on GitHub](https://github.com/crewAIInc/crewAI/releases/tag/1.10.2a1)
+
+  ## What's Changed
+
+  ### Features
+  - Add support for tool search, saving tokens, and dynamically injecting appropriate tools during execution for Anthropics.
+  - Introduce more Brave Search tools.
+  - Create action for nightly releases.
+
+  ### Bug Fixes
+  - Fix LockException under concurrent multi-process execution.
+  - Resolve issues with grouping parallel tool results in a single user message.
+  - Address MCP tools resolutions and eliminate all shared mutable connections.
+  - Update LLM parameter handling in the human_feedback function.
+  - Add missing list/dict methods to LockedListProxy and LockedDictProxy.
+  - Propagate contextvars context to parallel tool call threads.
+  - Bump gitpython dependency to >=3.1.41 to resolve CVE path traversal vulnerability.
+
+  ### Refactoring
+  - Refactor memory classes to be serializable.
+
+  ### Documentation
+  - Update changelog and version for v1.10.1.
+
+  ## Contributors
+
+  @akaKuruma, @github-actions[bot], @giulio-leone, @greysonlalonde, @joaomdmoura, @jonathansampson, @lorenzejay, @lucasgomide, @mattatcha
+
+</Update>
+
+<Update label="Mar 04, 2026">
+  ## v1.10.1
+
+  [View release on GitHub](https://github.com/crewAIInc/crewAI/releases/tag/1.10.1)
+
+  ## What's Changed
+
+  ### Features
+  - Upgrade Gemini GenAI
+
+  ### Bug Fixes
+  - Adjust executor listener value to avoid recursion
+  - Group parallel function response parts in a single Content object in Gemini
+  - Surface thought output from thinking models in Gemini
+  - Load MCP and platform tools when agent tools are None
+  - Support Jupyter environments with running event loops in A2A
+  - Use anonymous ID for ephemeral traces
+  - Conditionally pass plus header
+  - Skip signal handler registration in non-main threads for telemetry
+  - Inject tool errors as observations and resolve name collisions
+  - Upgrade pypdf from 4.x to 6.7.4 to resolve Dependabot alerts
+  - Resolve critical and high Dependabot security alerts
+
+  ### Documentation
+  - Sync Composio tool documentation across locales
+
+  ## Contributors
+
+  @giulio-leone, @greysonlalonde, @haxzie, @joaomdmoura, @lorenzejay, @mattatcha, @mplachta, @nicoferdi96
+
+</Update>
+
 <Update label="Feb 27, 2026">
  ## v1.10.1a1

--- a/docs/en/guides/migration/migrating-from-langgraph.mdx
+++ b/docs/en/guides/migration/migrating-from-langgraph.mdx
@@ -0,0 +1,518 @@
+---
+title: "Moving from LangGraph to CrewAI: A Practical Guide for Engineers"
+description: If you already have built with LangGraph, learn how to quickly port your projects to CrewAI
+icon: switch
+mode: "wide"
+---
+
+You've built agents with LangGraph. You've wrestled with `StateGraph`, wired up conditional edges, and debugged state dictionaries at 2 AM. It works — but somewhere along the way, you started wondering if there's a better path to production.
+
+There is. **CrewAI Flows** gives you the same power — event-driven orchestration, conditional routing, shared state — with dramatically less boilerplate and a mental model that maps cleanly to how you actually think about multi-step AI workflows.
+
+This article walks through the core concepts side by side, shows real code comparisons, and demonstrates why CrewAI Flows is the framework you'll want to reach for next.
+
+---
+
+## The Mental Model Shift
+
+LangGraph asks you to think in **graphs**: nodes, edges, and state dictionaries. Every workflow is a directed graph where you explicitly wire transitions between computation steps. It's powerful, but the abstraction carries overhead — especially when your workflow is fundamentally sequential with a few decision points.
+
+CrewAI Flows asks you to think in **events**: methods that start things, methods that listen for results, and methods that route execution. The topology of your workflow emerges from decorator annotations rather than explicit graph construction. This isn't just syntactic sugar — it changes how you design, read, and maintain your pipelines.
+
+Here's the core mapping:
+
+| LangGraph Concept | CrewAI Flows Equivalent |
+| --- | --- |
+| `StateGraph` class | `Flow` class |
+| `add_node()` | Methods decorated with `@start`, `@listen` |
+| `add_edge()` / `add_conditional_edges()` | `@listen()` / `@router()` decorators |
+| `TypedDict` state | Pydantic `BaseModel` state |
+| `START` / `END` constants | `@start()` decorator / natural method return |
+| `graph.compile()` | `flow.kickoff()` |
+| Checkpointer / persistence | Built-in memory (LanceDB-backed) |
+
+Let's see what this looks like in practice.
+
+---
+
+## Demo 1: A Simple Sequential Pipeline
+
+Imagine you're building a pipeline that takes a topic, researches it, writes a summary, and formats the output. Here's how each framework handles it.
+
+### LangGraph Approach
+
+```python
+from typing import TypedDict
+from langgraph.graph import StateGraph, START, END
+
+class ResearchState(TypedDict):
+    topic: str
+    raw_research: str
+    summary: str
+    formatted_output: str
+
+def research_topic(state: ResearchState) -> dict:
+    # Call an LLM or search API
+    result = llm.invoke(f"Research the topic: {state['topic']}")
+    return {"raw_research": result}
+
+def write_summary(state: ResearchState) -> dict:
+    result = llm.invoke(
+        f"Summarize this research:\n{state['raw_research']}"
+    )
+    return {"summary": result}
+
+def format_output(state: ResearchState) -> dict:
+    result = llm.invoke(
+        f"Format this summary as a polished article section:\n{state['summary']}"
+    )
+    return {"formatted_output": result}
+
+# Build the graph
+graph = StateGraph(ResearchState)
+graph.add_node("research", research_topic)
+graph.add_node("summarize", write_summary)
+graph.add_node("format", format_output)
+
+graph.add_edge(START, "research")
+graph.add_edge("research", "summarize")
+graph.add_edge("summarize", "format")
+graph.add_edge("format", END)
+
+# Compile and run
+app = graph.compile()
+result = app.invoke({"topic": "quantum computing advances in 2026"})
+print(result["formatted_output"])
+```
+
+You define functions, register them as nodes, and manually wire every transition. For a simple sequence like this, there's a lot of ceremony.
+
+### CrewAI Flows Approach
+
+```python
+from crewai import LLM, Agent, Crew, Process, Task
+from crewai.flow.flow import Flow, listen, start
+from pydantic import BaseModel
+
+llm = LLM(model="openai/gpt-5.2")
+
+class ResearchState(BaseModel):
+    topic: str = ""
+    raw_research: str = ""
+    summary: str = ""
+    formatted_output: str = ""
+
+class ResearchFlow(Flow[ResearchState]):
+    @start()
+    def research_topic(self):
+        # Option 1: Direct LLM call
+        result = llm.call(f"Research the topic: {self.state.topic}")
+        self.state.raw_research = result
+        return result
+
+    @listen(research_topic)
+    def write_summary(self, research_output):
+        # Option 2: A single agent
+        summarizer = Agent(
+            role="Research Summarizer",
+            goal="Produce concise, accurate summaries of research content",
+            backstory="You are an expert at distilling complex research into clear, "
+            "digestible summaries.",
+            llm=llm,
+            verbose=True,
+        )
+        result = summarizer.kickoff(
+            f"Summarize this research:\n{self.state.raw_research}"
+        )
+        self.state.summary = str(result)
+        return self.state.summary
+
+    @listen(write_summary)
+    def format_output(self, summary_output):
+        # Option 3: a complete crew (with one or more agents)
+        formatter = Agent(
+            role="Content Formatter",
+            goal="Transform research summaries into polished, publication-ready article sections",
+            backstory="You are a skilled editor with expertise in structuring and "
+            "presenting technical content for a general audience.",
+            llm=llm,
+            verbose=True,
+        )
+        format_task = Task(
+            description=f"Format this summary as a polished article section:\n{self.state.summary}",
+            expected_output="A well-structured, polished article section ready for publication.",
+            agent=formatter,
+        )
+        crew = Crew(
+            agents=[formatter],
+            tasks=[format_task],
+            process=Process.sequential,
+            verbose=True,
+        )
+        result = crew.kickoff()
+        self.state.formatted_output = str(result)
+        return self.state.formatted_output
+
+# Run the flow
+flow = ResearchFlow()
+flow.state.topic = "quantum computing advances in 2026"
+result = flow.kickoff()
+print(flow.state.formatted_output)
+
+```
+
+Notice what's different: no graph construction, no edge wiring, no compile step. The execution order is declared right where the logic lives. `@start()` marks the entry point, and `@listen(method_name)` chains steps together. The state is a proper Pydantic model with type safety, validation, and IDE auto-completion.
+
+---
+
+## Demo 2: Conditional Routing
+
+This is where things get interesting. Say you're building a content pipeline that routes to different processing paths based on the type of content detected.
+
+### LangGraph Approach
+
+```python
+from typing import TypedDict, Literal
+from langgraph.graph import StateGraph, START, END
+
+class ContentState(TypedDict):
+    input_text: str
+    content_type: str
+    result: str
+
+def classify_content(state: ContentState) -> dict:
+    content_type = llm.invoke(
+        f"Classify this content as 'technical', 'creative', or 'business':\n{state['input_text']}"
+    )
+    return {"content_type": content_type.strip().lower()}
+
+def process_technical(state: ContentState) -> dict:
+    result = llm.invoke(f"Process as technical doc:\n{state['input_text']}")
+    return {"result": result}
+
+def process_creative(state: ContentState) -> dict:
+    result = llm.invoke(f"Process as creative writing:\n{state['input_text']}")
+    return {"result": result}
+
+def process_business(state: ContentState) -> dict:
+    result = llm.invoke(f"Process as business content:\n{state['input_text']}")
+    return {"result": result}
+
+# Routing function
+def route_content(state: ContentState) -> Literal["technical", "creative", "business"]:
+    return state["content_type"]
+
+# Build the graph
+graph = StateGraph(ContentState)
+graph.add_node("classify", classify_content)
+graph.add_node("technical", process_technical)
+graph.add_node("creative", process_creative)
+graph.add_node("business", process_business)
+
+graph.add_edge(START, "classify")
+graph.add_conditional_edges(
+    "classify",
+    route_content,
+    {
+        "technical": "technical",
+        "creative": "creative",
+        "business": "business",
+    }
+)
+graph.add_edge("technical", END)
+graph.add_edge("creative", END)
+graph.add_edge("business", END)
+
+app = graph.compile()
+result = app.invoke({"input_text": "Explain how TCP handshakes work"})
+```
+
+You need a separate routing function, explicit conditional edge mapping, and termination edges for every branch. The routing logic is decoupled from the node that produces the routing decision.
+
+### CrewAI Flows Approach
+
+```python
+from crewai import LLM, Agent
+from crewai.flow.flow import Flow, listen, router, start
+from pydantic import BaseModel
+
+llm = LLM(model="openai/gpt-5.2")
+
+class ContentState(BaseModel):
+    input_text: str = ""
+    content_type: str = ""
+    result: str = ""
+
+class ContentFlow(Flow[ContentState]):
+    @start()
+    def classify_content(self):
+        self.state.content_type = (
+            llm.call(
+                f"Classify this content as 'technical', 'creative', or 'business':\n"
+                f"{self.state.input_text}"
+            )
+            .strip()
+            .lower()
+        )
+        return self.state.content_type
+
+    @router(classify_content)
+    def route_content(self, classification):
+        if classification == "technical":
+            return "process_technical"
+        elif classification == "creative":
+            return "process_creative"
+        else:
+            return "process_business"
+
+    @listen("process_technical")
+    def handle_technical(self):
+        agent = Agent(
+            role="Technical Writer",
+            goal="Produce clear, accurate technical documentation",
+            backstory="You are an expert technical writer who specializes in "
+            "explaining complex technical concepts precisely.",
+            llm=llm,
+            verbose=True,
+        )
+        self.state.result = str(
+            agent.kickoff(f"Process as technical doc:\n{self.state.input_text}")
+        )
+
+    @listen("process_creative")
+    def handle_creative(self):
+        agent = Agent(
+            role="Creative Writer",
+            goal="Craft engaging and imaginative creative content",
+            backstory="You are a talented creative writer with a flair for "
+            "compelling storytelling and vivid expression.",
+            llm=llm,
+            verbose=True,
+        )
+        self.state.result = str(
+            agent.kickoff(f"Process as creative writing:\n{self.state.input_text}")
+        )
+
+    @listen("process_business")
+    def handle_business(self):
+        agent = Agent(
+            role="Business Writer",
+            goal="Produce professional, results-oriented business content",
+            backstory="You are an experienced business writer who communicates "
+            "strategy and value clearly to professional audiences.",
+            llm=llm,
+            verbose=True,
+        )
+        self.state.result = str(
+            agent.kickoff(f"Process as business content:\n{self.state.input_text}")
+        )
+
+flow = ContentFlow()
+flow.state.input_text = "Explain how TCP handshakes work"
+flow.kickoff()
+print(flow.state.result)
+
+```
+
+The `@router()` decorator turns a method into a decision point. It returns a string that matches a listener — no mapping dictionaries, no separate routing functions. The branching logic reads like a Python `if` statement because it *is* one.
+
+---
+
+## Demo 3: Integrating AI Agent Crews into Flows
+
+Here's where CrewAI's real power shines. Flows aren't just for chaining LLM calls — they orchestrate full **Crews** of autonomous agents. This is something LangGraph simply doesn't have a native equivalent for.
+
+```python
+from crewai import Agent, Task, Crew
+from crewai.flow.flow import Flow, listen, start
+from pydantic import BaseModel
+
+class ArticleState(BaseModel):
+    topic: str = ""
+    research: str = ""
+    draft: str = ""
+    final_article: str = ""
+
+class ArticleFlow(Flow[ArticleState]):
+
+    @start()
+    def run_research_crew(self):
+        """A full Crew of agents handles research."""
+        researcher = Agent(
+            role="Senior Research Analyst",
+            goal=f"Produce comprehensive research on: {self.state.topic}",
+            backstory="You're a veteran analyst known for thorough, "
+                       "well-sourced research reports.",
+            llm="gpt-4o"
+        )
+
+        research_task = Task(
+            description=f"Research '{self.state.topic}' thoroughly. "
+                        "Cover key trends, data points, and expert opinions.",
+            expected_output="A detailed research brief with sources.",
+            agent=researcher
+        )
+
+        crew = Crew(agents=[researcher], tasks=[research_task])
+        result = crew.kickoff()
+        self.state.research = result.raw
+        return result.raw
+
+    @listen(run_research_crew)
+    def run_writing_crew(self, research_output):
+        """A different Crew handles writing."""
+        writer = Agent(
+            role="Technical Writer",
+            goal="Write a compelling article based on provided research.",
+            backstory="You turn complex research into engaging, clear prose.",
+            llm="gpt-4o"
+        )
+
+        editor = Agent(
+            role="Senior Editor",
+            goal="Review and polish articles for publication quality.",
+            backstory="20 years of editorial experience at top tech publications.",
+            llm="gpt-4o"
+        )
+
+        write_task = Task(
+            description=f"Write an article based on this research:\n{self.state.research}",
+            expected_output="A well-structured draft article.",
+            agent=writer
+        )
+
+        edit_task = Task(
+            description="Review, fact-check, and polish the draft article.",
+            expected_output="A publication-ready article.",
+            agent=editor
+        )
+
+        crew = Crew(agents=[writer, editor], tasks=[write_task, edit_task])
+        result = crew.kickoff()
+        self.state.final_article = result.raw
+        return result.raw
+
+# Run the full pipeline
+flow = ArticleFlow()
+flow.state.topic = "The Future of Edge AI"
+flow.kickoff()
+print(flow.state.final_article)
+```
+
+This is the key insight: **Flows provide the orchestration layer, and Crews provide the intelligence layer.** Each step in a Flow can spin up a full team of collaborating agents, each with their own roles, goals, and tools. You get structured, predictable control flow *and* autonomous agent collaboration — the best of both worlds.
+
+In LangGraph, achieving something similar means manually implementing agent communication protocols, tool-calling loops, and delegation logic inside your node functions. It's possible, but it's plumbing you're building from scratch every time.
+
+---
+
+## Demo 4: Parallel Execution and Synchronization
+
+Real-world pipelines often need to fan out work and join the results. CrewAI Flows handles this elegantly with `and_` and `or_` operators.
+
+```python
+from crewai import LLM
+from crewai.flow.flow import Flow, and_, listen, start
+from pydantic import BaseModel
+
+llm = LLM(model="openai/gpt-5.2")
+
+class AnalysisState(BaseModel):
+    topic: str = ""
+    market_data: str = ""
+    tech_analysis: str = ""
+    competitor_intel: str = ""
+    final_report: str = ""
+
+class ParallelAnalysisFlow(Flow[AnalysisState]):
+    @start()
+    def start_method(self):
+        pass
+
+    @listen(start_method)
+    def gather_market_data(self):
+        # Your agentic or deterministic code
+        pass
+
+    @listen(start_method)
+    def run_tech_analysis(self):
+        # Your agentic or deterministic code
+        pass
+
+    @listen(start_method)
+    def gather_competitor_intel(self):
+        # Your agentic or deterministic code
+        pass
+
+    @listen(and_(gather_market_data, run_tech_analysis, gather_competitor_intel))
+    def synthesize_report(self):
+        # Your agentic or deterministic code
+        pass
+
+flow = ParallelAnalysisFlow()
+flow.state.topic = "AI-powered developer tools"
+flow.kickoff()
+
+```
+
+Multiple `@start()` decorators fire in parallel. The `and_()` combinator on the `@listen` decorator ensures `synthesize_report` only executes after *all three* upstream methods complete. There's also `or_()` for when you want to proceed as soon as *any* upstream task finishes.
+
+In LangGraph, you'd need to build a fan-out/fan-in pattern with parallel branches, a synchronization node, and careful state merging — all wired explicitly through edges.
+
+---
+
+## Why CrewAI Flows for Production
+
+Beyond cleaner syntax, Flows deliver several production-critical advantages:
+
+**Built-in state persistence.** Flow state is backed by LanceDB, meaning your workflows can survive crashes, be resumed, and accumulate knowledge across runs. LangGraph requires you to configure a separate checkpointer.
+
+**Type-safe state management.** Pydantic models give you validation, serialization, and IDE support out of the box. LangGraph's `TypedDict` states don't validate at runtime.
+
+**First-class agent orchestration.** Crews are a native primitive. You define agents with roles, goals, backstories, and tools — and they collaborate autonomously within the structured envelope of a Flow. No need to reinvent multi-agent coordination.
+
+**Simpler mental model.** Decorators declare intent. `@start` means "begin here." `@listen(x)` means "run after x." `@router(x)` means "decide where to go after x." The code reads like the workflow it describes.
+
+**CLI integration.** Run flows with `crewai run`. No separate compilation step, no graph serialization. Your Flow is a Python class, and it runs like one.
+
+---
+
+## Migration Cheat Sheet
+
+If you're sitting on a LangGraph codebase and want to move to CrewAI Flows, here's a practical conversion guide:
+
+1. **Map your state.** Convert your `TypedDict` to a Pydantic `BaseModel`. Add default values for all fields.
+2. **Convert nodes to methods.** Each `add_node` function becomes a method on your `Flow` subclass. Replace `state["field"]` reads with `self.state.field`.
+3. **Replace edges with decorators.** Your `add_edge(START, "first_node")` becomes `@start()` on the first method. Sequential `add_edge("a", "b")` becomes `@listen(a)` on method `b`.
+4. **Replace conditional edges with `@router`.** Your routing function and `add_conditional_edges()` mapping become a single `@router()` method that returns a route string.
+5. **Replace compile + invoke with kickoff.** Drop `graph.compile()`. Call `flow.kickoff()` instead.
+6. **Consider where Crews fit.** Any node where you have complex multi-step agent logic is a candidate for extraction into a Crew. This is where you'll see the biggest quality improvement.
+
+---
+
+## Getting Started
+
+Install CrewAI and scaffold a new Flow project:
+
+```bash
+pip install crewai
+crewai create flow my_first_flow
+cd my_first_flow
+```
+
+This generates a project structure with a ready-to-edit Flow class, configuration files, and a `pyproject.toml` with `type = "flow"` already set. Run it with:
+
+```bash
+crewai run
+```
+
+From there, add your agents, wire up your listeners, and ship it.
+
+---
+
+## Final Thoughts
+
+LangGraph taught the ecosystem that AI workflows need structure. That was an important lesson. But CrewAI Flows takes that lesson and delivers it in a form that's faster to write, easier to read, and more powerful in production — especially when your workflows involve multiple collaborating agents.
+
+If you're building anything beyond a single-agent chain, give Flows a serious look. The decorator-driven model, native Crew integration, and built-in state management mean you'll spend less time on plumbing and more time on the problems that matter.
+
+Start with `crewai create flow`. You won't look back.
--- a/docs/en/tools/search-research/bravesearchtool.mdx
+++ b/docs/en/tools/search-research/bravesearchtool.mdx
@@ -1,97 +1,316 @@
 ---
-title: Brave Search
-description: The `BraveSearchTool` is designed to search the internet using the Brave Search API.
+title: Brave Search Tools
+description: A suite of tools for querying the Brave Search API — covering web, news, image, and video search.
 icon: searchengin
 mode: "wide"
 ---

-# `BraveSearchTool`
+# Brave Search Tools

 ## Description

-This tool is designed to perform web searches using the Brave Search API. It allows you to search the internet with a specified query and retrieve relevant results. The tool supports customizable result counts and country-specific searches.
+CrewAI offers a family of Brave Search tools, each targeting a specific [Brave Search API](https://brave.com/search/api/) endpoint.
+Rather than a single catch-all tool, you can pick exactly the tool that matches the kind of results your agent needs:
+
+| Tool | Endpoint | Use case |
+| --- | --- | --- |
+| `BraveWebSearchTool` | Web Search | General web results, snippets, and URLs |
+| `BraveNewsSearchTool` | News Search | Recent news articles and headlines |
+| `BraveImageSearchTool` | Image Search | Image results with dimensions and source URLs |
+| `BraveVideoSearchTool` | Video Search | Video results from across the web |
+| `BraveLocalPOIsTool` | Local POIs | Find points of interest (e.g., restaurants) |
+| `BraveLocalPOIsDescriptionTool` | Local POIs | Retrieve AI-generated location descriptions |
+| `BraveLLMContextTool` | LLM Context | Pre-extracted web content optimized for AI agents, LLM grounding, and RAG pipelines. |
+
+All tools share a common base class (`BraveSearchToolBase`) that provides consistent behavior — rate limiting, automatic retries on `429` responses, header and parameter validation, and optional file saving.
+
+<Note>
+  The older `BraveSearchTool` class is still available for backwards compatibility, but it is considered **legacy** and will not receive the same level of attention going forward. We recommend migrating to the specific tools listed above, which offer richer configuration and a more focused interface.
+</Note>
+
+<Note>
+    While many tools (e.g., _BraveWebSearchTool_, _BraveNewsSearchTool_, _BraveImageSearchTool_, and _BraveVideoSearchTool_) can be used with a free Brave Search API subscription/plan, some parameters (e.g., `enable_snippets`) and tools (e.g., _BraveLocalPOIsTool_ and _BraveLocalPOIsDescriptionTool_) require a paid plan. Consult your subscription plan's capabilities for clarification.
+</Note>

 ## Installation

-To incorporate this tool into your project, follow the installation instructions below:
-
 ```shell
 pip install 'crewai[tools]'
 ```

-## Steps to Get Started
+## Getting Started

-To effectively use the `BraveSearchTool`, follow these steps:
+1. **Install the package** — confirm that `crewai[tools]` is installed in your Python environment.
+2. **Get an API key** — sign up at [api-dashboard.search.brave.com/login](https://api-dashboard.search.brave.com/login) to generate a key.
+3. **Set the environment variable** — store your key as `BRAVE_API_KEY`, or pass it directly via the `api_key` parameter.

-1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
-2. **API Key Acquisition**: Acquire a Brave Search API key at https://api.search.brave.com/app/keys (sign in to generate a key).
-3. **Environment Configuration**: Store your obtained API key in an environment variable named `BRAVE_API_KEY` to facilitate its use by the tool.
+## Quick Examples

-## Example
-
-The following example demonstrates how to initialize the tool and execute a search with a given query:
+### Web Search

 ```python Code
-from crewai_tools import BraveSearchTool
+from crewai_tools import BraveWebSearchTool

-# Initialize the tool for internet searching capabilities
-tool = BraveSearchTool()
-
-# Execute a search
-results = tool.run(search_query="CrewAI agent framework")
+tool = BraveWebSearchTool()
+results = tool.run(q="CrewAI agent framework")
 print(results)
 ```

-## Parameters
-
-The `BraveSearchTool` accepts the following parameters:
-
- **search_query**: Mandatory. The search query you want to use to search the internet.
- **country**: Optional. Specify the country for the search results. Default is empty string.
- **n_results**: Optional. Number of search results to return. Default is `10`.
- **save_file**: Optional. Whether to save the search results to a file. Default is `False`.
-
-## Example with Parameters
-
-Here is an example demonstrating how to use the tool with additional parameters:
+### News Search

 ```python Code
-from crewai_tools import BraveSearchTool
+from crewai_tools import BraveNewsSearchTool

-# Initialize the tool with custom parameters
-tool = BraveSearchTool(
-    country="US",
-    n_results=5,
-    save_file=True
+tool = BraveNewsSearchTool()
+results = tool.run(q="latest AI breakthroughs")
+print(results)
+```
+
+### Image Search
+
+```python Code
+from crewai_tools import BraveImageSearchTool
+
+tool = BraveImageSearchTool()
+results = tool.run(q="northern lights photography")
+print(results)
+```
+
+### Video Search
+
+```python Code
+from crewai_tools import BraveVideoSearchTool
+
+tool = BraveVideoSearchTool()
+results = tool.run(q="how to build AI agents")
+print(results)
+```
+
+### Location POI Descriptions
+
+```python Code
+from crewai_tools import (
+    BraveWebSearchTool,
+    BraveLocalPOIsDescriptionTool,
 )

-# Execute a search
-results = tool.run(search_query="Latest AI developments")
-print(results)
+web_search = BraveWebSearchTool(raw=True)
+poi_details = BraveLocalPOIsDescriptionTool()
+
+results = web_search.run(q="italian restaurants in pensacola, florida")
+
+if "locations" in results:
+    location_ids = [ loc["id"] for loc in results["locations"]["results"] ]
+    if location_ids:
+        descriptions = poi_details.run(ids=location_ids)
+        print(descriptions)
+```
+
+## Common Constructor Parameters
+
+Every Brave Search tool accepts the following parameters at initialization:
+
+| Parameter | Type | Default | Description |
+| --- | --- | --- | --- |
+| `api_key` | `str \| None` | `None` | Brave API key. Falls back to the `BRAVE_API_KEY` environment variable. |
+| `headers` | `dict \| None` | `None` | Additional HTTP headers to send with every request (e.g., `api-version`, geolocation headers). |
+| `requests_per_second` | `float` | `1.0` | Maximum request rate. The tool will sleep between calls to stay within this limit. |
+| `save_file` | `bool` | `False` | When `True`, each response is written to a timestamped `.txt` file. |
+| `raw` | `bool` | `False` | When `True`, the full API JSON response is returned without any refinement. |
+| `timeout` | `int` | `30` | HTTP request timeout in seconds. |
+| `country` | `str \| None` | `None` | Legacy shorthand for geo-targeting (e.g., `"US"`). Prefer using the `country` query parameter directly. |
+| `n_results` | `int` | `10` | Legacy shorthand for result count. Prefer using the `count` query parameter directly. |
+
+<Warning>
+  The `country` and `n_results` constructor parameters exist for backwards compatibility. They are applied as defaults when the corresponding query parameters (`country`, `count`) are not provided at call time. For new code, we recommend passing `country` and `count` directly as query parameters instead.
+</Warning>
+
+## Query Parameters
+
+Each tool validates its query parameters against a Pydantic schema before sending the request.
+The parameters vary slightly per endpoint — here is a summary of the most commonly used ones:
+
+### BraveWebSearchTool
+
+| Parameter | Description |
+| --- | --- |
+| `q` | **(required)** Search query string (max 400 chars). |
+| `country` | Two-letter country code for geo-targeting (e.g., `"US"`). |
+| `search_lang` | Two-letter language code for results (e.g., `"en"`). |
+| `count` | Max number of results to return (1–20). |
+| `offset` | Skip the first N pages of results (0–9). |
+| `safesearch` | Content filter: `"off"`, `"moderate"`, or `"strict"`. |
+| `freshness` | Recency filter: `"pd"` (past day), `"pw"` (past week), `"pm"` (past month), `"py"` (past year), or a date range like `"2025-01-01to2025-06-01"`. |
+| `extra_snippets` | Include up to 5 additional text snippets per result. |
+| `goggles` | Brave Goggles URL(s) and/or source for custom re-ranking. |
+
+For the complete parameter and header reference, see the [Brave Web Search API documentation](https://api-dashboard.search.brave.com/api-reference/web/search/get).
+
+### BraveNewsSearchTool
+
+| Parameter | Description |
+| --- | --- |
+| `q` | **(required)** Search query string (max 400 chars). |
+| `country` | Two-letter country code for geo-targeting. |
+| `search_lang` | Two-letter language code for results. |
+| `count` | Max number of results to return (1–50). |
+| `offset` | Skip the first N pages of results (0–9). |
+| `safesearch` | Content filter: `"off"`, `"moderate"`, or `"strict"`. |
+| `freshness` | Recency filter (same options as Web Search). |
+| `goggles` | Brave Goggles URL(s) and/or source for custom re-ranking. |
+
+For the complete parameter and header reference, see the [Brave News Search API documentation](https://api-dashboard.search.brave.com/api-reference/news/news_search/get).
+
+### BraveImageSearchTool
+
+| Parameter | Description |
+| --- | --- |
+| `q` | **(required)** Search query string (max 400 chars). |
+| `country` | Two-letter country code for geo-targeting. |
+| `search_lang` | Two-letter language code for results. |
+| `count` | Max number of results to return (1–200). |
+| `safesearch` | Content filter: `"off"` or `"strict"`. |
+| `spellcheck` | Attempt to correct spelling errors in the query. |
+
+For the complete parameter and header reference, see the [Brave Image Search API documentation](https://api-dashboard.search.brave.com/api-reference/images/image_search).
+
+### BraveVideoSearchTool
+
+| Parameter | Description |
+| --- | --- |
+| `q` | **(required)** Search query string (max 400 chars). |
+| `country` | Two-letter country code for geo-targeting. |
+| `search_lang` | Two-letter language code for results. |
+| `count` | Max number of results to return (1–50). |
+| `offset` | Skip the first N pages of results (0–9). |
+| `safesearch` | Content filter: `"off"`, `"moderate"`, or `"strict"`. |
+| `freshness` | Recency filter (same options as Web Search). |
+
+For the complete parameter and header reference, see the [Brave Video Search API documentation](https://api-dashboard.search.brave.com/api-reference/videos/video_search/get).
+
+### BraveLocalPOIsTool
+
+| Parameter | Description |
+| --- | --- |
+| `ids` | **(required)** A list of unique identifiers for the desired locations. |
+| `search_lang` | Two-letter language code for results. |
+
+For the complete parameter and header reference, see [Brave Local POIs API documentation](https://api-dashboard.search.brave.com/api-reference/web/local_pois).
+
+### BraveLocalPOIsDescriptionTool
+
+| Parameter | Description |
+| --- | --- |
+| `ids` | **(required)** A list of unique identifiers for the desired locations. |
+
+For the complete parameter and header reference, see [Brave POI Descriptions API documentation](https://api-dashboard.search.brave.com/api-reference/web/poi_descriptions).
+
+## Custom Headers
+
+All tools support custom HTTP request headers. The Web Search tool, for example, accepts geolocation headers for location-aware results:
+
+```python Code
+from crewai_tools import BraveWebSearchTool
+
+tool = BraveWebSearchTool(
+    headers={
+        "x-loc-lat": "37.7749",
+        "x-loc-long": "-122.4194",
+        "x-loc-city": "San Francisco",
+        "x-loc-state": "CA",
+        "x-loc-country": "US",
+    }
+)
+
+results = tool.run(q="best coffee shops nearby")
+```
+
+You can also update headers after initialization using the `set_headers()` method:
+
+```python Code
+tool.set_headers({"api-version": "2025-01-01"})
+```
+
+## Raw Mode
+
+By default, each tool refines the API response into a concise list of results. If you need the full, unprocessed API response, enable raw mode:
+
+```python Code
+from crewai_tools import BraveWebSearchTool
+
+tool = BraveWebSearchTool(raw=True)
+full_response = tool.run(q="Brave Search API")
 ```

 ## Agent Integration Example

-Here's how to integrate the `BraveSearchTool` with a CrewAI agent:
+Here's how to equip a CrewAI agent with multiple Brave Search tools:

 ```python Code
 from crewai import Agent
 from crewai.project import agent
-from crewai_tools import BraveSearchTool
+from crewai_tools import BraveWebSearchTool, BraveNewsSearchTool

-# Initialize the tool
-brave_search_tool = BraveSearchTool()
+web_search = BraveWebSearchTool()
+news_search = BraveNewsSearchTool()

-# Define an agent with the BraveSearchTool
@agent
 def researcher(self) -> Agent:
    return Agent(
        config=self.agents_config["researcher"],
-        allow_delegation=False,
-        tools=[brave_search_tool]
+        tools=[web_search, news_search],
    )
 ```

+## Advanced Example
+
+Combining multiple parameters for a targeted search:
+
+```python Code
+from crewai_tools import BraveWebSearchTool
+
+tool = BraveWebSearchTool(
+    requests_per_second=0.5,  # conservative rate limit
+    save_file=True,
+)
+
+results = tool.run(
+    q="artificial intelligence news",
+    country="US",
+    search_lang="en",
+    count=5,
+    freshness="pm",           # past month only
+    extra_snippets=True,
+)
+print(results)
+```
+
+## Migrating from `BraveSearchTool` (Legacy)
+
+If you are currently using `BraveSearchTool`, switching to the new tools is straightforward:
+
+```python Code
+# Before (legacy)
+from crewai_tools import BraveSearchTool
+
+tool = BraveSearchTool(country="US", n_results=5, save_file=True)
+results = tool.run(search_query="AI agents")
+
+# After (recommended)
+from crewai_tools import BraveWebSearchTool
+
+tool = BraveWebSearchTool(save_file=True)
+results = tool.run(q="AI agents", country="US", count=5)
+```
+
+Key differences:
+- **Import**: Use `BraveWebSearchTool` (or the news/image/video variant) instead of `BraveSearchTool`.
+- **Query parameter**: Use `q` instead of `search_query`. (Both `search_query` and `query` are still accepted for convenience, but `q` is the preferred parameter.)
+- **Result count**: Pass `count` as a query parameter instead of `n_results` at init time.
+- **Country**: Pass `country` as a query parameter instead of at init time.
+- **API key**: Can now be passed directly via `api_key=` in addition to the `BRAVE_API_KEY` environment variable.
+- **Rate limiting**: Configurable via `requests_per_second` with automatic retry on `429` responses.
+
 ## Conclusion

-By integrating the `BraveSearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. The tool provides a simple interface to the powerful Brave Search API, making it easy to retrieve and process search results programmatically. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward. 
+The Brave Search tool suite gives your CrewAI agents flexible, endpoint-specific access to the Brave Search API. Whether you need web pages, breaking news, images, or videos, there is a dedicated tool with validated parameters and built-in resilience. Pick the tool that fits your use case, and refer to the [Brave Search API documentation](https://brave.com/search/api/) for the full details on available parameters and response formats.
--- a/docs/ko/changelog.mdx
+++ b/docs/ko/changelog.mdx
@@ -4,6 +4,71 @@ description: "CrewAI의 제품 업데이트, 개선 사항 및 버그 수정"
 icon: "clock"
 mode: "wide"
 ---
+<Update label="2026년 3월 11일">
+  ## v1.10.2a1
+
+  [GitHub 릴리스 보기](https://github.com/crewAIInc/crewAI/releases/tag/1.10.2a1)
+
+  ## 변경 사항
+
+  ### 기능
+  - Anthropics에 대한 도구 검색 지원 추가, 토큰 저장, 실행 중 적절한 도구를 동적으로 주입하는 기능 추가.
+  - 더 많은 Brave Search 도구 도입.
+  - 야간 릴리스를 위한 액션 생성.
+
+  ### 버그 수정
+  - 동시 다중 프로세스 실행 중 LockException 수정.
+  - 단일 사용자 메시지에서 병렬 도구 결과 그룹화 문제 해결.
+  - MCP 도구 해상도 문제 해결 및 모든 공유 가변 연결 제거.
+  - human_feedback 함수에서 LLM 매개변수 처리 업데이트.
+  - LockedListProxy 및 LockedDictProxy에 누락된 list/dict 메서드 추가.
+  - 병렬 도구 호출 스레드에 contextvars 컨텍스트 전파.
+  - CVE 경로 탐색 취약점을 해결하기 위해 gitpython 의존성을 >=3.1.41로 업데이트.
+
+  ### 리팩토링
+  - 메모리 클래스를 직렬화 가능하도록 리팩토링.
+
+  ### 문서
+  - v1.10.1에 대한 변경 로그 및 버전 업데이트.
+
+  ## 기여자
+
+  @akaKuruma, @github-actions[bot], @giulio-leone, @greysonlalonde, @joaomdmoura, @jonathansampson, @lorenzejay, @lucasgomide, @mattatcha
+
+</Update>
+
+<Update label="2026년 3월 4일">
+  ## v1.10.1
+
+  [GitHub 릴리스 보기](https://github.com/crewAIInc/crewAI/releases/tag/1.10.1)
+
+  ## 변경 사항
+
+  ### 기능
+  - Gemini GenAI 업그레이드
+
+  ### 버그 수정
+  - 재귀를 피하기 위해 실행기 리스너 값을 조정
+  - Gemini에서 병렬 함수 응답 부분을 단일 Content 객체로 그룹화
+  - Gemini에서 사고 모델의 사고 출력을 표시
+  - 에이전트 도구가 None일 때 MCP 및 플랫폼 도구 로드
+  - A2A에서 실행 이벤트 루프가 있는 Jupyter 환경 지원
+  - 일시적인 추적을 위해 익명 ID 사용
+  - 조건부로 플러스 헤더 전달
+  - 원격 측정을 위해 비주 스레드에서 신호 처리기 등록 건너뛰기
+  - 도구 오류를 관찰로 주입하고 이름 충돌 해결
+  - Dependabot 경고를 해결하기 위해 pypdf를 4.x에서 6.7.4로 업그레이드
+  - 심각 및 높은 Dependabot 보안 경고 해결
+
+  ### 문서
+  - Composio 도구 문서를 지역별로 동기화
+
+  ## 기여자
+
+  @giulio-leone, @greysonlalonde, @haxzie, @joaomdmoura, @lorenzejay, @mattatcha, @mplachta, @nicoferdi96
+
+</Update>
+
 <Update label="2026년 2월 27일">
  ## v1.10.1a1

--- a/docs/ko/guides/migration/migrating-from-langgraph.mdx
+++ b/docs/ko/guides/migration/migrating-from-langgraph.mdx
@@ -0,0 +1,518 @@
+---
+title: "LangGraph에서 CrewAI로 옮기기: 엔지니어를 위한 실전 가이드"
+description: LangGraph로 이미 구축했다면, 프로젝트를 CrewAI로 빠르게 옮기는 방법을 알아보세요
+icon: switch
+mode: "wide"
+---
+
+LangGraph로 에이전트를 구축해 왔습니다. `StateGraph`와 씨름하고, 조건부 에지를 연결하고, 새벽 2시에 상태 딕셔너리를 디버깅해 본 적도 있죠. 동작은 하지만 — 어느 순간부터 프로덕션으로 가는 더 나은 길이 없을까 고민하게 됩니다.
+
+있습니다. **CrewAI Flows**는 이벤트 기반 오케스트레이션, 조건부 라우팅, 공유 상태라는 동일한 힘을 훨씬 적은 보일러플레이트와 실제로 다단계 AI 워크플로우를 생각하는 방식에 잘 맞는 정신적 모델로 제공합니다.
+
+이 글은 핵심 개념을 나란히 비교하고 실제 코드 비교를 보여주며, 다음으로 손이 갈 프레임워크가 왜 CrewAI Flows인지 설명합니다.
+
+---
+
+## 정신적 모델의 전환
+
+LangGraph는 **그래프**로 생각하라고 요구합니다: 노드, 에지, 그리고 상태 딕셔너리. 모든 워크플로우는 계산 단계 사이의 전이를 명시적으로 연결하는 방향 그래프입니다. 강력하지만, 특히 워크플로우가 몇 개의 결정 지점이 있는 순차적 흐름일 때 이 추상화는 오버헤드를 가져옵니다.
+
+CrewAI Flows는 **이벤트**로 생각하라고 요구합니다: 시작하는 메서드, 결과를 듣는 메서드, 실행을 라우팅하는 메서드. 워크플로우의 토폴로지는 명시적 그래프 구성 대신 데코레이터 어노테이션에서 드러납니다. 이것은 단순한 문법 설탕이 아니라 — 파이프라인을 설계하고 읽고 유지하는 방식을 바꿉니다.
+
+핵심 매핑은 다음과 같습니다:
+
+| LangGraph 개념 | CrewAI Flows 대응 |
+| --- | --- |
+| `StateGraph` class | `Flow` class |
+| `add_node()` | Methods decorated with `@start`, `@listen` |
+| `add_edge()` / `add_conditional_edges()` | `@listen()` / `@router()` decorators |
+| `TypedDict` state | Pydantic `BaseModel` state |
+| `START` / `END` constants | `@start()` decorator / natural method return |
+| `graph.compile()` | `flow.kickoff()` |
+| Checkpointer / persistence | Built-in memory (LanceDB-backed) |
+
+실제로 어떻게 보이는지 살펴보겠습니다.
+
+---
+
+## 데모 1: 간단한 순차 파이프라인
+
+주제를 받아 조사하고, 요약을 작성한 뒤, 결과를 포맷팅하는 파이프라인을 만든다고 해봅시다. 각 프레임워크는 이렇게 처리합니다.
+
+### LangGraph 방식
+
+```python
+from typing import TypedDict
+from langgraph.graph import StateGraph, START, END
+
+class ResearchState(TypedDict):
+    topic: str
+    raw_research: str
+    summary: str
+    formatted_output: str
+
+def research_topic(state: ResearchState) -> dict:
+    # Call an LLM or search API
+    result = llm.invoke(f"Research the topic: {state['topic']}")
+    return {"raw_research": result}
+
+def write_summary(state: ResearchState) -> dict:
+    result = llm.invoke(
+        f"Summarize this research:\n{state['raw_research']}"
+    )
+    return {"summary": result}
+
+def format_output(state: ResearchState) -> dict:
+    result = llm.invoke(
+        f"Format this summary as a polished article section:\n{state['summary']}"
+    )
+    return {"formatted_output": result}
+
+# Build the graph
+graph = StateGraph(ResearchState)
+graph.add_node("research", research_topic)
+graph.add_node("summarize", write_summary)
+graph.add_node("format", format_output)
+
+graph.add_edge(START, "research")
+graph.add_edge("research", "summarize")
+graph.add_edge("summarize", "format")
+graph.add_edge("format", END)
+
+# Compile and run
+app = graph.compile()
+result = app.invoke({"topic": "quantum computing advances in 2026"})
+print(result["formatted_output"])
+```
+
+함수를 정의하고 노드로 등록한 다음, 모든 전이를 수동으로 연결합니다. 이렇게 단순한 순서인데도 의례처럼 해야 할 작업이 많습니다.
+
+### CrewAI Flows 방식
+
+```python
+from crewai import LLM, Agent, Crew, Process, Task
+from crewai.flow.flow import Flow, listen, start
+from pydantic import BaseModel
+
+llm = LLM(model="openai/gpt-5.2")
+
+class ResearchState(BaseModel):
+    topic: str = ""
+    raw_research: str = ""
+    summary: str = ""
+    formatted_output: str = ""
+
+class ResearchFlow(Flow[ResearchState]):
+    @start()
+    def research_topic(self):
+        # Option 1: Direct LLM call
+        result = llm.call(f"Research the topic: {self.state.topic}")
+        self.state.raw_research = result
+        return result
+
+    @listen(research_topic)
+    def write_summary(self, research_output):
+        # Option 2: A single agent
+        summarizer = Agent(
+            role="Research Summarizer",
+            goal="Produce concise, accurate summaries of research content",
+            backstory="You are an expert at distilling complex research into clear, "
+            "digestible summaries.",
+            llm=llm,
+            verbose=True,
+        )
+        result = summarizer.kickoff(
+            f"Summarize this research:\n{self.state.raw_research}"
+        )
+        self.state.summary = str(result)
+        return self.state.summary
+
+    @listen(write_summary)
+    def format_output(self, summary_output):
+        # Option 3: a complete crew (with one or more agents)
+        formatter = Agent(
+            role="Content Formatter",
+            goal="Transform research summaries into polished, publication-ready article sections",
+            backstory="You are a skilled editor with expertise in structuring and "
+            "presenting technical content for a general audience.",
+            llm=llm,
+            verbose=True,
+        )
+        format_task = Task(
+            description=f"Format this summary as a polished article section:\n{self.state.summary}",
+            expected_output="A well-structured, polished article section ready for publication.",
+            agent=formatter,
+        )
+        crew = Crew(
+            agents=[formatter],
+            tasks=[format_task],
+            process=Process.sequential,
+            verbose=True,
+        )
+        result = crew.kickoff()
+        self.state.formatted_output = str(result)
+        return self.state.formatted_output
+
+# Run the flow
+flow = ResearchFlow()
+flow.state.topic = "quantum computing advances in 2026"
+result = flow.kickoff()
+print(flow.state.formatted_output)
+
+```
+
+눈에 띄는 차이점이 있습니다: 그래프 구성 없음, 에지 연결 없음, 컴파일 단계 없음. 실행 순서는 로직이 있는 곳에서 바로 선언됩니다. `@start()`는 진입점을 표시하고, `@listen(method_name)`은 단계들을 연결합니다. 상태는 타입 안전성, 검증, IDE 자동 완성까지 제공하는 제대로 된 Pydantic 모델입니다.
+
+---
+
+## 데모 2: 조건부 라우팅
+
+여기서 흥미로워집니다. 콘텐츠 유형에 따라 서로 다른 처리 경로로 라우팅하는 파이프라인을 만든다고 해봅시다.
+
+### LangGraph 방식
+
+```python
+from typing import TypedDict, Literal
+from langgraph.graph import StateGraph, START, END
+
+class ContentState(TypedDict):
+    input_text: str
+    content_type: str
+    result: str
+
+def classify_content(state: ContentState) -> dict:
+    content_type = llm.invoke(
+        f"Classify this content as 'technical', 'creative', or 'business':\n{state['input_text']}"
+    )
+    return {"content_type": content_type.strip().lower()}
+
+def process_technical(state: ContentState) -> dict:
+    result = llm.invoke(f"Process as technical doc:\n{state['input_text']}")
+    return {"result": result}
+
+def process_creative(state: ContentState) -> dict:
+    result = llm.invoke(f"Process as creative writing:\n{state['input_text']}")
+    return {"result": result}
+
+def process_business(state: ContentState) -> dict:
+    result = llm.invoke(f"Process as business content:\n{state['input_text']}")
+    return {"result": result}
+
+# Routing function
+def route_content(state: ContentState) -> Literal["technical", "creative", "business"]:
+    return state["content_type"]
+
+# Build the graph
+graph = StateGraph(ContentState)
+graph.add_node("classify", classify_content)
+graph.add_node("technical", process_technical)
+graph.add_node("creative", process_creative)
+graph.add_node("business", process_business)
+
+graph.add_edge(START, "classify")
+graph.add_conditional_edges(
+    "classify",
+    route_content,
+    {
+        "technical": "technical",
+        "creative": "creative",
+        "business": "business",
+    }
+)
+graph.add_edge("technical", END)
+graph.add_edge("creative", END)
+graph.add_edge("business", END)
+
+app = graph.compile()
+result = app.invoke({"input_text": "Explain how TCP handshakes work"})
+```
+
+별도의 라우팅 함수, 명시적 조건부 에지 매핑, 그리고 모든 분기에 대한 종료 에지가 필요합니다. 라우팅 결정 로직이 그 결정을 만들어 내는 노드와 분리됩니다.
+
+### CrewAI Flows 방식
+
+```python
+from crewai import LLM, Agent
+from crewai.flow.flow import Flow, listen, router, start
+from pydantic import BaseModel
+
+llm = LLM(model="openai/gpt-5.2")
+
+class ContentState(BaseModel):
+    input_text: str = ""
+    content_type: str = ""
+    result: str = ""
+
+class ContentFlow(Flow[ContentState]):
+    @start()
+    def classify_content(self):
+        self.state.content_type = (
+            llm.call(
+                f"Classify this content as 'technical', 'creative', or 'business':\n"
+                f"{self.state.input_text}"
+            )
+            .strip()
+            .lower()
+        )
+        return self.state.content_type
+
+    @router(classify_content)
+    def route_content(self, classification):
+        if classification == "technical":
+            return "process_technical"
+        elif classification == "creative":
+            return "process_creative"
+        else:
+            return "process_business"
+
+    @listen("process_technical")
+    def handle_technical(self):
+        agent = Agent(
+            role="Technical Writer",
+            goal="Produce clear, accurate technical documentation",
+            backstory="You are an expert technical writer who specializes in "
+            "explaining complex technical concepts precisely.",
+            llm=llm,
+            verbose=True,
+        )
+        self.state.result = str(
+            agent.kickoff(f"Process as technical doc:\n{self.state.input_text}")
+        )
+
+    @listen("process_creative")
+    def handle_creative(self):
+        agent = Agent(
+            role="Creative Writer",
+            goal="Craft engaging and imaginative creative content",
+            backstory="You are a talented creative writer with a flair for "
+            "compelling storytelling and vivid expression.",
+            llm=llm,
+            verbose=True,
+        )
+        self.state.result = str(
+            agent.kickoff(f"Process as creative writing:\n{self.state.input_text}")
+        )
+
+    @listen("process_business")
+    def handle_business(self):
+        agent = Agent(
+            role="Business Writer",
+            goal="Produce professional, results-oriented business content",
+            backstory="You are an experienced business writer who communicates "
+            "strategy and value clearly to professional audiences.",
+            llm=llm,
+            verbose=True,
+        )
+        self.state.result = str(
+            agent.kickoff(f"Process as business content:\n{self.state.input_text}")
+        )
+
+flow = ContentFlow()
+flow.state.input_text = "Explain how TCP handshakes work"
+flow.kickoff()
+print(flow.state.result)
+
+```
+
+`@router()` 데코레이터는 메서드를 결정 지점으로 만듭니다. 리스너와 매칭되는 문자열을 반환하므로, 매핑 딕셔너리도, 별도의 라우팅 함수도 필요 없습니다. 분기 로직이 Python `if` 문처럼 읽히는 이유는, 실제로 `if` 문이기 때문입니다.
+
+---
+
+## 데모 3: AI 에이전트 Crew를 Flow에 통합하기
+
+여기서 CrewAI의 진짜 힘이 드러납니다. Flows는 LLM 호출을 연결하는 것에 그치지 않고 자율적인 에이전트 **Crew** 전체를 오케스트레이션합니다. 이는 LangGraph에 기본으로 대응되는 개념이 없습니다.
+
+```python
+from crewai import Agent, Task, Crew
+from crewai.flow.flow import Flow, listen, start
+from pydantic import BaseModel
+
+class ArticleState(BaseModel):
+    topic: str = ""
+    research: str = ""
+    draft: str = ""
+    final_article: str = ""
+
+class ArticleFlow(Flow[ArticleState]):
+
+    @start()
+    def run_research_crew(self):
+        """A full Crew of agents handles research."""
+        researcher = Agent(
+            role="Senior Research Analyst",
+            goal=f"Produce comprehensive research on: {self.state.topic}",
+            backstory="You're a veteran analyst known for thorough, "
+                       "well-sourced research reports.",
+            llm="gpt-4o"
+        )
+
+        research_task = Task(
+            description=f"Research '{self.state.topic}' thoroughly. "
+                        "Cover key trends, data points, and expert opinions.",
+            expected_output="A detailed research brief with sources.",
+            agent=researcher
+        )
+
+        crew = Crew(agents=[researcher], tasks=[research_task])
+        result = crew.kickoff()
+        self.state.research = result.raw
+        return result.raw
+
+    @listen(run_research_crew)
+    def run_writing_crew(self, research_output):
+        """A different Crew handles writing."""
+        writer = Agent(
+            role="Technical Writer",
+            goal="Write a compelling article based on provided research.",
+            backstory="You turn complex research into engaging, clear prose.",
+            llm="gpt-4o"
+        )
+
+        editor = Agent(
+            role="Senior Editor",
+            goal="Review and polish articles for publication quality.",
+            backstory="20 years of editorial experience at top tech publications.",
+            llm="gpt-4o"
+        )
+
+        write_task = Task(
+            description=f"Write an article based on this research:\n{self.state.research}",
+            expected_output="A well-structured draft article.",
+            agent=writer
+        )
+
+        edit_task = Task(
+            description="Review, fact-check, and polish the draft article.",
+            expected_output="A publication-ready article.",
+            agent=editor
+        )
+
+        crew = Crew(agents=[writer, editor], tasks=[write_task, edit_task])
+        result = crew.kickoff()
+        self.state.final_article = result.raw
+        return result.raw
+
+# Run the full pipeline
+flow = ArticleFlow()
+flow.state.topic = "The Future of Edge AI"
+flow.kickoff()
+print(flow.state.final_article)
+```
+
+핵심 인사이트는 다음과 같습니다: **Flows는 오케스트레이션 레이어를, Crews는 지능 레이어를 제공합니다.** Flow의 각 단계는 각자의 역할, 목표, 도구를 가진 협업 에이전트 팀을 띄울 수 있습니다. 구조화되고 예측 가능한 제어 흐름 *그리고* 자율적 에이전트 협업 — 두 세계의 장점을 모두 얻습니다.
+
+LangGraph에서 비슷한 것을 하려면 노드 함수 안에 에이전트 통신 프로토콜, 도구 호출 루프, 위임 로직을 직접 구현해야 합니다. 가능하긴 하지만, 매번 처음부터 배관을 만드는 셈입니다.
+
+---
+
+## 데모 4: 병렬 실행과 동기화
+
+실제 파이프라인은 종종 작업을 병렬로 분기하고 결과를 합쳐야 합니다. CrewAI Flows는 `and_`와 `or_` 연산자로 이를 우아하게 처리합니다.
+
+```python
+from crewai import LLM
+from crewai.flow.flow import Flow, and_, listen, start
+from pydantic import BaseModel
+
+llm = LLM(model="openai/gpt-5.2")
+
+class AnalysisState(BaseModel):
+    topic: str = ""
+    market_data: str = ""
+    tech_analysis: str = ""
+    competitor_intel: str = ""
+    final_report: str = ""
+
+class ParallelAnalysisFlow(Flow[AnalysisState]):
+    @start()
+    def start_method(self):
+        pass
+
+    @listen(start_method)
+    def gather_market_data(self):
+        # Your agentic or deterministic code
+        pass
+
+    @listen(start_method)
+    def run_tech_analysis(self):
+        # Your agentic or deterministic code
+        pass
+
+    @listen(start_method)
+    def gather_competitor_intel(self):
+        # Your agentic or deterministic code
+        pass
+
+    @listen(and_(gather_market_data, run_tech_analysis, gather_competitor_intel))
+    def synthesize_report(self):
+        # Your agentic or deterministic code
+        pass
+
+flow = ParallelAnalysisFlow()
+flow.state.topic = "AI-powered developer tools"
+flow.kickoff()
+
+```
+
+여러 `@start()` 데코레이터는 병렬로 실행됩니다. `@listen` 데코레이터의 `and_()` 결합자는 `synthesize_report`가 *세 가지* 상위 메서드가 모두 완료된 뒤에만 실행되도록 보장합니다. *어떤* 상위 작업이든 끝나는 즉시 진행하고 싶다면 `or_()`도 사용할 수 있습니다.
+
+LangGraph에서는 병렬 분기, 동기화 노드, 신중한 상태 병합이 포함된 fan-out/fan-in 패턴을 만들어야 하며 — 모든 것을 에지로 명시적으로 연결해야 합니다.
+
+---
+
+## 프로덕션에서 CrewAI Flows를 쓰는 이유
+
+깔끔한 문법을 넘어, Flows는 여러 프로덕션 핵심 이점을 제공합니다:
+
+**내장 상태 지속성.** Flow 상태는 LanceDB에 의해 백업되므로 워크플로우가 크래시에서 살아남고, 재개될 수 있으며, 실행 간에 지식을 축적할 수 있습니다. LangGraph는 별도의 체크포인터를 구성해야 합니다.
+
+**타입 안전한 상태 관리.** Pydantic 모델은 즉시 검증, 직렬화, IDE 지원을 제공합니다. LangGraph의 `TypedDict` 상태는 런타임 검증을 하지 않습니다.
+
+**일급 에이전트 오케스트레이션.** Crews는 기본 프리미티브입니다. 역할, 목표, 배경, 도구를 가진 에이전트를 정의하고, Flow의 구조적 틀 안에서 자율적으로 협업하게 합니다. 다중 에이전트 조율을 다시 만들 필요가 없습니다.
+
+**더 단순한 정신적 모델.** 데코레이터는 의도를 선언합니다. `@start`는 "여기서 시작", `@listen(x)`는 "x 이후 실행", `@router(x)`는 "x 이후 어디로 갈지 결정"을 의미합니다. 코드는 자신이 설명하는 워크플로우처럼 읽힙니다.
+
+**CLI 통합.** `crewai run`으로 Flows를 실행합니다. 별도의 컴파일 단계나 그래프 직렬화가 없습니다. Flow는 Python 클래스이며, 그대로 실행됩니다.
+
+---
+
+## 마이그레이션 치트 시트
+
+LangGraph 코드베이스를 CrewAI Flows로 옮기고 싶다면, 다음의 실전 변환 가이드를 참고하세요:
+
+1. **상태를 매핑하세요.** `TypedDict`를 Pydantic `BaseModel`로 변환하고 모든 필드에 기본값을 추가하세요.
+2. **노드를 메서드로 변환하세요.** 각 `add_node` 함수는 `Flow` 서브클래스의 메서드가 됩니다. `state["field"]` 읽기는 `self.state.field`로 바꾸세요.
+3. **에지를 데코레이터로 교체하세요.** `add_edge(START, "first_node")`는 첫 메서드의 `@start()`가 됩니다. 순차적인 `add_edge("a", "b")`는 `b` 메서드의 `@listen(a)`가 됩니다.
+4. **조건부 에지는 `@router`로 교체하세요.** 라우팅 함수와 `add_conditional_edges()` 매핑은 하나의 `@router()` 메서드로 통합하고, 라우트 문자열을 반환하세요.
+5. **compile + invoke를 kickoff으로 교체하세요.** `graph.compile()`를 제거하고 `flow.kickoff()`를 호출하세요.
+6. **Crew가 들어갈 지점을 고려하세요.** 복잡한 다단계 에이전트 로직이 있는 노드는 Crew로 분리할 후보입니다. 이 부분에서 가장 큰 품질 향상을 체감할 수 있습니다.
+
+---
+
+## 시작하기
+
+CrewAI를 설치하고 새 Flow 프로젝트를 스캐폴딩하세요:
+
+```bash
+pip install crewai
+crewai create flow my_first_flow
+cd my_first_flow
+```
+
+이렇게 하면 바로 편집 가능한 Flow 클래스, 설정 파일, 그리고 `type = "flow"`가 이미 설정된 `pyproject.toml`이 포함된 프로젝트 구조가 생성됩니다. 다음으로 실행하세요:
+
+```bash
+crewai run
+```
+
+그 다음부터는 에이전트를 추가하고 리스너를 연결한 뒤, 배포하면 됩니다.
+
+---
+
+## 마무리
+
+LangGraph는 AI 워크플로우에 구조가 필요하다는 사실을 생태계에 일깨워 주었습니다. 중요한 교훈이었습니다. 하지만 CrewAI Flows는 그 교훈을 더 빠르게 쓰고, 더 쉽게 읽으며, 프로덕션에서 더 강력한 형태로 제공합니다 — 특히 워크플로우에 여러 에이전트의 협업이 포함될 때 그렇습니다.
+
+단일 에이전트 체인을 넘는 무엇인가를 만들고 있다면, Flows를 진지하게 검토해 보세요. 데코레이터 기반 모델, Crews의 네이티브 통합, 내장 상태 관리를 통해 배관 작업에 쓰는 시간을 줄이고, 중요한 문제에 더 많은 시간을 쓸 수 있습니다.
+
+`crewai create flow`로 시작하세요. 후회하지 않을 겁니다.
--- a/docs/pt-BR/changelog.mdx
+++ b/docs/pt-BR/changelog.mdx
@@ -4,6 +4,71 @@ description: "Atualizações de produto, melhorias e correções do CrewAI"
 icon: "clock"
 mode: "wide"
 ---
+<Update label="11 mar 2026">
+  ## v1.10.2a1
+
+  [Ver release no GitHub](https://github.com/crewAIInc/crewAI/releases/tag/1.10.2a1)
+
+  ## O que mudou
+
+  ### Recursos
+  - Adicionar suporte para busca de ferramentas, salvamento de tokens e injeção dinâmica de ferramentas apropriadas durante a execução para Anthropics.
+  - Introduzir mais ferramentas de Busca Brave.
+  - Criar ação para lançamentos noturnos.
+
+  ### Correções de Bugs
+  - Corrigir LockException durante a execução concorrente de múltiplos processos.
+  - Resolver problemas com a agrupação de resultados de ferramentas paralelas em uma única mensagem de usuário.
+  - Abordar resoluções de ferramentas MCP e eliminar todas as conexões mutáveis compartilhadas.
+  - Atualizar o manuseio de parâmetros LLM na função human_feedback.
+  - Adicionar métodos de lista/dicionário ausentes a LockedListProxy e LockedDictProxy.
+  - Propagar o contexto de contextvars para as threads de chamada de ferramentas paralelas.
+  - Atualizar a dependência gitpython para >=3.1.41 para resolver a vulnerabilidade de travessia de diretórios CVE.
+
+  ### Refatoração
+  - Refatorar classes de memória para serem serializáveis.
+
+  ### Documentação
+  - Atualizar o changelog e a versão para v1.10.1.
+
+  ## Contribuidores
+
+  @akaKuruma, @github-actions[bot], @giulio-leone, @greysonlalonde, @joaomdmoura, @jonathansampson, @lorenzejay, @lucasgomide, @mattatcha
+
+</Update>
+
+<Update label="04 mar 2026">
+  ## v1.10.1
+
+  [Ver release no GitHub](https://github.com/crewAIInc/crewAI/releases/tag/1.10.1)
+
+  ## O que mudou
+
+  ### Recursos
+  - Atualizar Gemini GenAI
+
+  ### Correções de Bugs
+  - Ajustar o valor do listener do executor para evitar recursão
+  - Agrupar partes da resposta da função paralela em um único objeto Content no Gemini
+  - Exibir a saída de pensamento dos modelos de pensamento no Gemini
+  - Carregar ferramentas MCP e da plataforma quando as ferramentas do agente forem None
+  - Suportar ambientes Jupyter com loops de eventos em A2A
+  - Usar ID anônimo para rastreamentos efêmeros
+  - Passar condicionalmente o cabeçalho plus
+  - Ignorar o registro do manipulador de sinal em threads não principais para telemetria
+  - Injetar erros de ferramentas como observações e resolver colisões de nomes
+  - Atualizar pypdf de 4.x para 6.7.4 para resolver alertas do Dependabot
+  - Resolver alertas de segurança críticos e altos do Dependabot
+
+  ### Documentação
+  - Sincronizar a documentação da ferramenta Composio entre locais
+
+  ## Contribuidores
+
+  @giulio-leone, @greysonlalonde, @haxzie, @joaomdmoura, @lorenzejay, @mattatcha, @mplachta, @nicoferdi96
+
+</Update>
+
 <Update label="27 fev 2026">
  ## v1.10.1a1

--- a/docs/pt-BR/guides/migration/migrating-from-langgraph.mdx
+++ b/docs/pt-BR/guides/migration/migrating-from-langgraph.mdx
@@ -0,0 +1,518 @@
+---
+title: "Migrando do LangGraph para o CrewAI: um guia prático para engenheiros"
+description: Se você já construiu com LangGraph, saiba como portar rapidamente seus projetos para o CrewAI
+icon: switch
+mode: "wide"
+---
+
+Você construiu agentes com LangGraph. Já lutou com o `StateGraph`, ligou arestas condicionais e depurou dicionários de estado às 2 da manhã. Funciona — mas, em algum momento, você começou a se perguntar se existe um caminho melhor para produção.
+
+Existe. **CrewAI Flows** entrega o mesmo poder — orquestração orientada a eventos, roteamento condicional, estado compartilhado — com muito menos boilerplate e um modelo mental que se alinha a como você realmente pensa sobre fluxos de trabalho de IA em múltiplas etapas.
+
+Este artigo apresenta os conceitos principais lado a lado, mostra comparações reais de código e demonstra por que o CrewAI Flows é o framework que você vai querer usar a seguir.
+
+---
+
+## A Mudança de Modelo Mental
+
+LangGraph pede que você pense em **grafos**: nós, arestas e dicionários de estado. Todo workflow é um grafo direcionado em que você conecta explicitamente as transições entre as etapas de computação. É poderoso, mas a abstração traz overhead — especialmente quando o seu fluxo é fundamentalmente sequencial com alguns pontos de decisão.
+
+CrewAI Flows pede que você pense em **eventos**: métodos que iniciam, métodos que escutam resultados e métodos que roteiam a execução. A topologia do workflow emerge de anotações com decorators, em vez de construção explícita do grafo. Isso não é apenas açúcar sintático — muda como você projeta, lê e mantém seus pipelines.
+
+Veja o mapeamento principal:
+
+| Conceito no LangGraph | Equivalente no CrewAI Flows |
+| --- | --- |
+| `StateGraph` class | `Flow` class |
+| `add_node()` | Methods decorated with `@start`, `@listen` |
+| `add_edge()` / `add_conditional_edges()` | `@listen()` / `@router()` decorators |
+| `TypedDict` state | Pydantic `BaseModel` state |
+| `START` / `END` constants | `@start()` decorator / natural method return |
+| `graph.compile()` | `flow.kickoff()` |
+| Checkpointer / persistence | Built-in memory (LanceDB-backed) |
+
+Vamos ver como isso fica na prática.
+
+---
+
+## Demo 1: Um Pipeline Sequencial Simples
+
+Imagine que você está construindo um pipeline que recebe um tema, pesquisa, escreve um resumo e formata a saída. Veja como cada framework lida com isso.
+
+### Abordagem com LangGraph
+
+```python
+from typing import TypedDict
+from langgraph.graph import StateGraph, START, END
+
+class ResearchState(TypedDict):
+    topic: str
+    raw_research: str
+    summary: str
+    formatted_output: str
+
+def research_topic(state: ResearchState) -> dict:
+    # Call an LLM or search API
+    result = llm.invoke(f"Research the topic: {state['topic']}")
+    return {"raw_research": result}
+
+def write_summary(state: ResearchState) -> dict:
+    result = llm.invoke(
+        f"Summarize this research:\n{state['raw_research']}"
+    )
+    return {"summary": result}
+
+def format_output(state: ResearchState) -> dict:
+    result = llm.invoke(
+        f"Format this summary as a polished article section:\n{state['summary']}"
+    )
+    return {"formatted_output": result}
+
+# Build the graph
+graph = StateGraph(ResearchState)
+graph.add_node("research", research_topic)
+graph.add_node("summarize", write_summary)
+graph.add_node("format", format_output)
+
+graph.add_edge(START, "research")
+graph.add_edge("research", "summarize")
+graph.add_edge("summarize", "format")
+graph.add_edge("format", END)
+
+# Compile and run
+app = graph.compile()
+result = app.invoke({"topic": "quantum computing advances in 2026"})
+print(result["formatted_output"])
+```
+
+Você define funções, registra-as como nós e conecta manualmente cada transição. Para uma sequência simples como essa, há muita cerimônia.
+
+### Abordagem com CrewAI Flows
+
+```python
+from crewai import LLM, Agent, Crew, Process, Task
+from crewai.flow.flow import Flow, listen, start
+from pydantic import BaseModel
+
+llm = LLM(model="openai/gpt-5.2")
+
+class ResearchState(BaseModel):
+    topic: str = ""
+    raw_research: str = ""
+    summary: str = ""
+    formatted_output: str = ""
+
+class ResearchFlow(Flow[ResearchState]):
+    @start()
+    def research_topic(self):
+        # Option 1: Direct LLM call
+        result = llm.call(f"Research the topic: {self.state.topic}")
+        self.state.raw_research = result
+        return result
+
+    @listen(research_topic)
+    def write_summary(self, research_output):
+        # Option 2: A single agent
+        summarizer = Agent(
+            role="Research Summarizer",
+            goal="Produce concise, accurate summaries of research content",
+            backstory="You are an expert at distilling complex research into clear, "
+            "digestible summaries.",
+            llm=llm,
+            verbose=True,
+        )
+        result = summarizer.kickoff(
+            f"Summarize this research:\n{self.state.raw_research}"
+        )
+        self.state.summary = str(result)
+        return self.state.summary
+
+    @listen(write_summary)
+    def format_output(self, summary_output):
+        # Option 3: a complete crew (with one or more agents)
+        formatter = Agent(
+            role="Content Formatter",
+            goal="Transform research summaries into polished, publication-ready article sections",
+            backstory="You are a skilled editor with expertise in structuring and "
+            "presenting technical content for a general audience.",
+            llm=llm,
+            verbose=True,
+        )
+        format_task = Task(
+            description=f"Format this summary as a polished article section:\n{self.state.summary}",
+            expected_output="A well-structured, polished article section ready for publication.",
+            agent=formatter,
+        )
+        crew = Crew(
+            agents=[formatter],
+            tasks=[format_task],
+            process=Process.sequential,
+            verbose=True,
+        )
+        result = crew.kickoff()
+        self.state.formatted_output = str(result)
+        return self.state.formatted_output
+
+# Run the flow
+flow = ResearchFlow()
+flow.state.topic = "quantum computing advances in 2026"
+result = flow.kickoff()
+print(flow.state.formatted_output)
+
+```
+
+Repare a diferença: nada de construção de grafo, de ligação de arestas, nem de etapa de compilação. A ordem de execução é declarada exatamente onde a lógica vive. `@start()` marca o ponto de entrada, e `@listen(method_name)` encadeia as etapas. O estado é um modelo Pydantic de verdade, com segurança de tipos, validação e auto-complete na IDE.
+
+---
+
+## Demo 2: Roteamento Condicional
+
+Aqui é que fica interessante. Digamos que você está construindo um pipeline de conteúdo que roteia para diferentes caminhos de processamento com base no tipo de conteúdo detectado.
+
+### Abordagem com LangGraph
+
+```python
+from typing import TypedDict, Literal
+from langgraph.graph import StateGraph, START, END
+
+class ContentState(TypedDict):
+    input_text: str
+    content_type: str
+    result: str
+
+def classify_content(state: ContentState) -> dict:
+    content_type = llm.invoke(
+        f"Classify this content as 'technical', 'creative', or 'business':\n{state['input_text']}"
+    )
+    return {"content_type": content_type.strip().lower()}
+
+def process_technical(state: ContentState) -> dict:
+    result = llm.invoke(f"Process as technical doc:\n{state['input_text']}")
+    return {"result": result}
+
+def process_creative(state: ContentState) -> dict:
+    result = llm.invoke(f"Process as creative writing:\n{state['input_text']}")
+    return {"result": result}
+
+def process_business(state: ContentState) -> dict:
+    result = llm.invoke(f"Process as business content:\n{state['input_text']}")
+    return {"result": result}
+
+# Routing function
+def route_content(state: ContentState) -> Literal["technical", "creative", "business"]:
+    return state["content_type"]
+
+# Build the graph
+graph = StateGraph(ContentState)
+graph.add_node("classify", classify_content)
+graph.add_node("technical", process_technical)
+graph.add_node("creative", process_creative)
+graph.add_node("business", process_business)
+
+graph.add_edge(START, "classify")
+graph.add_conditional_edges(
+    "classify",
+    route_content,
+    {
+        "technical": "technical",
+        "creative": "creative",
+        "business": "business",
+    }
+)
+graph.add_edge("technical", END)
+graph.add_edge("creative", END)
+graph.add_edge("business", END)
+
+app = graph.compile()
+result = app.invoke({"input_text": "Explain how TCP handshakes work"})
+```
+
+Você precisa de uma função de roteamento separada, de um mapeamento explícito de arestas condicionais e de arestas de término para cada ramificação. A lógica de roteamento fica desacoplada do nó que produz a decisão.
+
+### Abordagem com CrewAI Flows
+
+```python
+from crewai import LLM, Agent
+from crewai.flow.flow import Flow, listen, router, start
+from pydantic import BaseModel
+
+llm = LLM(model="openai/gpt-5.2")
+
+class ContentState(BaseModel):
+    input_text: str = ""
+    content_type: str = ""
+    result: str = ""
+
+class ContentFlow(Flow[ContentState]):
+    @start()
+    def classify_content(self):
+        self.state.content_type = (
+            llm.call(
+                f"Classify this content as 'technical', 'creative', or 'business':\n"
+                f"{self.state.input_text}"
+            )
+            .strip()
+            .lower()
+        )
+        return self.state.content_type
+
+    @router(classify_content)
+    def route_content(self, classification):
+        if classification == "technical":
+            return "process_technical"
+        elif classification == "creative":
+            return "process_creative"
+        else:
+            return "process_business"
+
+    @listen("process_technical")
+    def handle_technical(self):
+        agent = Agent(
+            role="Technical Writer",
+            goal="Produce clear, accurate technical documentation",
+            backstory="You are an expert technical writer who specializes in "
+            "explaining complex technical concepts precisely.",
+            llm=llm,
+            verbose=True,
+        )
+        self.state.result = str(
+            agent.kickoff(f"Process as technical doc:\n{self.state.input_text}")
+        )
+
+    @listen("process_creative")
+    def handle_creative(self):
+        agent = Agent(
+            role="Creative Writer",
+            goal="Craft engaging and imaginative creative content",
+            backstory="You are a talented creative writer with a flair for "
+            "compelling storytelling and vivid expression.",
+            llm=llm,
+            verbose=True,
+        )
+        self.state.result = str(
+            agent.kickoff(f"Process as creative writing:\n{self.state.input_text}")
+        )
+
+    @listen("process_business")
+    def handle_business(self):
+        agent = Agent(
+            role="Business Writer",
+            goal="Produce professional, results-oriented business content",
+            backstory="You are an experienced business writer who communicates "
+            "strategy and value clearly to professional audiences.",
+            llm=llm,
+            verbose=True,
+        )
+        self.state.result = str(
+            agent.kickoff(f"Process as business content:\n{self.state.input_text}")
+        )
+
+flow = ContentFlow()
+flow.state.input_text = "Explain how TCP handshakes work"
+flow.kickoff()
+print(flow.state.result)
+
+```
+
+O decorator `@router()` transforma um método em um ponto de decisão. Ele retorna uma string que corresponde a um listener — sem dicionários de mapeamento, sem funções de roteamento separadas. A lógica de ramificação parece um `if` em Python porque *é* um.
+
+---
+
+## Demo 3: Integrando Crews de Agentes de IA em Flows
+
+É aqui que o verdadeiro poder do CrewAI aparece. Flows não servem apenas para encadear chamadas de LLM — elas orquestram **Crews** completas de agentes autônomos. Isso é algo para o qual o LangGraph simplesmente não tem um equivalente nativo.
+
+```python
+from crewai import Agent, Task, Crew
+from crewai.flow.flow import Flow, listen, start
+from pydantic import BaseModel
+
+class ArticleState(BaseModel):
+    topic: str = ""
+    research: str = ""
+    draft: str = ""
+    final_article: str = ""
+
+class ArticleFlow(Flow[ArticleState]):
+
+    @start()
+    def run_research_crew(self):
+        """A full Crew of agents handles research."""
+        researcher = Agent(
+            role="Senior Research Analyst",
+            goal=f"Produce comprehensive research on: {self.state.topic}",
+            backstory="You're a veteran analyst known for thorough, "
+                       "well-sourced research reports.",
+            llm="gpt-4o"
+        )
+
+        research_task = Task(
+            description=f"Research '{self.state.topic}' thoroughly. "
+                        "Cover key trends, data points, and expert opinions.",
+            expected_output="A detailed research brief with sources.",
+            agent=researcher
+        )
+
+        crew = Crew(agents=[researcher], tasks=[research_task])
+        result = crew.kickoff()
+        self.state.research = result.raw
+        return result.raw
+
+    @listen(run_research_crew)
+    def run_writing_crew(self, research_output):
+        """A different Crew handles writing."""
+        writer = Agent(
+            role="Technical Writer",
+            goal="Write a compelling article based on provided research.",
+            backstory="You turn complex research into engaging, clear prose.",
+            llm="gpt-4o"
+        )
+
+        editor = Agent(
+            role="Senior Editor",
+            goal="Review and polish articles for publication quality.",
+            backstory="20 years of editorial experience at top tech publications.",
+            llm="gpt-4o"
+        )
+
+        write_task = Task(
+            description=f"Write an article based on this research:\n{self.state.research}",
+            expected_output="A well-structured draft article.",
+            agent=writer
+        )
+
+        edit_task = Task(
+            description="Review, fact-check, and polish the draft article.",
+            expected_output="A publication-ready article.",
+            agent=editor
+        )
+
+        crew = Crew(agents=[writer, editor], tasks=[write_task, edit_task])
+        result = crew.kickoff()
+        self.state.final_article = result.raw
+        return result.raw
+
+# Run the full pipeline
+flow = ArticleFlow()
+flow.state.topic = "The Future of Edge AI"
+flow.kickoff()
+print(flow.state.final_article)
+```
+
+Este é o insight-chave: **Flows fornecem a camada de orquestração, e Crews fornecem a camada de inteligência.** Cada etapa em um Flow pode subir uma equipe completa de agentes colaborativos, cada um com seus próprios papéis, objetivos e ferramentas. Você obtém fluxo de controle estruturado e previsível *e* colaboração autônoma de agentes — o melhor dos dois mundos.
+
+No LangGraph, alcançar algo similar significa implementar manualmente protocolos de comunicação entre agentes, loops de chamada de ferramentas e lógica de delegação dentro das funções dos nós. É possível, mas é encanamento que você constrói do zero todas as vezes.
+
+---
+
+## Demo 4: Execução Paralela e Sincronização
+
+Pipelines do mundo real frequentemente precisam dividir o trabalho e juntar os resultados. O CrewAI Flows lida com isso de forma elegante com os operadores `and_` e `or_`.
+
+```python
+from crewai import LLM
+from crewai.flow.flow import Flow, and_, listen, start
+from pydantic import BaseModel
+
+llm = LLM(model="openai/gpt-5.2")
+
+class AnalysisState(BaseModel):
+    topic: str = ""
+    market_data: str = ""
+    tech_analysis: str = ""
+    competitor_intel: str = ""
+    final_report: str = ""
+
+class ParallelAnalysisFlow(Flow[AnalysisState]):
+    @start()
+    def start_method(self):
+        pass
+
+    @listen(start_method)
+    def gather_market_data(self):
+        # Your agentic or deterministic code
+        pass
+
+    @listen(start_method)
+    def run_tech_analysis(self):
+        # Your agentic or deterministic code
+        pass
+
+    @listen(start_method)
+    def gather_competitor_intel(self):
+        # Your agentic or deterministic code
+        pass
+
+    @listen(and_(gather_market_data, run_tech_analysis, gather_competitor_intel))
+    def synthesize_report(self):
+        # Your agentic or deterministic code
+        pass
+
+flow = ParallelAnalysisFlow()
+flow.state.topic = "AI-powered developer tools"
+flow.kickoff()
+
+```
+
+Vários decorators `@start()` disparam em paralelo. O combinador `and_()` no decorator `@listen` garante que `synthesize_report` só execute depois que *todos os três* métodos upstream forem concluídos. Também existe `or_()` para quando você quer prosseguir assim que *qualquer* tarefa upstream terminar.
+
+No LangGraph, você precisaria construir um padrão fan-out/fan-in com ramificações paralelas, um nó de sincronização e uma mesclagem de estado cuidadosa — tudo conectado explicitamente por arestas.
+
+---
+
+## Por que CrewAI Flows em Produção
+
+Além de uma sintaxe mais limpa, Flows entrega várias vantagens críticas para produção:
+
+**Persistência de estado integrada.** O estado do Flow é respaldado pelo LanceDB, o que significa que seus workflows podem sobreviver a falhas, ser retomados e acumular conhecimento entre execuções. No LangGraph, você precisa configurar um checkpointer separado.
+
+**Gerenciamento de estado com segurança de tipos.** Modelos Pydantic oferecem validação, serialização e suporte de IDE prontos para uso. Estados `TypedDict` do LangGraph não validam em runtime.
+
+**Orquestração de agentes de primeira classe.** Crews são um primitivo nativo. Você define agentes com papéis, objetivos, histórias e ferramentas — e eles colaboram de forma autônoma dentro do envelope estruturado de um Flow. Não é preciso reinventar a coordenação multiagente.
+
+**Modelo mental mais simples.** Decorators declaram intenção. `@start` significa "comece aqui". `@listen(x)` significa "execute depois de x". `@router(x)` significa "decida para onde ir depois de x". O código lê como o workflow que ele descreve.
+
+**Integração com CLI.** Execute flows com `crewai run`. Sem etapa de compilação separada, sem serialização de grafo. Seu Flow é uma classe Python, e ele roda como tal.
+
+---
+
+## Cheat Sheet de Migração
+
+Se você está com uma base de código LangGraph e quer migrar para o CrewAI Flows, aqui vai um guia prático de conversão:
+
+1. **Mapeie seu estado.** Converta seu `TypedDict` para um `BaseModel` do Pydantic. Adicione valores padrão para todos os campos.
+2. **Converta nós em métodos.** Cada função de `add_node` vira um método na sua subclasse de `Flow`. Substitua leituras `state["field"]` por `self.state.field`.
+3. **Substitua arestas por decorators.** `add_edge(START, "first_node")` vira `@start()` no primeiro método. A sequência `add_edge("a", "b")` vira `@listen(a)` no método `b`.
+4. **Substitua arestas condicionais por `@router`.** A função de roteamento e o mapeamento do `add_conditional_edges()` viram um único método `@router()` que retorna a string de rota.
+5. **Troque compile + invoke por kickoff.** Remova `graph.compile()`. Chame `flow.kickoff()`.
+6. **Considere onde as Crews se encaixam.** Qualquer nó com lógica complexa de agentes em múltiplas etapas é um candidato a extração para uma Crew. É aqui que você verá a maior melhoria de qualidade.
+
+---
+
+## Primeiros Passos
+
+Instale o CrewAI e crie o scaffold de um novo projeto Flow:
+
+```bash
+pip install crewai
+crewai create flow my_first_flow
+cd my_first_flow
+```
+
+Isso gera uma estrutura de projeto com uma classe Flow pronta para edição, arquivos de configuração e um `pyproject.toml` com `type = "flow"` já definido. Execute com:
+
+```bash
+crewai run
+```
+
+A partir daí, adicione seus agentes, conecte seus listeners e publique.
+
+---
+
+## Considerações Finais
+
+O LangGraph ensinou ao ecossistema que workflows de IA precisam de estrutura. Essa foi uma lição importante. Mas o CrewAI Flows pega essa lição e a entrega de um jeito mais rápido de escrever, mais fácil de ler e mais poderoso em produção — especialmente quando seus workflows envolvem múltiplos agentes colaborando.
+
+Se você está construindo algo além de uma cadeia de agente único, dê uma olhada séria no Flows. O modelo baseado em decorators, a integração nativa com Crews e o gerenciamento de estado embutido significam menos tempo com encanamento e mais tempo nos problemas que importam.
+
+Comece com `crewai create flow`. Você não vai olhar para trás.
--- a/lib/crewai-files/pyproject.toml
+++ b/lib/crewai-files/pyproject.toml
@@ -9,7 +9,7 @@ authors = [
 requires-python = ">=3.10, <3.14"
 dependencies = [
    "Pillow~=12.1.1",
-    "pypdf~=6.7.4",
+    "pypdf~=6.7.5",
    "python-magic>=0.4.27",
    "aiocache~=0.12.3",
    "aiofiles~=24.1.0",
--- a/lib/crewai-files/src/crewai_files/init.py
+++ b/lib/crewai-files/src/crewai_files/init.py
@@ -152,4 +152,4 @@ __all__ = [
    "wrap_file_source",
 ]

-__version__ = "1.10.1a1"
+__version__ = "1.10.2a1"
--- a/lib/crewai-tools/pyproject.toml
+++ b/lib/crewai-tools/pyproject.toml
@@ -11,7 +11,7 @@ dependencies = [
    "pytube~=15.0.0",
    "requests~=2.32.5",
    "docker~=7.1.0",
-    "crewai==1.10.1a1",
+    "crewai==1.10.2a1",
    "tiktoken~=0.8.0",
    "beautifulsoup4~=4.13.4",
    "python-docx~=1.2.0",
@@ -108,7 +108,7 @@ stagehand = [
    "stagehand>=0.4.1",
 ]
 github = [
-    "gitpython==3.1.38",
+    "gitpython>=3.1.41,<4",
    "PyGithub==1.59.1",
 ]
 rag = [
--- a/lib/crewai-tools/src/crewai_tools/init.py
+++ b/lib/crewai-tools/src/crewai_tools/init.py
@@ -10,7 +10,18 @@ from crewai_tools.aws.s3.writer_tool import S3WriterTool
 from crewai_tools.tools.ai_mind_tool.ai_mind_tool import AIMindTool
 from crewai_tools.tools.apify_actors_tool.apify_actors_tool import ApifyActorsTool
 from crewai_tools.tools.arxiv_paper_tool.arxiv_paper_tool import ArxivPaperTool
+from crewai_tools.tools.brave_search_tool.brave_image_tool import BraveImageSearchTool
+from crewai_tools.tools.brave_search_tool.brave_llm_context_tool import (
+    BraveLLMContextTool,
+)
+from crewai_tools.tools.brave_search_tool.brave_local_pois_tool import (
+    BraveLocalPOIsDescriptionTool,
+    BraveLocalPOIsTool,
+)
+from crewai_tools.tools.brave_search_tool.brave_news_tool import BraveNewsSearchTool
 from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool
+from crewai_tools.tools.brave_search_tool.brave_video_tool import BraveVideoSearchTool
+from crewai_tools.tools.brave_search_tool.brave_web_tool import BraveWebSearchTool
 from crewai_tools.tools.brightdata_tool.brightdata_dataset import (
    BrightDataDatasetTool,
 )
@@ -200,7 +211,14 @@ __all__ = [
    "ArxivPaperTool",
    "BedrockInvokeAgentTool",
    "BedrockKBRetrieverTool",
+    "BraveImageSearchTool",
+    "BraveLLMContextTool",
+    "BraveLocalPOIsDescriptionTool",
+    "BraveLocalPOIsTool",
+    "BraveNewsSearchTool",
    "BraveSearchTool",
+    "BraveVideoSearchTool",
+    "BraveWebSearchTool",
    "BrightDataDatasetTool",
    "BrightDataSearchTool",
    "BrightDataWebUnlockerTool",
@@ -291,4 +309,4 @@ __all__ = [
    "ZapierActionTools",
 ]

-__version__ = "1.10.1a1"
+__version__ = "1.10.2a1"
--- a/lib/crewai-tools/src/crewai_tools/tools/init.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/init.py
@@ -1,7 +1,18 @@
 from crewai_tools.tools.ai_mind_tool.ai_mind_tool import AIMindTool
 from crewai_tools.tools.apify_actors_tool.apify_actors_tool import ApifyActorsTool
 from crewai_tools.tools.arxiv_paper_tool.arxiv_paper_tool import ArxivPaperTool
+from crewai_tools.tools.brave_search_tool.brave_image_tool import BraveImageSearchTool
+from crewai_tools.tools.brave_search_tool.brave_llm_context_tool import (
+    BraveLLMContextTool,
+)
+from crewai_tools.tools.brave_search_tool.brave_local_pois_tool import (
+    BraveLocalPOIsDescriptionTool,
+    BraveLocalPOIsTool,
+)
+from crewai_tools.tools.brave_search_tool.brave_news_tool import BraveNewsSearchTool
 from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool
+from crewai_tools.tools.brave_search_tool.brave_video_tool import BraveVideoSearchTool
+from crewai_tools.tools.brave_search_tool.brave_web_tool import BraveWebSearchTool
 from crewai_tools.tools.brightdata_tool import (
    BrightDataDatasetTool,
    BrightDataSearchTool,
@@ -185,7 +196,14 @@ __all__ = [
    "AIMindTool",
    "ApifyActorsTool",
    "ArxivPaperTool",
+    "BraveImageSearchTool",
+    "BraveLLMContextTool",
+    "BraveLocalPOIsDescriptionTool",
+    "BraveLocalPOIsTool",
+    "BraveNewsSearchTool",
    "BraveSearchTool",
+    "BraveVideoSearchTool",
+    "BraveWebSearchTool",
    "BrightDataDatasetTool",
    "BrightDataSearchTool",
    "BrightDataWebUnlockerTool",
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/base.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/base.py
@@ -0,0 +1,322 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from datetime import datetime
+import json
+import logging
+import os
+import threading
+import time
+from typing import Any, ClassVar
+
+from crewai.tools import BaseTool, EnvVar
+from pydantic import BaseModel, Field
+import requests
+
+
+logger = logging.getLogger(__name__)
+
+# Brave API error codes that indicate non-retryable quota/usage exhaustion.
+_QUOTA_CODES = frozenset({"QUOTA_LIMITED", "USAGE_LIMIT_EXCEEDED"})
+
+
+def _save_results_to_file(content: str) -> None:
+    """Saves the search results to a file."""
+    filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
+    with open(filename, "w") as file:
+        file.write(content)
+
+
+def _parse_error_body(resp: requests.Response) -> dict[str, Any] | None:
+    """Extract the structured "error" object from a Brave API error response."""
+    try:
+        body = resp.json()
+        error = body.get("error")
+        return error if isinstance(error, dict) else None
+    except (ValueError, KeyError):
+        return None
+
+
+def _raise_for_error(resp: requests.Response) -> None:
+    """Brave Search API error responses contain helpful JSON payloads"""
+    status = resp.status_code
+    try:
+        body = json.dumps(resp.json())
+    except (ValueError, KeyError):
+        body = resp.text[:500]
+
+    raise RuntimeError(f"Brave Search API error (HTTP {status}): {body}")
+
+
+def _is_retryable(resp: requests.Response) -> bool:
+    """Return True for transient failures that are worth retrying.
+
+    * 429 + RATE_LIMITED — the per-second sliding window is full.
+    * 5xx — transient server-side errors.
+
+    Quota exhaustion (QUOTA_LIMITED, USAGE_LIMIT_EXCEEDED) is
+    explicitly excluded: retrying will never succeed until the billing
+    period resets.
+    """
+    if resp.status_code == 429:
+        error = _parse_error_body(resp) or {}
+        return error.get("code") not in _QUOTA_CODES
+    return 500 <= resp.status_code < 600
+
+
+def _retry_delay(resp: requests.Response, attempt: int) -> float:
+    """Compute wait time before the next retry attempt.
+
+    Prefers the server-supplied Retry-After header when available;
+    falls back to exponential backoff (1s, 2s, 4s, ...).
+    """
+    retry_after = resp.headers.get("Retry-After")
+    if retry_after is not None:
+        try:
+            return max(0.0, float(retry_after))
+        except (ValueError, TypeError):
+            pass
+    return float(2**attempt)
+
+
+class BraveSearchToolBase(BaseTool, ABC):
+    """
+    Base class for Brave Search API interactions.
+
+    Individual tool subclasses must provide the following:
+      - search_url
+      - header_schema (pydantic model)
+      - args_schema (pydantic model)
+      - _refine_payload() -> dict[str, Any]
+    """
+
+    search_url: str
+    raw: bool = False
+    args_schema: type[BaseModel]
+    header_schema: type[BaseModel]
+
+    # Tool options (legacy parameters)
+    country: str | None = None
+    save_file: bool = False
+    n_results: int = 10
+
+    env_vars: list[EnvVar] = Field(
+        default_factory=lambda: [
+            EnvVar(
+                name="BRAVE_API_KEY",
+                description="API key for Brave Search",
+                required=True,
+            ),
+        ]
+    )
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None = None,
+        headers: dict[str, Any] | None = None,
+        requests_per_second: float = 1.0,
+        save_file: bool = False,
+        raw: bool = False,
+        timeout: int = 30,
+        **kwargs: Any,
+    ):
+        super().__init__(**kwargs)
+
+        self._api_key = api_key or os.environ.get("BRAVE_API_KEY")
+        if not self._api_key:
+            raise ValueError("BRAVE_API_KEY environment variable is required")
+
+        self.raw = bool(raw)
+        self._timeout = int(timeout)
+        self.save_file = bool(save_file)
+        self._requests_per_second = float(requests_per_second)
+        self._headers = self._build_and_validate_headers(headers or {})
+        # Per-instance rate limiting: each instance has its own clock and lock.
+        # Total process rate is the sum of limits of instances you create.
+        self._last_request_time: float = 0
+        self._rate_limit_lock = threading.Lock()
+
+    @property
+    def api_key(self) -> str:
+        return self._api_key
+
+    @property
+    def headers(self) -> dict[str, Any]:
+        return self._headers
+
+    def set_headers(self, headers: dict[str, Any]) -> BraveSearchToolBase:
+        merged = {**self._headers, **{k.lower(): v for k, v in headers.items()}}
+        self._headers = self._build_and_validate_headers(merged)
+        return self
+
+    def _build_and_validate_headers(self, headers: dict[str, Any]) -> dict[str, Any]:
+        normalized = {k.lower(): v for k, v in headers.items()}
+        normalized.setdefault("x-subscription-token", self._api_key)
+        normalized.setdefault("accept", "application/json")
+
+        try:
+            self.header_schema(**normalized)
+        except Exception as e:
+            raise ValueError(f"Invalid headers: {e}") from e
+
+        return normalized
+
+    def _rate_limit(self) -> None:
+        """Enforce minimum interval between requests for this instance. Thread-safe."""
+        if self._requests_per_second <= 0:
+            return
+
+        min_interval = 1.0 / self._requests_per_second
+        with self._rate_limit_lock:
+            now = time.time()
+            next_allowed = self._last_request_time + min_interval
+            if now < next_allowed:
+                time.sleep(next_allowed - now)
+                now = time.time()
+            self._last_request_time = now
+
+    def _make_request(
+        self, params: dict[str, Any], *, _max_retries: int = 3
+    ) -> dict[str, Any]:
+        """Execute an HTTP GET against the Brave Search API with retry logic."""
+        last_resp: requests.Response | None = None
+
+        # Retry the request up to _max_retries times
+        for attempt in range(_max_retries):
+            self._rate_limit()
+
+            # Make the request
+            try:
+                resp = requests.get(
+                    self.search_url,
+                    headers=self._headers,
+                    params=params,
+                    timeout=self._timeout,
+                )
+            except requests.ConnectionError as exc:
+                raise RuntimeError(
+                    f"Brave Search API connection failed: {exc}"
+                ) from exc
+            except requests.Timeout as exc:
+                raise RuntimeError(
+                    f"Brave Search API request timed out after {self._timeout}s: {exc}"
+                ) from exc
+
+            # Log the rate limit headers and request details
+            logger.debug(
+                "Brave Search API request: %s %s -> %d",
+                "GET",
+                resp.url,
+                resp.status_code,
+            )
+
+            # Response was OK, return the JSON body
+            if resp.ok:
+                try:
+                    return resp.json()
+                except ValueError as exc:
+                    raise RuntimeError(
+                        f"Brave Search API returned invalid JSON (HTTP {resp.status_code}): {exc}"
+                    ) from exc
+
+            # Response was not OK, but is retryable
+            # (e.g., 429 Too Many Requests, 500 Internal Server Error)
+            if _is_retryable(resp) and attempt < _max_retries - 1:
+                delay = _retry_delay(resp, attempt)
+                logger.warning(
+                    "Brave Search API returned %d. Retrying in %.1fs (attempt %d/%d)",
+                    resp.status_code,
+                    delay,
+                    attempt + 1,
+                    _max_retries,
+                )
+                time.sleep(delay)
+                last_resp = resp
+                continue
+
+            # Response was not OK, nor was it retryable
+            # (e.g., 422 Unprocessable Entity, 400 Bad Request (OPTION_NOT_IN_PLAN))
+            _raise_for_error(resp)
+
+        # All retries exhausted
+        _raise_for_error(last_resp or resp)  # type: ignore[possibly-undefined]
+        return {}  # unreachable (here to satisfy the type checker and linter)
+
+    def _run(self, q: str | None = None, **params: Any) -> Any:
+        # Allow positional usage: tool.run("latest Brave browser features")
+        if q is not None:
+            params["q"] = q
+
+        params = self._common_payload_refinement(params)
+
+        # Validate only schema fields
+        schema_keys = self.args_schema.model_fields
+        payload_in = {k: v for k, v in params.items() if k in schema_keys}
+
+        try:
+            validated = self.args_schema(**payload_in)
+        except Exception as e:
+            raise ValueError(f"Invalid parameters: {e}") from e
+
+        # The subclass may have additional refinements to apply to the payload, such as goggles or other parameters
+        payload = self._refine_request_payload(validated.model_dump(exclude_none=True))
+        response = self._make_request(payload)
+
+        if not self.raw:
+            response = self._refine_response(response)
+
+        if self.save_file:
+            _save_results_to_file(json.dumps(response, indent=2))
+
+        return response
+
+    @abstractmethod
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Subclass must implement: transform validated params dict into API request params."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def _refine_response(self, response: dict[str, Any]) -> Any:
+        """Subclass must implement: transform response dict into a more useful format."""
+        raise NotImplementedError
+
+    _EMPTY_VALUES: ClassVar[tuple[None, str, str, list[Any]]] = (None, "", "null", [])
+
+    def _common_payload_refinement(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Common payload refinement for all tools."""
+        # crewAI's schema pipeline (ensure_all_properties_required in
+        # pydantic_schema_utils.py) marks every property as required so
+        # that OpenAI strict-mode structured outputs work correctly.
+        # The side-effect is that the LLM fills in *every* parameter —
+        # even truly optional ones — using placeholder values such as
+        # None, "", "null", or [].  Only optional fields are affected,
+        # so we limit the check to those.
+        fields = self.args_schema.model_fields
+        params = {
+            k: v
+            for k, v in params.items()
+            # Permit custom and required fields, and fields with non-empty values
+            if k not in fields or fields[k].is_required() or v not in self._EMPTY_VALUES
+        }
+
+        # Make sure params has "q" for query instead of "query" or "search_query"
+        query = params.get("query") or params.get("search_query")
+        if query is not None and "q" not in params:
+            params["q"] = query
+        params.pop("query", None)
+        params.pop("search_query", None)
+
+        # If "count" was not explicitly provided, use n_results
+        # (only when the schema actually supports a "count" field)
+        if "count" in self.args_schema.model_fields:
+            if "count" not in params and self.n_results is not None:
+                params["count"] = self.n_results
+
+        # If "country" was not explicitly provided, but self.country is set, use it
+        # (only when the schema actually supports a "country" field)
+        if "country" in self.args_schema.model_fields:
+            if "country" not in params and self.country is not None:
+                params["country"] = self.country
+
+        return params
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_image_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_image_tool.py
@@ -0,0 +1,42 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.schemas import (
+    ImageSearchHeaders,
+    ImageSearchParams,
+)
+
+
+class BraveImageSearchTool(BraveSearchToolBase):
+    """A tool that performs image searches using the Brave Search API."""
+
+    name: str = "Brave Image Search"
+    args_schema: type[BaseModel] = ImageSearchParams
+    header_schema: type[BaseModel] = ImageSearchHeaders
+
+    description: str = (
+        "A tool that performs image searches using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+
+    search_url: str = "https://api.search.brave.com/res/v1/images/search"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: dict[str, Any]) -> list[dict[str, Any]]:
+        # Make the response more concise, and easier to consume
+        results = response.get("results", [])
+        return [
+            {
+                "title": result.get("title"),
+                "url": result.get("properties", {}).get("url"),
+                "dimensions": f"{w}x{h}"
+                if (w := result.get("properties", {}).get("width"))
+                and (h := result.get("properties", {}).get("height"))
+                else None,
+            }
+            for result in results
+        ]
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_llm_context_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_llm_context_tool.py
@@ -0,0 +1,32 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.response_types import LLMContext
+from crewai_tools.tools.brave_search_tool.schemas import (
+    LLMContextHeaders,
+    LLMContextParams,
+)
+
+
+class BraveLLMContextTool(BraveSearchToolBase):
+    """A tool that retrieves context for LLM usage from the Brave Search API."""
+
+    name: str = "Brave LLM Context"
+    args_schema: type[BaseModel] = LLMContextParams
+    header_schema: type[BaseModel] = LLMContextHeaders
+
+    description: str = (
+        "A tool that retrieves context for LLM usage from the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+
+    search_url: str = "https://api.search.brave.com/res/v1/llm/context"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: LLMContext.Response) -> LLMContext.Response:
+        """The LLM Context response schema is fairly simple. Return as is."""
+        return response
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_local_pois_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_local_pois_tool.py
@@ -0,0 +1,109 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.response_types import LocalPOIs
+from crewai_tools.tools.brave_search_tool.schemas import (
+    LocalPOIsDescriptionHeaders,
+    LocalPOIsDescriptionParams,
+    LocalPOIsHeaders,
+    LocalPOIsParams,
+)
+
+
+DayOpeningHours = LocalPOIs.DayOpeningHours
+OpeningHours = LocalPOIs.OpeningHours
+LocationResult = LocalPOIs.LocationResult
+LocalPOIsResponse = LocalPOIs.Response
+
+
+def _flatten_slots(slots: list[DayOpeningHours]) -> list[dict[str, str]]:
+    """Convert a list of DayOpeningHours dicts into simplified entries."""
+    return [
+        {
+            "day": slot["full_name"].lower(),
+            "opens": slot["opens"],
+            "closes": slot["closes"],
+        }
+        for slot in slots
+    ]
+
+
+def _simplify_opening_hours(result: LocationResult) -> list[dict[str, str]] | None:
+    """Collapse opening_hours into a flat list of {day, opens, closes} dicts."""
+    hours = result.get("opening_hours")
+    if not hours:
+        return None
+
+    entries: list[dict[str, str]] = []
+
+    current = hours.get("current_day")
+    if current:
+        entries.extend(_flatten_slots(current))
+
+    days = hours.get("days")
+    if days:
+        for day_slots in days:
+            entries.extend(_flatten_slots(day_slots))
+
+    return entries or None
+
+
+class BraveLocalPOIsTool(BraveSearchToolBase):
+    """A tool that retrieves local POIs using the Brave Search API."""
+
+    name: str = "Brave Local POIs"
+    args_schema: type[BaseModel] = LocalPOIsParams
+    header_schema: type[BaseModel] = LocalPOIsHeaders
+    description: str = (
+        "A tool that retrieves local POIs using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+    search_url: str = "https://api.search.brave.com/res/v1/local/pois"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: LocalPOIsResponse) -> list[dict[str, Any]]:
+        results = response.get("results", [])
+        return [
+            {
+                "title": result.get("title"),
+                "url": result.get("url"),
+                "description": result.get("description"),
+                "address": result.get("postal_address", {}).get("displayAddress"),
+                "contact": result.get("contact", {}).get("telephone")
+                or result.get("contact", {}).get("email")
+                or None,
+                "opening_hours": _simplify_opening_hours(result),
+            }
+            for result in results
+        ]
+
+
+class BraveLocalPOIsDescriptionTool(BraveSearchToolBase):
+    """A tool that retrieves AI-generated descriptions for local POIs using the Brave Search API."""
+
+    name: str = "Brave Local POI Descriptions"
+    args_schema: type[BaseModel] = LocalPOIsDescriptionParams
+    header_schema: type[BaseModel] = LocalPOIsDescriptionHeaders
+    description: str = (
+        "A tool that retrieves AI-generated descriptions for local POIs using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+    search_url: str = "https://api.search.brave.com/res/v1/local/descriptions"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: LocalPOIsResponse) -> list[dict[str, Any]]:
+        # Make the response more concise, and easier to consume
+        results = response.get("results", [])
+        return [
+            {
+                "id": result.get("id"),
+                "description": result.get("description"),
+            }
+            for result in results
+        ]
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_news_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_news_tool.py
@@ -0,0 +1,39 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.schemas import (
+    NewsSearchHeaders,
+    NewsSearchParams,
+)
+
+
+class BraveNewsSearchTool(BraveSearchToolBase):
+    """A tool that performs news searches using the Brave Search API."""
+
+    name: str = "Brave News Search"
+    args_schema: type[BaseModel] = NewsSearchParams
+    header_schema: type[BaseModel] = NewsSearchHeaders
+
+    description: str = (
+        "A tool that performs news searches using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+
+    search_url: str = "https://api.search.brave.com/res/v1/news/search"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: dict[str, Any]) -> list[dict[str, Any]]:
+        # Make the response more concise, and easier to consume
+        results = response.get("results", [])
+        return [
+            {
+                "url": result.get("url"),
+                "title": result.get("title"),
+                "description": result.get("description"),
+            }
+            for result in results
+        ]
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py
@@ -10,16 +10,13 @@ from pydantic import BaseModel, Field
 from pydantic.types import StringConstraints
 import requests

+from crewai_tools.tools.brave_search_tool.schemas import WebSearchParams
+from crewai_tools.tools.brave_search_tool.base import _save_results_to_file
+
+
 load_dotenv()


-def _save_results_to_file(content: str) -> None:
-    """Saves the search results to a file."""
-    filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
-    with open(filename, "w") as file:
-        file.write(content)
-
-
 FreshnessPreset = Literal["pd", "pw", "pm", "py"]
 FreshnessRange = Annotated[
    str, StringConstraints(pattern=r"^\d{4}-\d{2}-\d{2}to\d{4}-\d{2}-\d{2}$")
@@ -28,51 +25,6 @@ Freshness = FreshnessPreset | FreshnessRange
 SafeSearch = Literal["off", "moderate", "strict"]


-class BraveSearchToolSchema(BaseModel):
-    """Input for BraveSearchTool"""
-
-    query: str = Field(..., description="Search query to perform")
-    country: str | None = Field(
-        default=None,
-        description="Country code for geo-targeting (e.g., 'US', 'BR').",
-    )
-    search_language: str | None = Field(
-        default=None,
-        description="Language code for the search results (e.g., 'en', 'es').",
-    )
-    count: int | None = Field(
-        default=None,
-        description="The maximum number of results to return. Actual number may be less.",
-    )
-    offset: int | None = Field(
-        default=None, description="Skip the first N result sets/pages. Max is 9."
-    )
-    safesearch: SafeSearch | None = Field(
-        default=None,
-        description="Filter out explicit content. Options: off/moderate/strict",
-    )
-    spellcheck: bool | None = Field(
-        default=None,
-        description="Attempt to correct spelling errors in the search query.",
-    )
-    freshness: Freshness | None = Field(
-        default=None,
-        description="Enforce freshness of results. Options: pd/pw/pm/py, or YYYY-MM-DDtoYYYY-MM-DD",
-    )
-    text_decorations: bool | None = Field(
-        default=None,
-        description="Include markup to highlight search terms in the results.",
-    )
-    extra_snippets: bool | None = Field(
-        default=None,
-        description="Include up to 5 text snippets for each page if possible.",
-    )
-    operators: bool | None = Field(
-        default=None,
-        description="Whether to apply search operators (e.g., site:example.com).",
-    )
-
-
 # TODO: Extend support to additional endpoints (e.g., /images, /news, etc.)
 class BraveSearchTool(BaseTool):
    """A tool that performs web searches using the Brave Search API."""
@@ -82,7 +34,7 @@ class BraveSearchTool(BaseTool):
        "A tool that performs web searches using the Brave Search API. "
        "Results are returned as structured JSON data."
    )
-    args_schema: type[BaseModel] = BraveSearchToolSchema
+    args_schema: type[BaseModel] = WebSearchParams
    search_url: str = "https://api.search.brave.com/res/v1/web/search"
    n_results: int = 10
    save_file: bool = False
@@ -119,8 +71,8 @@ class BraveSearchTool(BaseTool):

        # Construct and send the request
        try:
-            # Maintain both "search_query" and "query" for backwards compatibility
-            query = kwargs.get("search_query") or kwargs.get("query")
+            # Fallback to "query" or "search_query" for backwards compatibility
+            query = kwargs.get("q") or kwargs.get("query") or kwargs.get("search_query")
            if not query:
                raise ValueError("Query is required")

@@ -129,8 +81,11 @@ class BraveSearchTool(BaseTool):
            if country := kwargs.get("country"):
                payload["country"] = country

-            if search_language := kwargs.get("search_language"):
-                payload["search_language"] = search_language
+            # Fallback to "search_language" for backwards compatibility
+            if search_lang := kwargs.get("search_lang") or kwargs.get(
+                "search_language"
+            ):
+                payload["search_lang"] = search_lang

            # Fallback to deprecated n_results parameter if no count is provided
            count = kwargs.get("count")
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_video_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_video_tool.py
@@ -0,0 +1,39 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.schemas import (
+    VideoSearchHeaders,
+    VideoSearchParams,
+)
+
+
+class BraveVideoSearchTool(BraveSearchToolBase):
+    """A tool that performs video searches using the Brave Search API."""
+
+    name: str = "Brave Video Search"
+    args_schema: type[BaseModel] = VideoSearchParams
+    header_schema: type[BaseModel] = VideoSearchHeaders
+
+    description: str = (
+        "A tool that performs video searches using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+
+    search_url: str = "https://api.search.brave.com/res/v1/videos/search"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: dict[str, Any]) -> list[dict[str, Any]]:
+        # Make the response more concise, and easier to consume
+        results = response.get("results", [])
+        return [
+            {
+                "url": result.get("url"),
+                "title": result.get("title"),
+                "description": result.get("description"),
+            }
+            for result in results
+        ]
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_web_tool.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/brave_web_tool.py
@@ -0,0 +1,45 @@
+from typing import Any
+
+from pydantic import BaseModel
+
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.schemas import (
+    WebSearchHeaders,
+    WebSearchParams,
+)
+
+
+class BraveWebSearchTool(BraveSearchToolBase):
+    """A tool that performs web searches using the Brave Search API."""
+
+    name: str = "Brave Web Search"
+    args_schema: type[BaseModel] = WebSearchParams
+    header_schema: type[BaseModel] = WebSearchHeaders
+
+    description: str = (
+        "A tool that performs web searches using the Brave Search API. "
+        "Results are returned as structured JSON data."
+    )
+
+    search_url: str = "https://api.search.brave.com/res/v1/web/search"
+
+    def _refine_request_payload(self, params: dict[str, Any]) -> dict[str, Any]:
+        return params
+
+    def _refine_response(self, response: dict[str, Any]) -> list[dict[str, Any]]:
+        results = response.get("web", {}).get("results", [])
+        refined = []
+        for result in results:
+            snippets = result.get("extra_snippets") or []
+            if not snippets:
+                desc = result.get("description")
+                if desc:
+                    snippets = [desc]
+            refined.append(
+                {
+                    "url": result.get("url"),
+                    "title": result.get("title"),
+                    "snippets": snippets,
+                }
+            )
+        return refined
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/response_types.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/response_types.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+from typing import Literal, TypedDict
+
+
+class LocalPOIs:
+    class PostalAddress(TypedDict, total=False):
+        type: Literal["PostalAddress"]
+        country: str
+        postalCode: str
+        streetAddress: str
+        addressRegion: str
+        addressLocality: str
+        displayAddress: str
+
+    class DayOpeningHours(TypedDict):
+        abbr_name: str
+        full_name: str
+        opens: str
+        closes: str
+
+    class OpeningHours(TypedDict, total=False):
+        current_day: list[LocalPOIs.DayOpeningHours]
+        days: list[list[LocalPOIs.DayOpeningHours]]
+
+    class LocationResult(TypedDict, total=False):
+        provider_url: str
+        title: str
+        url: str
+        id: str | None
+        opening_hours: LocalPOIs.OpeningHours | None
+        postal_address: LocalPOIs.PostalAddress | None
+
+    class Response(TypedDict, total=False):
+        type: Literal["local_pois"]
+        results: list[LocalPOIs.LocationResult]
+
+
+class LLMContext:
+    class LLMContextItem(TypedDict, total=False):
+        snippets: list[str]
+        title: str
+        url: str
+
+    class LLMContextMapItem(TypedDict, total=False):
+        name: str
+        snippets: list[str]
+        title: str
+        url: str
+
+    class LLMContextPOIItem(TypedDict, total=False):
+        name: str
+        snippets: list[str]
+        title: str
+        url: str
+
+    class Grounding(TypedDict, total=False):
+        generic: list[LLMContext.LLMContextItem]
+        poi: LLMContext.LLMContextPOIItem
+        map: list[LLMContext.LLMContextMapItem]
+
+    class Sources(TypedDict, total=False):
+        pass
+
+    class Response(TypedDict, total=False):
+        grounding: LLMContext.Grounding
+        sources: LLMContext.Sources
--- a/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/schemas.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/brave_search_tool/schemas.py
@@ -0,0 +1,525 @@
+from typing import Annotated, Literal
+
+from pydantic import BaseModel, Field
+from pydantic.types import StringConstraints
+
+
+# Common types
+Units = Literal["metric", "imperial"]
+SafeSearch = Literal["off", "moderate", "strict"]
+Freshness = (
+    Literal["pd", "pw", "pm", "py"]
+    | Annotated[
+        str, StringConstraints(pattern=r"^\d{4}-\d{2}-\d{2}to\d{4}-\d{2}-\d{2}$")
+    ]
+)
+ResultFilter = list[
+    Literal[
+        "discussions",
+        "faq",
+        "infobox",
+        "news",
+        "query",
+        "summarizer",
+        "videos",
+        "web",
+        "locations",
+    ]
+]
+
+
+class LLMContextParams(BaseModel):
+    """Parameters for Brave LLM Context endpoint."""
+
+    q: str = Field(
+        description="Search query to perform",
+        min_length=1,
+        max_length=400,
+    )
+    country: str | None = Field(
+        default=None,
+        description="Country code for geo-targeting (e.g., 'US', 'BR').",
+        pattern=r"^[A-Z]{2}$",
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    count: int | None = Field(
+        default=None,
+        description="The maximum number of results to return. Actual number may be less.",
+        ge=1,
+        le=50,
+    )
+    maximum_number_of_urls: int | None = Field(
+        default=None,
+        description="The maximum number of URLs to include in the context.",
+        ge=1,
+        le=50,
+    )
+    maximum_number_of_tokens: int | None = Field(
+        default=None,
+        description="The approximate maximum number of tokens to include in the context.",
+        ge=1,
+        le=32768,
+    )
+    maximum_number_of_snippets: int | None = Field(
+        default=None,
+        description="The maximum number of different snippets to include in the context.",
+        ge=1,
+        le=100,
+    )
+    context_threshold_mode: (
+        Literal["disabled", "strict", "lenient", "balanced"] | None
+    ) = Field(
+        default=None,
+        description="The mode to use for the context thresholding.",
+    )
+    maximum_number_of_tokens_per_url: int | None = Field(
+        default=None,
+        description="The maximum number of tokens to include for each URL in the context.",
+        ge=1,
+        le=8192,
+    )
+    maximum_number_of_snippets_per_url: int | None = Field(
+        default=None,
+        description="The maximum number of snippets to include per URL.",
+        ge=1,
+        le=100,
+    )
+    goggles: str | list[str] | None = Field(
+        default=None,
+        description="Goggles act as a custom re-ranking mechanism. Goggle source or URLs.",
+    )
+    enable_local: bool | None = Field(
+        default=None,
+        description="Whether to enable local recall. Not setting this value means auto-detect and uses local recall if any of the localization headers are provided.",
+    )
+
+
+class WebSearchParams(BaseModel):
+    """Parameters for Brave Web Search endpoint."""
+
+    q: str = Field(
+        description="Search query to perform",
+        min_length=1,
+        max_length=400,
+    )
+    country: str | None = Field(
+        default=None,
+        description="Country code for geo-targeting (e.g., 'US', 'BR').",
+        pattern=r"^[A-Z]{2}$",
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    ui_lang: str | None = Field(
+        default=None,
+        description="Language code for the user interface (e.g., 'en-US', 'es-AR').",
+        pattern=r"^[a-z]{2}-[A-Z]{2}$",
+    )
+    count: int | None = Field(
+        default=None,
+        description="The maximum number of results to return. Actual number may be less.",
+        ge=1,
+        le=20,
+    )
+    offset: int | None = Field(
+        default=None,
+        description="Skip the first N result sets/pages. Max is 9.",
+        ge=0,
+        le=9,
+    )
+    safesearch: Literal["off", "moderate", "strict"] | None = Field(
+        default=None,
+        description="Filter out explicit content. Options: off/moderate/strict",
+    )
+    spellcheck: bool | None = Field(
+        default=None,
+        description="Attempt to correct spelling errors in the search query.",
+    )
+    freshness: Freshness | None = Field(
+        default=None,
+        description="Enforce freshness of results. Options: pd/pw/pm/py, or YYYY-MM-DDtoYYYY-MM-DD",
+    )
+    text_decorations: bool | None = Field(
+        default=None,
+        description="Include markup to highlight search terms in the results.",
+    )
+    extra_snippets: bool | None = Field(
+        default=None,
+        description="Include up to 5 text snippets for each page if possible.",
+    )
+    result_filter: ResultFilter | None = Field(
+        default=None,
+        description="Filter the results by type. Options: discussions/faq/infobox/news/query/summarizer/videos/web/locations. Note: The `count` parameter is applied only to the `web` results.",
+    )
+    units: Units | None = Field(
+        default=None,
+        description="The units to use for the results. Options: metric/imperial",
+    )
+    goggles: str | list[str] | None = Field(
+        default=None,
+        description="Goggles act as a custom re-ranking mechanism. Goggle source or URLs.",
+    )
+    summary: bool | None = Field(
+        default=None,
+        description="Whether to generate a summarizer ID for the results.",
+    )
+    enable_rich_callback: bool | None = Field(
+        default=None,
+        description="Whether to enable rich callbacks for the results. Requires Pro level subscription.",
+    )
+    include_fetch_metadata: bool | None = Field(
+        default=None,
+        description="Whether to include fetch metadata (e.g., last fetch time) in the results.",
+    )
+    operators: bool | None = Field(
+        default=None,
+        description="Whether to apply search operators (e.g., site:example.com).",
+    )
+
+
+class LocalPOIsParams(BaseModel):
+    """Parameters for Brave Local POIs endpoint."""
+
+    ids: list[str] = Field(
+        description="List of POI IDs to retrieve. Maximum of 20. IDs are valid for 8 hours.",
+        min_length=1,
+        max_length=20,
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    ui_lang: str | None = Field(
+        default=None,
+        description="Language code for the user interface (e.g., 'en-US', 'es-AR').",
+        pattern=r"^[a-z]{2}-[A-Z]{2}$",
+    )
+    units: Units | None = Field(
+        default=None,
+        description="The units to use for the results. Options: metric/imperial",
+    )
+
+
+class LocalPOIsDescriptionParams(BaseModel):
+    """Parameters for Brave Local POI Descriptions endpoint."""
+
+    ids: list[str] = Field(
+        description="List of POI IDs to retrieve. Maximum of 20. IDs are valid for 8 hours.",
+        min_length=1,
+        max_length=20,
+    )
+
+
+class ImageSearchParams(BaseModel):
+    """Parameters for Brave Image Search endpoint."""
+
+    q: str = Field(
+        description="Search query to perform",
+        min_length=1,
+        max_length=400,
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    country: str | None = Field(
+        default=None,
+        description="Country code for geo-targeting (e.g., 'US', 'BR').",
+        pattern=r"^[A-Z]{2}$",
+    )
+    safesearch: Literal["off", "strict"] | None = Field(
+        default=None,
+        description="Filter out explicit content. Default is strict.",
+    )
+    count: int | None = Field(
+        default=None,
+        description="The maximum number of results to return.",
+        ge=1,
+        le=200,
+    )
+    spellcheck: bool | None = Field(
+        default=None,
+        description="Attempt to correct spelling errors in the search query.",
+    )
+
+
+class VideoSearchParams(BaseModel):
+    """Parameters for Brave Video Search endpoint."""
+
+    q: str = Field(
+        description="Search query to perform",
+        min_length=1,
+        max_length=400,
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    ui_lang: str | None = Field(
+        default=None,
+        description="Language code for the user interface (e.g., 'en-US', 'es-AR').",
+        pattern=r"^[a-z]{2}-[A-Z]{2}$",
+    )
+    country: str | None = Field(
+        default=None,
+        description="Country code for geo-targeting (e.g., 'US', 'BR').",
+        pattern=r"^[A-Z]{2}$",
+    )
+    safesearch: SafeSearch | None = Field(
+        default=None,
+        description="Filter out explicit content. Options: off/moderate/strict",
+    )
+    count: int | None = Field(
+        default=None,
+        description="The maximum number of results to return.",
+        ge=1,
+        le=50,
+    )
+    offset: int | None = Field(
+        default=None,
+        description="Skip the first N result sets/pages. Max is 9.",
+        ge=0,
+        le=9,
+    )
+    spellcheck: bool | None = Field(
+        default=None,
+        description="Attempt to correct spelling errors in the search query.",
+    )
+    freshness: Freshness | None = Field(
+        default=None,
+        description="Enforce freshness of results. Options: pd/pw/pm/py, or YYYY-MM-DDtoYYYY-MM-DD",
+    )
+    include_fetch_metadata: bool | None = Field(
+        default=None,
+        description="Whether to include fetch metadata (e.g., last fetch time) in the results.",
+    )
+    operators: bool | None = Field(
+        default=None,
+        description="Whether to apply search operators (e.g., site:example.com).",
+    )
+
+
+class NewsSearchParams(BaseModel):
+    """Parameters for Brave News Search endpoint."""
+
+    q: str = Field(
+        description="Search query to perform",
+        min_length=1,
+        max_length=400,
+    )
+    search_lang: str | None = Field(
+        default=None,
+        description="Language code for the search results (e.g., 'en', 'es').",
+        pattern=r"^[a-z]{2}$",
+    )
+    ui_lang: str | None = Field(
+        default=None,
+        description="Language code for the user interface (e.g., 'en-US', 'es-AR').",
+        pattern=r"^[a-z]{2}-[A-Z]{2}$",
+    )
+    country: str | None = Field(
+        default=None,
+        description="Country code for geo-targeting (e.g., 'US', 'BR').",
+        pattern=r"^[A-Z]{2}$",
+    )
+    safesearch: Literal["off", "moderate", "strict"] | None = Field(
+        default=None,
+        description="Filter out explicit content. Options: off/moderate/strict",
+    )
+    count: int | None = Field(
+        default=None,
+        description="The maximum number of results to return.",
+        ge=1,
+        le=50,
+    )
+    offset: int | None = Field(
+        default=None,
+        description="Skip the first N result sets/pages. Max is 9.",
+        ge=0,
+        le=9,
+    )
+    spellcheck: bool | None = Field(
+        default=None,
+        description="Attempt to correct spelling errors in the search query.",
+    )
+    freshness: Freshness | None = Field(
+        default=None,
+        description="Enforce freshness of results. Options: pd/pw/pm/py, or YYYY-MM-DDtoYYYY-MM-DD",
+    )
+    extra_snippets: bool | None = Field(
+        default=None,
+        description="Include up to 5 text snippets for each page if possible.",
+    )
+    goggles: str | list[str] | None = Field(
+        default=None,
+        description="Goggles act as a custom re-ranking mechanism. Goggle source or URLs.",
+    )
+    include_fetch_metadata: bool | None = Field(
+        default=None,
+        description="Whether to include fetch metadata in the results.",
+    )
+    operators: bool | None = Field(
+        default=None,
+        description="Whether to apply search operators (e.g., site:example.com).",
+    )
+
+
+class BaseSearchHeaders(BaseModel):
+    """Common headers for Brave Search endpoints."""
+
+    x_subscription_token: str = Field(
+        alias="x-subscription-token",
+        description="API key for Brave Search",
+    )
+    api_version: str | None = Field(
+        alias="api-version",
+        default=None,
+        description="API version to use. Default is latest available.",
+        pattern=r"^\d{4}-\d{2}-\d{2}$",  # YYYY-MM-DD
+    )
+    accept: Literal["application/json"] | Literal["*/*"] | None = Field(
+        default=None,
+        description="Accept header for the request.",
+    )
+    cache_control: Literal["no-cache"] | None = Field(
+        alias="cache-control",
+        default=None,
+        description="Cache control header for the request.",
+    )
+    user_agent: str | None = Field(
+        alias="user-agent",
+        default=None,
+        description="User agent for the request.",
+    )
+
+
+class LLMContextHeaders(BaseSearchHeaders):
+    """Headers for Brave LLM Context endpoint."""
+
+    x_loc_lat: float | None = Field(
+        alias="x-loc-lat",
+        default=None,
+        description="Latitude of the user's location.",
+        ge=-90.0,
+        le=90.0,
+    )
+    x_loc_long: float | None = Field(
+        alias="x-loc-long",
+        default=None,
+        description="Longitude of the user's location.",
+        ge=-180.0,
+        le=180.0,
+    )
+    x_loc_city: str | None = Field(
+        alias="x-loc-city",
+        default=None,
+        description="City of the user's location.",
+    )
+    x_loc_state: str | None = Field(
+        alias="x-loc-state",
+        default=None,
+        description="State of the user's location.",
+    )
+    x_loc_state_name: str | None = Field(
+        alias="x-loc-state-name",
+        default=None,
+        description="Name of the state of the user's location.",
+    )
+    x_loc_country: str | None = Field(
+        alias="x-loc-country",
+        default=None,
+        description="The ISO 3166-1 alpha-2 country code of the user's location.",
+    )
+
+
+class LocalPOIsHeaders(BaseSearchHeaders):
+    """Headers for Brave Local POIs endpoint."""
+
+    x_loc_lat: float | None = Field(
+        alias="x-loc-lat",
+        default=None,
+        description="Latitude of the user's location.",
+        ge=-90.0,
+        le=90.0,
+    )
+    x_loc_long: float | None = Field(
+        alias="x-loc-long",
+        default=None,
+        description="Longitude of the user's location.",
+        ge=-180.0,
+        le=180.0,
+    )
+
+
+class LocalPOIsDescriptionHeaders(BaseSearchHeaders):
+    """Headers for Brave Local POI Descriptions endpoint."""
+
+
+class VideoSearchHeaders(BaseSearchHeaders):
+    """Headers for Brave Video Search endpoint."""
+
+
+class ImageSearchHeaders(BaseSearchHeaders):
+    """Headers for Brave Image Search endpoint."""
+
+
+class NewsSearchHeaders(BaseSearchHeaders):
+    """Headers for Brave News Search endpoint."""
+
+
+class WebSearchHeaders(BaseSearchHeaders):
+    """Headers for Brave Web Search endpoint."""
+
+    x_loc_lat: float | None = Field(
+        alias="x-loc-lat",
+        default=None,
+        description="Latitude of the user's location.",
+        ge=-90.0,
+        le=90.0,
+    )
+    x_loc_long: float | None = Field(
+        alias="x-loc-long",
+        default=None,
+        description="Longitude of the user's location.",
+        ge=-180.0,
+        le=180.0,
+    )
+    x_loc_timezone: str | None = Field(
+        alias="x-loc-timezone",
+        default=None,
+        description="Timezone of the user's location.",
+    )
+    x_loc_city: str | None = Field(
+        alias="x-loc-city",
+        default=None,
+        description="City of the user's location.",
+    )
+    x_loc_state: str | None = Field(
+        alias="x-loc-state",
+        default=None,
+        description="State of the user's location.",
+    )
+    x_loc_state_name: str | None = Field(
+        alias="x-loc-state-name",
+        default=None,
+        description="Name of the state of the user's location.",
+    )
+    x_loc_country: str | None = Field(
+        alias="x-loc-country",
+        default=None,
+        description="The ISO 3166-1 alpha-2 country code of the user's location.",
+    )
+    x_loc_postal_code: str | None = Field(
+        alias="x-loc-postal-code",
+        default=None,
+        description="The postal code of the user's location.",
+    )
--- a/lib/crewai-tools/src/crewai_tools/tools/multion_tool/example.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/multion_tool/example.py
@@ -1,7 +1,7 @@
 import os

 from crewai import Agent, Crew, Task
-from multion_tool import MultiOnTool # type: ignore[import-not-found]
+from multion_tool import MultiOnTool  # type: ignore[import-not-found]


 os.environ["OPENAI_API_KEY"] = "Your Key"
--- a/lib/crewai-tools/src/crewai_tools/tools/stagehand_tool/example.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/stagehand_tool/example.py
@@ -17,11 +17,11 @@ Usage:

 import os

+from crewai import Agent, Crew, Process, Task
 from crewai.utilities.printer import Printer
 from dotenv import load_dotenv
 from stagehand.schemas import AvailableModel  # type: ignore[import-untyped]

-from crewai import Agent, Crew, Process, Task
 from crewai_tools import StagehandTool


--- a/lib/crewai-tools/tests/tools/brave_search_tool_test.py
+++ b/lib/crewai-tools/tests/tools/brave_search_tool_test.py
@@ -1,80 +1,777 @@
-import json
-from unittest.mock import patch
+import os
+from unittest.mock import MagicMock, patch

 import pytest
+import requests as requests_lib

-from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool
+from crewai_tools.tools.brave_search_tool.base import BraveSearchToolBase
+from crewai_tools.tools.brave_search_tool.brave_web_tool import BraveWebSearchTool
+from crewai_tools.tools.brave_search_tool.brave_image_tool import BraveImageSearchTool
+from crewai_tools.tools.brave_search_tool.brave_news_tool import BraveNewsSearchTool
+from crewai_tools.tools.brave_search_tool.brave_video_tool import BraveVideoSearchTool
+from crewai_tools.tools.brave_search_tool.brave_llm_context_tool import (
+    BraveLLMContextTool,
+)
+from crewai_tools.tools.brave_search_tool.brave_local_pois_tool import (
+    BraveLocalPOIsTool,
+    BraveLocalPOIsDescriptionTool,
+)
+from crewai_tools.tools.brave_search_tool.schemas import (
+    WebSearchParams,
+    WebSearchHeaders,
+    ImageSearchParams,
+    ImageSearchHeaders,
+    NewsSearchParams,
+    NewsSearchHeaders,
+    VideoSearchParams,
+    VideoSearchHeaders,
+    LLMContextParams,
+    LLMContextHeaders,
+    LocalPOIsParams,
+    LocalPOIsHeaders,
+    LocalPOIsDescriptionParams,
+    LocalPOIsDescriptionHeaders,
+)
+
+
+def _mock_response(
+    status_code: int = 200,
+    json_data: dict | None = None,
+    headers: dict | None = None,
+    text: str = "",
+) -> MagicMock:
+    """Build a ``requests.Response``-like mock with the attributes used by ``_make_request``."""
+    resp = MagicMock(spec=requests_lib.Response)
+    resp.status_code = status_code
+    resp.ok = 200 <= status_code < 400
+    resp.url = "https://api.search.brave.com/res/v1/web/search?q=test"
+    resp.text = text or (str(json_data) if json_data else "")
+    resp.headers = headers or {}
+    resp.json.return_value = json_data if json_data is not None else {}
+    return resp
+
+
+# Fixtures
+
+
+@pytest.fixture(autouse=True)
+def _brave_env_and_rate_limit():
+    """Set BRAVE_API_KEY for every test. Rate limiting is per-instance (each tool starts with a fresh clock)."""
+    with patch.dict(os.environ, {"BRAVE_API_KEY": "test-api-key"}):
+        yield


@pytest.fixture
-def brave_tool():
-    return BraveSearchTool(n_results=2)
+def web_tool():
+    return BraveWebSearchTool()


-def test_brave_tool_initialization():
-    tool = BraveSearchTool()
-    assert tool.n_results == 10
+@pytest.fixture
+def image_tool():
+    return BraveImageSearchTool()
+
+
+@pytest.fixture
+def news_tool():
+    return BraveNewsSearchTool()
+
+
+@pytest.fixture
+def video_tool():
+    return BraveVideoSearchTool()
+
+
+# Initialization
+
+ALL_TOOL_CLASSES = [
+    BraveWebSearchTool,
+    BraveImageSearchTool,
+    BraveNewsSearchTool,
+    BraveVideoSearchTool,
+    BraveLLMContextTool,
+    BraveLocalPOIsTool,
+    BraveLocalPOIsDescriptionTool,
+]
+
+
+@pytest.mark.parametrize("tool_cls", ALL_TOOL_CLASSES)
+def test_instantiation_with_env_var(tool_cls):
+    """Each tool can be created when BRAVE_API_KEY is in the environment."""
+    tool = tool_cls()
+    assert tool.api_key == "test-api-key"
+
+
+@pytest.mark.parametrize("tool_cls", ALL_TOOL_CLASSES)
+def test_instantiation_with_explicit_key(tool_cls):
+    """An explicit api_key takes precedence over the environment."""
+    tool = tool_cls(api_key="explicit-key")
+    assert tool.api_key == "explicit-key"
+
+
+def test_missing_api_key_raises():
+    with patch.dict(os.environ, {}, clear=True):
+        with pytest.raises(ValueError, match="BRAVE_API_KEY"):
+            BraveWebSearchTool()
+
+
+def test_default_attributes():
+    tool = BraveWebSearchTool()
    assert tool.save_file is False
+    assert tool.n_results == 10
+    assert tool._timeout == 30
+    assert tool._requests_per_second == 1.0
+    assert tool.raw is False


-@patch("requests.get")
-def test_brave_tool_search(mock_get, brave_tool):
-    mock_response = {
+def test_custom_constructor_args():
+    tool = BraveWebSearchTool(
+        save_file=True,
+        timeout=60,
+        n_results=5,
+        requests_per_second=0.5,
+        raw=True,
+    )
+    assert tool.save_file is True
+    assert tool._timeout == 60
+    assert tool.n_results == 5
+    assert tool._requests_per_second == 0.5
+    assert tool.raw is True
+
+
+# Headers
+
+
+def test_default_headers():
+    tool = BraveWebSearchTool()
+    assert tool.headers["x-subscription-token"] == "test-api-key"
+    assert tool.headers["accept"] == "application/json"
+
+
+def test_set_headers_merges_and_normalizes():
+    tool = BraveWebSearchTool()
+    tool.set_headers({"Cache-Control": "no-cache"})
+    assert tool.headers["cache-control"] == "no-cache"
+    assert tool.headers["x-subscription-token"] == "test-api-key"
+
+
+def test_set_headers_returns_self_for_chaining():
+    tool = BraveWebSearchTool()
+    assert tool.set_headers({"Cache-Control": "no-cache"}) is tool
+
+
+def test_invalid_header_value_raises():
+    tool = BraveImageSearchTool()
+    with pytest.raises(ValueError, match="Invalid headers"):
+        tool.set_headers({"Accept": "text/xml"})
+
+
+# Endpoint & Schema Wiring
+
+
+@pytest.mark.parametrize(
+    "tool_cls, expected_url, expected_params, expected_headers",
+    [
+        (
+            BraveWebSearchTool,
+            "https://api.search.brave.com/res/v1/web/search",
+            WebSearchParams,
+            WebSearchHeaders,
+        ),
+        (
+            BraveImageSearchTool,
+            "https://api.search.brave.com/res/v1/images/search",
+            ImageSearchParams,
+            ImageSearchHeaders,
+        ),
+        (
+            BraveNewsSearchTool,
+            "https://api.search.brave.com/res/v1/news/search",
+            NewsSearchParams,
+            NewsSearchHeaders,
+        ),
+        (
+            BraveVideoSearchTool,
+            "https://api.search.brave.com/res/v1/videos/search",
+            VideoSearchParams,
+            VideoSearchHeaders,
+        ),
+        (
+            BraveLLMContextTool,
+            "https://api.search.brave.com/res/v1/llm/context",
+            LLMContextParams,
+            LLMContextHeaders,
+        ),
+        (
+            BraveLocalPOIsTool,
+            "https://api.search.brave.com/res/v1/local/pois",
+            LocalPOIsParams,
+            LocalPOIsHeaders,
+        ),
+        (
+            BraveLocalPOIsDescriptionTool,
+            "https://api.search.brave.com/res/v1/local/descriptions",
+            LocalPOIsDescriptionParams,
+            LocalPOIsDescriptionHeaders,
+        ),
+    ],
+)
+def test_tool_wiring(tool_cls, expected_url, expected_params, expected_headers):
+    tool = tool_cls()
+    assert tool.search_url == expected_url
+    assert tool.args_schema is expected_params
+    assert tool.header_schema is expected_headers
+
+
+# Payload Refinement  (e.g., `query` -> `q`, `count` fallback, param pass-through)
+
+
+def test_web_refine_request_payload_passes_all_params(web_tool):
+    params = web_tool._common_payload_refinement(
+        {
+            "query": "test",
+            "country": "US",
+            "search_lang": "en",
+            "count": 5,
+            "offset": 2,
+            "safesearch": "moderate",
+            "freshness": "pw",
+        }
+    )
+    refined_params = web_tool._refine_request_payload(params)
+
+    assert refined_params["q"] == "test"
+    assert "query" not in refined_params
+    assert refined_params["count"] == 5
+    assert refined_params["country"] == "US"
+    assert refined_params["search_lang"] == "en"
+    assert refined_params["offset"] == 2
+    assert refined_params["safesearch"] == "moderate"
+    assert refined_params["freshness"] == "pw"
+
+
+def test_image_refine_request_payload_passes_all_params(image_tool):
+    params = image_tool._common_payload_refinement(
+        {
+            "query": "cat photos",
+            "country": "US",
+            "search_lang": "en",
+            "safesearch": "strict",
+            "count": 50,
+            "spellcheck": True,
+        }
+    )
+    refined_params = image_tool._refine_request_payload(params)
+
+    assert refined_params["q"] == "cat photos"
+    assert "query" not in refined_params
+    assert refined_params["country"] == "US"
+    assert refined_params["safesearch"] == "strict"
+    assert refined_params["count"] == 50
+    assert refined_params["spellcheck"] is True
+
+
+def test_news_refine_request_payload_passes_all_params(news_tool):
+    params = news_tool._common_payload_refinement(
+        {
+            "query": "breaking news",
+            "country": "US",
+            "count": 10,
+            "offset": 1,
+            "freshness": "pd",
+            "extra_snippets": True,
+        }
+    )
+    refined_params = news_tool._refine_request_payload(params)
+
+    assert refined_params["q"] == "breaking news"
+    assert "query" not in refined_params
+    assert refined_params["country"] == "US"
+    assert refined_params["offset"] == 1
+    assert refined_params["freshness"] == "pd"
+    assert refined_params["extra_snippets"] is True
+
+
+def test_video_refine_request_payload_passes_all_params(video_tool):
+    params = video_tool._common_payload_refinement(
+        {
+            "query": "tutorial",
+            "country": "US",
+            "count": 25,
+            "offset": 0,
+            "safesearch": "strict",
+            "freshness": "pm",
+        }
+    )
+    refined_params = video_tool._refine_request_payload(params)
+
+    assert refined_params["q"] == "tutorial"
+    assert "query" not in refined_params
+    assert refined_params["country"] == "US"
+    assert refined_params["offset"] == 0
+    assert refined_params["freshness"] == "pm"
+
+
+def test_legacy_constructor_params_flow_into_query_params():
+    """The legacy n_results and country constructor params are applied as defaults
+    when count/country are not explicitly provided at call time."""
+    tool = BraveWebSearchTool(n_results=3, country="BR")
+    params = tool._common_payload_refinement({"query": "test"})
+
+    assert params["count"] == 3
+    assert params["country"] == "BR"
+
+
+def test_legacy_constructor_params_do_not_override_explicit_query_params():
+    """Explicit query-time count/country take precedence over constructor defaults."""
+    tool = BraveWebSearchTool(n_results=3, country="BR")
+    params = tool._common_payload_refinement(
+        {"query": "test", "count": 10, "country": "US"}
+    )
+
+    assert params["count"] == 10
+    assert params["country"] == "US"
+
+
+def test_refine_request_payload_passes_multiple_goggles_as_multiple_params(web_tool):
+    result = web_tool._refine_request_payload(
+        {
+            "query": "test",
+            "goggles": ["goggle1", "goggle2"],
+        }
+    )
+    assert result["goggles"] == ["goggle1", "goggle2"]
+
+
+# Null-like / empty value stripping
+#
+# crewAI's ensure_all_properties_required (pydantic_schema_utils.py) marks
+# every schema property as required for OpenAI strict-mode compatibility.
+# Because optional Brave API parameters look required to the LLM, it fills
+# them with placeholder junk — None, "", "null", or [].  The test below
+# verifies that _common_payload_refinement strips these from optional fields.
+
+
+def test_common_refinement_strips_null_like_values(web_tool):
+    """_common_payload_refinement drops optional keys with None / '' / 'null' / []."""
+    params = web_tool._common_payload_refinement(
+        {
+            "query": "test",
+            "country": "US",
+            "search_lang": "",
+            "freshness": "null",
+            "count": 5,
+            "goggles": [],
+        }
+    )
+    assert params["q"] == "test"
+    assert params["country"] == "US"
+    assert params["count"] == 5
+    assert "search_lang" not in params
+    assert "freshness" not in params
+    assert "goggles" not in params
+
+
+# End-to-End _run() with Mocked HTTP Response
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_web_search_end_to_end(mock_get, web_tool):
+    web_tool.raw = True
+    data = {"web": {"results": [{"title": "R", "url": "http://r.co"}]}}
+    mock_get.return_value = _mock_response(json_data=data)
+
+    result = web_tool._run(query="test")
+
+    mock_get.assert_called_once()
+    call_args = mock_get.call_args.kwargs
+    assert call_args["params"]["q"] == "test"
+    assert call_args["headers"]["x-subscription-token"] == "test-api-key"
+    assert result == data
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_image_search_end_to_end(mock_get, image_tool):
+    image_tool.raw = True
+    data = {"results": [{"url": "http://img.co/a.jpg"}]}
+    mock_get.return_value = _mock_response(json_data=data)
+
+    assert image_tool._run(query="cats") == data
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_news_search_end_to_end(mock_get, news_tool):
+    news_tool.raw = True
+    data = {"results": [{"title": "News", "url": "http://n.co"}]}
+    mock_get.return_value = _mock_response(json_data=data)
+
+    assert news_tool._run(query="headlines") == data
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_video_search_end_to_end(mock_get, video_tool):
+    video_tool.raw = True
+    data = {"results": [{"title": "Vid", "url": "http://v.co"}]}
+    mock_get.return_value = _mock_response(json_data=data)
+
+    assert video_tool._run(query="python tutorial") == data
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_raw_false_calls_refine_response(mock_get, web_tool):
+    """With raw=False (the default), _refine_response transforms the API response."""
+    api_response = {
        "web": {
            "results": [
                {
-                    "title": "Test Title",
-                    "url": "http://test.com",
-                    "description": "Test Description",
+                    "title": "CrewAI",
+                    "url": "https://crewai.com",
+                    "description": "AI agent framework",
                }
            ]
        }
    }
-    mock_get.return_value.json.return_value = mock_response
+    mock_get.return_value = _mock_response(json_data=api_response)

-    result = brave_tool.run(query="test")
-    data = json.loads(result)
-    assert isinstance(data, list)
-    assert len(data) >= 1
-    assert data[0]["title"] == "Test Title"
-    assert data[0]["url"] == "http://test.com"
+    assert web_tool.raw is False
+    result = web_tool._run(query="crewai")
+
+    # The web tool's _refine_response extracts and reshapes results.
+    # The key assertion: we should NOT get back the raw API envelope.
+    assert result != api_response


-@patch("requests.get")
-def test_brave_tool(mock_get):
-    mock_response = {
-        "web": {
-            "results": [
-                {
-                    "title": "Brave Browser",
-                    "url": "https://brave.com",
-                    "description": "Brave Browser description",
-                }
-            ]
-        }
-    }
-    mock_get.return_value.json.return_value = mock_response
-
-    tool = BraveSearchTool(n_results=2)
-    result = tool.run(query="Brave Browser")
-    assert result is not None
-
-    # Parse JSON so we can examine the structure
-    data = json.loads(result)
-    assert isinstance(data, list)
-    assert len(data) >= 1
-
-    # First item should have expected fields: title, url, and description
-    first = data[0]
-    assert "title" in first
-    assert first["title"] == "Brave Browser"
-    assert "url" in first
-    assert first["url"] == "https://brave.com"
-    assert "description" in first
-    assert first["description"] == "Brave Browser description"
+# Backward Compatibility & Legacy Parameter Support


-if __name__ == "__main__":
-    test_brave_tool()
-    test_brave_tool_initialization()
-    # test_brave_tool_search(brave_tool)
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_positional_query_argument(mock_get, web_tool):
+    """tool.run('my query') works as a positional argument."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    web_tool._run("positional test")
+
+    assert mock_get.call_args.kwargs["params"]["q"] == "positional test"
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_search_query_backward_compat(mock_get, web_tool):
+    """The legacy 'search_query' param is mapped to 'query'."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    web_tool._run(search_query="legacy test")
+
+    assert mock_get.call_args.kwargs["params"]["q"] == "legacy test"
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base._save_results_to_file")
+def test_save_file_called_when_enabled(mock_save, mock_get):
+    mock_get.return_value = _mock_response(json_data={"results": []})
+
+    tool = BraveWebSearchTool(save_file=True)
+    tool._run(query="test")
+
+    mock_save.assert_called_once()
+
+
+# Error Handling
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_connection_error_raises_runtime_error(mock_get, web_tool):
+    mock_get.side_effect = requests_lib.exceptions.ConnectionError("refused")
+    with pytest.raises(RuntimeError, match="Brave Search API connection failed"):
+        web_tool._run(query="test")
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_timeout_raises_runtime_error(mock_get, web_tool):
+    mock_get.side_effect = requests_lib.exceptions.Timeout("timed out")
+    with pytest.raises(RuntimeError, match="timed out"):
+        web_tool._run(query="test")
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_invalid_params_raises_value_error(mock_get, web_tool):
+    """count=999 exceeds WebSearchParams.count le=20."""
+    with pytest.raises(ValueError, match="Invalid parameters"):
+        web_tool._run(query="test", count=999)
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_4xx_error_raises_with_api_detail(mock_get, web_tool):
+    """A 422 with a structured error body includes code and detail in the message."""
+    mock_get.return_value = _mock_response(
+        status_code=422,
+        json_data={
+            "error": {
+                "id": "abc-123",
+                "status": 422,
+                "code": "OPTION_NOT_IN_PLAN",
+                "detail": "extra_snippets requires a Pro plan",
+            }
+        },
+    )
+    with pytest.raises(RuntimeError, match="OPTION_NOT_IN_PLAN") as exc_info:
+        web_tool._run(query="test")
+    assert "extra_snippets requires a Pro plan" in str(exc_info.value)
+    assert "HTTP 422" in str(exc_info.value)
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_auth_error_raises_immediately(mock_get, web_tool):
+    """A 401 with SUBSCRIPTION_TOKEN_INVALID is not retried."""
+    mock_get.return_value = _mock_response(
+        status_code=401,
+        json_data={
+            "error": {
+                "id": "xyz",
+                "status": 401,
+                "code": "SUBSCRIPTION_TOKEN_INVALID",
+                "detail": "The subscription token is invalid",
+            }
+        },
+    )
+    with pytest.raises(RuntimeError, match="SUBSCRIPTION_TOKEN_INVALID"):
+        web_tool._run(query="test")
+    # Should NOT have retried — only one call.
+    assert mock_get.call_count == 1
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_quota_limited_429_raises_immediately(mock_get, web_tool):
+    """A 429 with QUOTA_LIMITED is NOT retried — quota exhaustion is terminal."""
+    mock_get.return_value = _mock_response(
+        status_code=429,
+        json_data={
+            "error": {
+                "id": "ql-1",
+                "status": 429,
+                "code": "QUOTA_LIMITED",
+                "detail": "Monthly quota exceeded",
+            }
+        },
+    )
+    with pytest.raises(RuntimeError, match="QUOTA_LIMITED") as exc_info:
+        web_tool._run(query="test")
+    assert "Monthly quota exceeded" in str(exc_info.value)
+    # Terminal — only one HTTP call, no retries.
+    assert mock_get.call_count == 1
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_usage_limit_exceeded_429_raises_immediately(mock_get, web_tool):
+    """USAGE_LIMIT_EXCEEDED is also non-retryable, just like QUOTA_LIMITED."""
+    mock_get.return_value = _mock_response(
+        status_code=429,
+        json_data={
+            "error": {
+                "id": "ule-1",
+                "status": 429,
+                "code": "USAGE_LIMIT_EXCEEDED",
+            }
+        },
+        text="usage limit exceeded",
+    )
+    with pytest.raises(RuntimeError, match="USAGE_LIMIT_EXCEEDED"):
+        web_tool._run(query="test")
+    assert mock_get.call_count == 1
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_error_body_is_fully_included_in_message(mock_get, web_tool):
+    """The full JSON error body is included in the RuntimeError message."""
+    mock_get.return_value = _mock_response(
+        status_code=429,
+        json_data={
+            "error": {
+                "id": "x",
+                "status": 429,
+                "code": "QUOTA_LIMITED",
+                "detail": "Exceeded",
+                "meta": {"plan": "free", "limit": 1000},
+            }
+        },
+    )
+    with pytest.raises(RuntimeError) as exc_info:
+        web_tool._run(query="test")
+    msg = str(exc_info.value)
+    assert "HTTP 429" in msg
+    assert "QUOTA_LIMITED" in msg
+    assert "free" in msg
+    assert "1000" in msg
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_error_without_json_body_falls_back_to_text(mock_get, web_tool):
+    """When the error response isn't valid JSON, resp.text is used as the detail."""
+    resp = _mock_response(status_code=500, text="Internal Server Error")
+    resp.json.side_effect = ValueError("No JSON")
+    mock_get.return_value = resp
+
+    with pytest.raises(RuntimeError, match="Internal Server Error"):
+        web_tool._run(query="test")
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+def test_invalid_json_on_success_raises_runtime_error(mock_get, web_tool):
+    """A 200 OK with a non-JSON body raises RuntimeError."""
+    resp = _mock_response(status_code=200)
+    resp.json.side_effect = ValueError("Expecting value")
+    mock_get.return_value = resp
+
+    with pytest.raises(RuntimeError, match="invalid JSON"):
+        web_tool._run(query="test")
+
+
+# Rate Limiting
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_rate_limit_sleeps_when_too_fast(mock_time, mock_get, web_tool):
+    """Back-to-back calls within the interval trigger a sleep."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    # Simulate: last request was at t=100, "now" is t=100.2 (only 0.2s elapsed).
+    # With default 1 req/s the min interval is 1.0s, so it should sleep ~0.8s.
+    mock_time.time.return_value = 100.2
+    web_tool._last_request_time = 100.0
+
+    web_tool._run(query="test")
+
+    mock_time.sleep.assert_called_once()
+    sleep_duration = mock_time.sleep.call_args[0][0]
+    assert 0.7 < sleep_duration < 0.9  # approximately 0.8s
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_rate_limit_skips_sleep_when_enough_time_passed(mock_time, mock_get, web_tool):
+    """No sleep when the elapsed time already exceeds the interval."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    # Last request was at t=100, "now" is t=102 (2s elapsed > 1s interval).
+    mock_time.time.return_value = 102.0
+    web_tool._last_request_time = 100.0
+
+    web_tool._run(query="test")
+
+    mock_time.sleep.assert_not_called()
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_rate_limit_disabled_when_zero(mock_time, mock_get, web_tool):
+    """requests_per_second=0 disables rate limiting entirely."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    web_tool._last_request_time = 100.0
+    mock_time.time.return_value = 100.0  # same instant
+
+    web_tool._run(query="test")
+
+    mock_time.sleep.assert_not_called()
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_rate_limit_per_instance_independent(mock_time, mock_get, web_tool, image_tool):
+    """Each instance has its own rate-limit clock; a request on one does not delay the other."""
+    mock_get.return_value = _mock_response(json_data={})
+
+    # Web tool fires at t=100 (its clock goes 0 -> 100).
+    mock_time.time.return_value = 100.0
+    web_tool._run(query="test")
+
+    # Image tool fires at t=100.3. Its clock is still 0 (separate instance), so
+    # next_allowed = 1.0 and 100.3 > 1.0 — no sleep. Total process rate can be sum of instance limits.
+    mock_time.time.return_value = 100.3
+    image_tool._run(query="cats")
+
+    mock_time.sleep.assert_not_called()
+
+
+# Retry Behavior
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_429_rate_limited_retries_then_succeeds(mock_time, mock_get, web_tool):
+    """A transient RATE_LIMITED 429 is retried; success on the second attempt."""
+    mock_time.time.return_value = 200.0
+
+    resp_429 = _mock_response(
+        status_code=429,
+        json_data={"error": {"id": "r", "status": 429, "code": "RATE_LIMITED"}},
+        headers={"Retry-After": "2"},
+    )
+    resp_200 = _mock_response(status_code=200, json_data={"web": {"results": []}})
+    mock_get.side_effect = [resp_429, resp_200]
+
+    web_tool.raw = True
+    result = web_tool._run(query="test")
+
+    assert result == {"web": {"results": []}}
+    assert mock_get.call_count == 2
+    # Slept for the Retry-After value.
+    retry_sleeps = [c for c in mock_time.sleep.call_args_list if c[0][0] == 2.0]
+    assert len(retry_sleeps) == 1
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_5xx_is_retried(mock_time, mock_get, web_tool):
+    """A 502 server error is retried; success on the second attempt."""
+    mock_time.time.return_value = 200.0
+
+    resp_502 = _mock_response(status_code=502, text="Bad Gateway")
+    resp_502.json.side_effect = ValueError("no json")
+    resp_200 = _mock_response(status_code=200, json_data={"web": {"results": []}})
+    mock_get.side_effect = [resp_502, resp_200]
+
+    web_tool.raw = True
+    result = web_tool._run(query="test")
+
+    assert result == {"web": {"results": []}}
+    assert mock_get.call_count == 2
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_429_rate_limited_exhausts_retries(mock_time, mock_get, web_tool):
+    """Persistent RATE_LIMITED 429s exhaust retries and raise RuntimeError."""
+    mock_time.time.return_value = 200.0
+
+    resp_429 = _mock_response(
+        status_code=429,
+        json_data={"error": {"id": "r", "status": 429, "code": "RATE_LIMITED"}},
+    )
+    mock_get.return_value = resp_429
+
+    with pytest.raises(RuntimeError, match="RATE_LIMITED"):
+        web_tool._run(query="test")
+    # 3 attempts (default _max_retries).
+    assert mock_get.call_count == 3
+
+
+@patch("crewai_tools.tools.brave_search_tool.base.requests.get")
+@patch("crewai_tools.tools.brave_search_tool.base.time")
+def test_retry_uses_exponential_backoff_when_no_retry_after(
+    mock_time, mock_get, web_tool
+):
+    """Without Retry-After, backoff is 2^attempt (1s, 2s, ...)."""
+    mock_time.time.return_value = 200.0
+
+    resp_503 = _mock_response(status_code=503, text="Service Unavailable")
+    resp_503.json.side_effect = ValueError("no json")
+    resp_200 = _mock_response(status_code=200, json_data={"ok": True})
+    mock_get.side_effect = [resp_503, resp_503, resp_200]
+
+    web_tool.raw = True
+    web_tool._run(query="test")
+
+    # Two retries: attempt 0 → sleep(1.0), attempt 1 → sleep(2.0).
+    retry_sleeps = [c[0][0] for c in mock_time.sleep.call_args_list]
+    assert 1.0 in retry_sleeps
+    assert 2.0 in retry_sleeps
--- a/lib/crewai-tools/tool.specs.json
+++ b/lib/crewai-tools/tool.specs.json
--- a/lib/crewai/pyproject.toml
+++ b/lib/crewai/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
    "opentelemetry-exporter-otlp-proto-http~=1.34.0",
    # Data Handling
    "chromadb~=1.1.0",
-    "tokenizers~=0.20.3",
+    "tokenizers>=0.21,<1",
    "openpyxl~=3.1.5",
    # Authentication and Security
    "python-dotenv~=1.1.1",
@@ -53,7 +53,7 @@ Repository = "https://github.com/crewAIInc/crewAI"

 [project.optional-dependencies]
 tools = [
-    "crewai-tools==1.10.1a1",
+    "crewai-tools==1.10.2a1",
 ]
 embeddings = [
    "tiktoken~=0.8.0"
@@ -88,7 +88,7 @@ bedrock = [
    "boto3~=1.40.45",
 ]
 google-genai = [
-    "google-genai~=1.49.0",
+    "google-genai~=1.65.0",
 ]
 azure-ai-inference = [
    "azure-ai-inference~=1.0.0b9",
--- a/lib/crewai/src/crewai/init.py
+++ b/lib/crewai/src/crewai/init.py
@@ -4,6 +4,7 @@ import urllib.request
 import warnings

 from crewai.agent.core import Agent
+from crewai.agent.planning_config import PlanningConfig
 from crewai.crew import Crew
 from crewai.crews.crew_output import CrewOutput
 from crewai.flow.flow import Flow
@@ -40,7 +41,7 @@ def _suppress_pydantic_deprecation_warnings() -> None:

 _suppress_pydantic_deprecation_warnings()

-__version__ = "1.10.1a1"
+__version__ = "1.10.2a1"
 _telemetry_submitted = False


@@ -100,6 +101,7 @@ __all__ = [
    "Knowledge",
    "LLMGuardrail",
    "Memory",
+    "PlanningConfig",
    "Process",
    "Task",
    "TaskOutput",
--- a/lib/crewai/src/crewai/a2a/utils/agent_card.py
+++ b/lib/crewai/src/crewai/a2a/utils/agent_card.py
@@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 from collections.abc import MutableMapping
+import concurrent.futures
 from functools import lru_cache
 import ssl
 import time
@@ -138,14 +139,17 @@ def fetch_agent_card(
        ttl_hash = int(time.time() // cache_ttl)
        return _fetch_agent_card_cached(endpoint, auth_hash, timeout, ttl_hash)

-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
+    coro = afetch_agent_card(endpoint=endpoint, auth=auth, timeout=timeout)
    try:
-        return loop.run_until_complete(
-            afetch_agent_card(endpoint=endpoint, auth=auth, timeout=timeout)
-        )
-    finally:
-        loop.close()
+        asyncio.get_running_loop()
+        has_running_loop = True
+    except RuntimeError:
+        has_running_loop = False
+
+    if has_running_loop:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            return pool.submit(asyncio.run, coro).result()
+    return asyncio.run(coro)


 async def afetch_agent_card(
@@ -203,14 +207,17 @@ def _fetch_agent_card_cached(
    """Cached sync version of fetch_agent_card."""
    auth = _auth_store.get(auth_hash)

-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
+    coro = _afetch_agent_card_impl(endpoint=endpoint, auth=auth, timeout=timeout)
    try:
-        return loop.run_until_complete(
-            _afetch_agent_card_impl(endpoint=endpoint, auth=auth, timeout=timeout)
-        )
-    finally:
-        loop.close()
+        asyncio.get_running_loop()
+        has_running_loop = True
+    except RuntimeError:
+        has_running_loop = False
+
+    if has_running_loop:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            return pool.submit(asyncio.run, coro).result()
+    return asyncio.run(coro)


@cached(ttl=300, serializer=PickleSerializer())  # type: ignore[untyped-decorator]
--- a/lib/crewai/src/crewai/a2a/utils/delegation.py
+++ b/lib/crewai/src/crewai/a2a/utils/delegation.py
@@ -5,6 +5,7 @@ from __future__ import annotations
 import asyncio
 import base64
 from collections.abc import AsyncIterator, Callable, MutableMapping
+import concurrent.futures
 from contextlib import asynccontextmanager
 import logging
 from typing import TYPE_CHECKING, Any, Final, Literal
@@ -194,56 +195,43 @@ def execute_a2a_delegation(

    Returns:
        TaskStateResult with status, result/error, history, and agent_card.
-
-    Raises:
-        RuntimeError: If called from an async context with a running event loop.
    """
+    coro = aexecute_a2a_delegation(
+        endpoint=endpoint,
+        auth=auth,
+        timeout=timeout,
+        task_description=task_description,
+        context=context,
+        context_id=context_id,
+        task_id=task_id,
+        reference_task_ids=reference_task_ids,
+        metadata=metadata,
+        extensions=extensions,
+        conversation_history=conversation_history,
+        agent_id=agent_id,
+        agent_role=agent_role,
+        agent_branch=agent_branch,
+        response_model=response_model,
+        turn_number=turn_number,
+        updates=updates,
+        from_task=from_task,
+        from_agent=from_agent,
+        skill_id=skill_id,
+        client_extensions=client_extensions,
+        transport=transport,
+        accepted_output_modes=accepted_output_modes,
+        input_files=input_files,
+    )
    try:
        asyncio.get_running_loop()
-        raise RuntimeError(
-            "execute_a2a_delegation() cannot be called from an async context. "
-            "Use 'await aexecute_a2a_delegation()' instead."
-        )
-    except RuntimeError as e:
-        if "no running event loop" not in str(e).lower():
-            raise
+        has_running_loop = True
+    except RuntimeError:
+        has_running_loop = False

-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    try:
-        return loop.run_until_complete(
-            aexecute_a2a_delegation(
-                endpoint=endpoint,
-                auth=auth,
-                timeout=timeout,
-                task_description=task_description,
-                context=context,
-                context_id=context_id,
-                task_id=task_id,
-                reference_task_ids=reference_task_ids,
-                metadata=metadata,
-                extensions=extensions,
-                conversation_history=conversation_history,
-                agent_id=agent_id,
-                agent_role=agent_role,
-                agent_branch=agent_branch,
-                response_model=response_model,
-                turn_number=turn_number,
-                updates=updates,
-                from_task=from_task,
-                from_agent=from_agent,
-                skill_id=skill_id,
-                client_extensions=client_extensions,
-                transport=transport,
-                accepted_output_modes=accepted_output_modes,
-                input_files=input_files,
-            )
-        )
-    finally:
-        try:
-            loop.run_until_complete(loop.shutdown_asyncgens())
-        finally:
-            loop.close()
+    if has_running_loop:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            return pool.submit(asyncio.run, coro).result()
+    return asyncio.run(coro)


 async def aexecute_a2a_delegation(
--- a/lib/crewai/src/crewai/agent/core.py
+++ b/lib/crewai/src/crewai/agent/core.py
@@ -22,6 +22,7 @@ from pydantic import (
 )
 from typing_extensions import Self

+from crewai.agent.planning_config import PlanningConfig
 from crewai.agent.utils import (
    ahandle_knowledge_retrieval,
    apply_training_data,
@@ -191,13 +192,23 @@ class Agent(BaseAgent):
        default="safe",
        description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).",
    )
-    reasoning: bool = Field(
+    planning_config: PlanningConfig | None = Field(
+        default=None,
+        description="Configuration for agent planning before task execution.",
+    )
+    planning: bool = Field(
        default=False,
        description="Whether the agent should reflect and create a plan before executing a task.",
    )
+    reasoning: bool = Field(
+        default=False,
+        description="[DEPRECATED: Use planning_config instead] Whether the agent should reflect and create a plan before executing a task.",
+        deprecated=True,
+    )
    max_reasoning_attempts: int | None = Field(
        default=None,
-        description="Maximum number of reasoning attempts before executing the task. If None, will try until ready.",
+        description="[DEPRECATED: Use planning_config.max_attempts instead] Maximum number of reasoning attempts before executing the task. If None, will try until ready.",
+        deprecated=True,
    )
    embedder: EmbedderConfig | None = Field(
        default=None,
@@ -264,8 +275,26 @@ class Agent(BaseAgent):
        if self.allow_code_execution:
            self._validate_docker_installation()

+        # Handle backward compatibility: convert reasoning=True to planning_config
+        if self.reasoning and self.planning_config is None:
+            import warnings
+
+            warnings.warn(
+                "The 'reasoning' parameter is deprecated. Use 'planning_config=PlanningConfig()' instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            self.planning_config = PlanningConfig(
+                max_attempts=self.max_reasoning_attempts,
+            )
+
        return self

+    @property
+    def planning_enabled(self) -> bool:
+        """Check if planning is enabled for this agent."""
+        return self.planning_config is not None or self.planning
+
    def _setup_agent_executor(self) -> None:
        if not self.cache_handler:
            self.cache_handler = CacheHandler()
@@ -334,7 +363,11 @@ class Agent(BaseAgent):
            ValueError: If the max execution time is not a positive integer.
            RuntimeError: If the agent execution fails for other reasons.
        """
-        handle_reasoning(self, task)
+        # Only call handle_reasoning for legacy CrewAgentExecutor
+        # For AgentExecutor, planning is handled in AgentExecutor.generate_plan()
+        if self.executor_class is not AgentExecutor:
+            handle_reasoning(self, task)
+
        self._inject_date_to_task(task)

        if self.tools_handler:
@@ -572,7 +605,10 @@ class Agent(BaseAgent):
            ValueError: If the max execution time is not a positive integer.
            RuntimeError: If the agent execution fails for other reasons.
        """
-        handle_reasoning(self, task)
+        if self.executor_class is not AgentExecutor:
+            handle_reasoning(
+                self, task
+            )  # we need this till CrewAgentExecutor migrates to AgentExecutor
        self._inject_date_to_task(task)

        if self.tools_handler:
@@ -1156,11 +1192,15 @@ class Agent(BaseAgent):
        # Process platform apps and MCP tools
        if self.apps:
            platform_tools = self.get_platform_tools(self.apps)
-            if platform_tools and self.tools is not None:
+            if platform_tools:
+                if self.tools is None:
+                    self.tools = []
                self.tools.extend(platform_tools)
        if self.mcps:
            mcps = self.get_mcp_tools(self.mcps)
-            if mcps and self.tools is not None:
+            if mcps:
+                if self.tools is None:
+                    self.tools = []
                self.tools.extend(mcps)

        # Prepare tools
@@ -1264,7 +1304,7 @@ class Agent(BaseAgent):
                    ),
                )
                start_time = time.time()
-                matches = agent_memory.recall(formatted_messages, limit=5)
+                matches = agent_memory.recall(formatted_messages, limit=20)
                memory_block = ""
                if matches:
                    memory_block = "Relevant memories:\n" + "\n".join(
@@ -1414,17 +1454,19 @@ class Agent(BaseAgent):
        except Exception as e:
            self._logger.log("error", f"Failed to save kickoff result to memory: {e}")

-    def _execute_and_build_output(
+    def _build_output_from_result(
        self,
+        result: dict[str, Any],
        executor: AgentExecutor,
-        inputs: dict[str, str],
        response_format: type[Any] | None = None,
    ) -> LiteAgentOutput:
-        """Execute the agent and build the output object.
+        """Build a LiteAgentOutput from an executor result dict.
+
+        Shared logic used by both sync and async execution paths.

        Args:
+            result: The result dictionary from executor.invoke / invoke_async.
            executor: The executor instance.
-            inputs: Input dictionary for execution.
            response_format: Optional response format.

        Returns:
@@ -1432,8 +1474,6 @@ class Agent(BaseAgent):
        """
        import json

-        # Execute the agent (this is called from sync path, so invoke returns dict)
-        result = cast(dict[str, Any], executor.invoke(inputs))
        output = result.get("output", "")

        # Handle response format conversion
@@ -1481,91 +1521,39 @@ class Agent(BaseAgent):
            else str(raw_output)
        )

+        todo_results = LiteAgentOutput.from_todo_items(executor.state.todos.items)
+
        return LiteAgentOutput(
            raw=raw_str,
            pydantic=formatted_result,
            agent_role=self.role,
            usage_metrics=usage_metrics.model_dump() if usage_metrics else None,
-            messages=executor.messages,
+            messages=list(executor.state.messages),
+            plan=executor.state.plan,
+            todos=todo_results,
+            replan_count=executor.state.replan_count,
+            last_replan_reason=executor.state.last_replan_reason,
        )

+    def _execute_and_build_output(
+        self,
+        executor: AgentExecutor,
+        inputs: dict[str, str],
+        response_format: type[Any] | None = None,
+    ) -> LiteAgentOutput:
+        """Execute the agent synchronously and build the output object."""
+        result = cast(dict[str, Any], executor.invoke(inputs))
+        return self._build_output_from_result(result, executor, response_format)
+
    async def _execute_and_build_output_async(
        self,
        executor: AgentExecutor,
        inputs: dict[str, str],
        response_format: type[Any] | None = None,
    ) -> LiteAgentOutput:
-        """Execute the agent asynchronously and build the output object.
-
-        This is the async version of _execute_and_build_output that uses
-        invoke_async() for native async execution within event loops.
-
-        Args:
-            executor: The executor instance.
-            inputs: Input dictionary for execution.
-            response_format: Optional response format.
-
-        Returns:
-            LiteAgentOutput with raw output, formatted result, and metrics.
-        """
-        import json
-
-        # Execute the agent asynchronously
+        """Execute the agent asynchronously and build the output object."""
        result = await executor.invoke_async(inputs)
-        output = result.get("output", "")
-
-        # Handle response format conversion
-        formatted_result: BaseModel | None = None
-        raw_output: str
-
-        if isinstance(output, BaseModel):
-            formatted_result = output
-            raw_output = output.model_dump_json()
-        elif response_format:
-            raw_output = str(output) if not isinstance(output, str) else output
-            try:
-                model_schema = generate_model_description(response_format)
-                schema = json.dumps(model_schema, indent=2)
-                instructions = self.i18n.slice("formatted_task_instructions").format(
-                    output_format=schema
-                )
-
-                converter = Converter(
-                    llm=self.llm,
-                    text=raw_output,
-                    model=response_format,
-                    instructions=instructions,
-                )
-
-                conversion_result = converter.to_pydantic()
-                if isinstance(conversion_result, BaseModel):
-                    formatted_result = conversion_result
-            except ConverterError:
-                pass  # Keep raw output if conversion fails
-        else:
-            raw_output = str(output) if not isinstance(output, str) else output
-
-        # Get token usage metrics
-        if isinstance(self.llm, BaseLLM):
-            usage_metrics = self.llm.get_token_usage_summary()
-        else:
-            usage_metrics = self._token_process.get_summary()
-
-        raw_str = (
-            raw_output
-            if isinstance(raw_output, str)
-            else raw_output.model_dump_json()
-            if isinstance(raw_output, BaseModel)
-            else str(raw_output)
-        )
-
-        return LiteAgentOutput(
-            raw=raw_str,
-            pydantic=formatted_result,
-            agent_role=self.role,
-            usage_metrics=usage_metrics.model_dump() if usage_metrics else None,
-            messages=executor.messages,
-        )
+        return self._build_output_from_result(result, executor, response_format)

    def _process_kickoff_guardrail(
        self,
--- a/lib/crewai/src/crewai/agent/planning_config.py
+++ b/lib/crewai/src/crewai/agent/planning_config.py
@@ -0,0 +1,136 @@
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+
+class PlanningConfig(BaseModel):
+    """Configuration for agent planning/reasoning before task execution.
+
+    This allows users to customize the planning behavior including prompts,
+    iteration limits, the LLM used for planning, and the reasoning effort
+    level that controls post-step observation and replanning behavior.
+
+    Note: To disable planning, don't pass a planning_config or set planning=False
+    on the Agent. The presence of a PlanningConfig enables planning.
+
+    Attributes:
+        reasoning_effort: Controls observation and replanning after each step.
+            - "low": Observe each step (validates success), but skip the
+              decide/replan/refine pipeline. Steps are marked complete and
+              execution continues linearly. Fastest option.
+            - "medium": Observe each step. On failure, trigger replanning.
+              On success, skip refinement and continue. Balanced option.
+            - "high": Full observation pipeline — observe every step, then
+              route through decide_next_action which can trigger early goal
+              achievement, full replanning, or lightweight refinement.
+              Most adaptive but adds latency per step.
+        max_attempts: Maximum number of planning refinement attempts.
+            If None, will continue until the agent indicates readiness.
+        max_steps: Maximum number of steps in the generated plan.
+        system_prompt: Custom system prompt for planning. Uses default if None.
+        plan_prompt: Custom prompt for creating the initial plan.
+        refine_prompt: Custom prompt for refining the plan.
+        llm: LLM to use for planning. Uses agent's LLM if None.
+
+    Example:
+        ```python
+        from crewai import Agent
+        from crewai.agent.planning_config import PlanningConfig
+
+        # Simple usage — fast, linear execution (default)
+        agent = Agent(
+            role="Researcher",
+            goal="Research topics",
+            backstory="Expert researcher",
+            planning_config=PlanningConfig(),
+        )
+
+        # Balanced — replan only when steps fail
+        agent = Agent(
+            role="Researcher",
+            goal="Research topics",
+            backstory="Expert researcher",
+            planning_config=PlanningConfig(
+                reasoning_effort="medium",
+            ),
+        )
+
+        # Full adaptive planning with refinement and replanning
+        agent = Agent(
+            role="Researcher",
+            goal="Research topics",
+            backstory="Expert researcher",
+            planning_config=PlanningConfig(
+                reasoning_effort="high",
+                max_attempts=3,
+                max_steps=10,
+                plan_prompt="Create a focused plan for: {description}",
+                llm="gpt-4o-mini",  # Use cheaper model for planning
+            ),
+        )
+        ```
+    """
+
+    reasoning_effort: Literal["low", "medium", "high"] = Field(
+        default="medium",
+        description=(
+            "Controls post-step observation and replanning behavior. "
+            "'low' observes steps but skips replanning/refinement (fastest). "
+            "'medium' observes and replans only on step failure (balanced). "
+            "'high' runs full observation pipeline with replanning, refinement, "
+            "and early goal detection (most adaptive, highest latency)."
+        ),
+    )
+    max_attempts: int | None = Field(
+        default=None,
+        description=(
+            "Maximum number of planning refinement attempts. "
+            "If None, will continue until the agent indicates readiness."
+        ),
+    )
+    max_steps: int = Field(
+        default=20,
+        description="Maximum number of steps in the generated plan.",
+        ge=1,
+    )
+    system_prompt: str | None = Field(
+        default=None,
+        description="Custom system prompt for planning. Uses default if None.",
+    )
+    plan_prompt: str | None = Field(
+        default=None,
+        description="Custom prompt for creating the initial plan.",
+    )
+    refine_prompt: str | None = Field(
+        default=None,
+        description="Custom prompt for refining the plan.",
+    )
+    max_replans: int = Field(
+        default=3,
+        description="Maximum number of full replanning attempts before finalizing.",
+        ge=0,
+    )
+    max_step_iterations: int = Field(
+        default=15,
+        description=(
+            "Maximum LLM iterations per step in the StepExecutor multi-turn loop. "
+            "Lower values make steps faster but less thorough."
+        ),
+        ge=1,
+    )
+    step_timeout: int | None = Field(
+        default=None,
+        description=(
+            "Maximum wall-clock seconds for a single step execution. "
+            "If exceeded, the step is marked as failed and observation decides "
+            "whether to continue or replan. None means no per-step timeout."
+        ),
+    )
+    llm: str | Any | None = Field(
+        default=None,
+        description="LLM to use for planning. Uses agent's LLM if None.",
+    )
+
+    model_config = {"arbitrary_types_allowed": True}
--- a/lib/crewai/src/crewai/agent/utils.py
+++ b/lib/crewai/src/crewai/agent/utils.py
@@ -28,13 +28,20 @@ if TYPE_CHECKING:


 def handle_reasoning(agent: Agent, task: Task) -> None:
-    """Handle the reasoning process for an agent before task execution.
+    """Handle the reasoning/planning process for an agent before task execution.
+
+    This function checks if planning is enabled for the agent and, if so,
+    creates a plan that gets appended to the task description.
+
+    Note: This function is used by CrewAgentExecutor (legacy path).
+    For AgentExecutor, planning is handled in AgentExecutor.generate_plan().

    Args:
        agent: The agent performing the task.
        task: The task to execute.
    """
-    if not agent.reasoning:
+    # Check if planning is enabled using the planning_enabled property
+    if not getattr(agent, "planning_enabled", False):
        return

    try:
@@ -43,13 +50,13 @@ def handle_reasoning(agent: Agent, task: Task) -> None:
            AgentReasoningOutput,
        )

-        reasoning_handler = AgentReasoning(task=task, agent=agent)
-        reasoning_output: AgentReasoningOutput = (
-            reasoning_handler.handle_agent_reasoning()
+        planning_handler = AgentReasoning(agent=agent, task=task)
+        planning_output: AgentReasoningOutput = (
+            planning_handler.handle_agent_reasoning()
        )
-        task.description += f"\n\nReasoning Plan:\n{reasoning_output.plan.plan}"
+        task.description += f"\n\nPlanning:\n{planning_output.plan.plan}"
    except Exception as e:
-        agent._logger.log("error", f"Error during reasoning process: {e!s}")
+        agent._logger.log("error", f"Error during planning: {e!s}")


 def build_task_prompt_with_schema(task: Task, task_prompt: str, i18n: I18N) -> str:
--- a/lib/crewai/src/crewai/agents/agent_builder/base_agent.py
+++ b/lib/crewai/src/crewai/agents/agent_builder/base_agent.py
@@ -38,7 +38,7 @@ from crewai.utilities.string_utils import interpolate_only


 _SLUG_RE: Final[re.Pattern[str]] = re.compile(
-    r"^(?:crewai-amp:)?[a-zA-Z0-9][a-zA-Z0-9_-]*(?:#\w+)?$"
+    r"^(?:crewai-amp:)?[a-zA-Z0-9][a-zA-Z0-9_-]*(?:#[\w-]+)?$"
 )


--- a/lib/crewai/src/crewai/agents/agent_builder/base_agent_executor_mixin.py
+++ b/lib/crewai/src/crewai/agents/agent_builder/base_agent_executor_mixin.py
@@ -30,12 +30,9 @@ class CrewAgentExecutorMixin:
        memory = getattr(self.agent, "memory", None) or (
            getattr(self.crew, "_memory", None) if self.crew else None
        )
-        if memory is None or not self.task or getattr(memory, "_read_only", False):
+        if memory is None or not self.task or memory.read_only:
            return
-        if (
-            f"Action: {sanitize_tool_name('Delegate work to coworker')}"
-            in output.text
-        ):
+        if f"Action: {sanitize_tool_name('Delegate work to coworker')}" in output.text:
            return
        try:
            raw = (
@@ -48,6 +45,4 @@ class CrewAgentExecutorMixin:
            if extracted:
                memory.remember_many(extracted, agent_role=self.agent.role)
        except Exception as e:
-            self.agent._logger.log(
-                "error", f"Failed to save to memory: {e}"
-            )
+            self.agent._logger.log("error", f"Failed to save to memory: {e}")
--- a/lib/crewai/src/crewai/agents/cache/init.py
+++ b/lib/crewai/src/crewai/agents/cache/init.py
@@ -1,5 +1,4 @@
 from crewai.agents.cache.cache_handler import CacheHandler


-
 __all__ = ["CacheHandler"]
--- a/lib/crewai/src/crewai/agents/crew_agent_executor.py
+++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py
@@ -9,6 +9,7 @@ from __future__ import annotations
 import asyncio
 from collections.abc import Callable
 from concurrent.futures import ThreadPoolExecutor, as_completed
+import contextvars
 import inspect
 import logging
 from typing import TYPE_CHECKING, Any, Literal, cast
@@ -755,6 +756,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                with ThreadPoolExecutor(max_workers=max_workers) as pool:
                    futures = {
                        pool.submit(
+                            contextvars.copy_context().run,
                            self._execute_single_native_tool_call,
                            call_id=call_id,
                            func_name=func_name,
--- a/lib/crewai/src/crewai/agents/planner_observer.py
+++ b/lib/crewai/src/crewai/agents/planner_observer.py
@@ -0,0 +1,356 @@
+"""PlannerObserver: Observation phase after each step execution.
+
+Implements the "Observe" phase. After every step execution, the Planner
+analyzes what happened, what new information was learned, and whether the
+remaining plan is still valid.
+
+This is NOT an error detector — it runs on every step, including successes,
+to incorporate runtime observations into the remaining plan.
+
+Refinements are structured (StepRefinement objects) and applied directly
+from the observation result — no second LLM call required.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any
+
+from crewai.events.event_bus import crewai_event_bus
+from crewai.events.types.observation_events import (
+    StepObservationCompletedEvent,
+    StepObservationFailedEvent,
+    StepObservationStartedEvent,
+)
+from crewai.utilities.i18n import I18N, get_i18n
+from crewai.utilities.llm_utils import create_llm
+from crewai.utilities.planning_types import StepObservation, TodoItem
+from crewai.utilities.types import LLMMessage
+
+
+if TYPE_CHECKING:
+    from crewai.agent import Agent
+    from crewai.task import Task
+
+logger = logging.getLogger(__name__)
+
+
+class PlannerObserver:
+    """Observes step execution results and decides on plan continuation.
+
+    After EVERY step execution, this class:
+    1. Analyzes what the step accomplished
+    2. Identifies new information learned
+    3. Decides if the remaining plan is still valid
+    4. Suggests lightweight refinements or triggers full replanning
+
+    LLM resolution (magical fallback):
+    - If ``agent.planning_config.llm`` is explicitly set → use that
+    - Otherwise → fall back to ``agent.llm`` (same LLM for everything)
+
+    Args:
+        agent: The agent instance (for LLM resolution and config).
+        task: Optional task context (for description and expected output).
+    """
+
+    def __init__(
+        self,
+        agent: Agent,
+        task: Task | None = None,
+        kickoff_input: str = "",
+    ) -> None:
+        self.agent = agent
+        self.task = task
+        self.kickoff_input = kickoff_input
+        self.llm = self._resolve_llm()
+        self._i18n: I18N = get_i18n()
+
+    def _resolve_llm(self) -> Any:
+        """Resolve which LLM to use for observation/planning.
+
+        Mirrors AgentReasoning._resolve_llm(): uses planning_config.llm
+        if explicitly set, otherwise falls back to agent.llm.
+
+        Returns:
+            The resolved LLM instance.
+        """
+        from crewai.llm import LLM
+
+        config = getattr(self.agent, "planning_config", None)
+        if config is not None and config.llm is not None:
+            if isinstance(config.llm, LLM):
+                return config.llm
+            return create_llm(config.llm)
+        return self.agent.llm
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def observe(
+        self,
+        completed_step: TodoItem,
+        result: str,
+        all_completed: list[TodoItem],
+        remaining_todos: list[TodoItem],
+    ) -> StepObservation:
+        """Observe a step's result and decide on plan continuation.
+
+        This runs after EVERY step execution — not just failures.
+
+        Args:
+            completed_step: The todo item that was just executed.
+            result: The final result string from the step.
+            all_completed: All previously completed todos (for context).
+            remaining_todos: The pending todos still in the plan.
+
+        Returns:
+            StepObservation with the Planner's analysis. Any suggested
+            refinements are structured StepRefinement objects ready for
+            direct application — no second LLM call needed.
+        """
+        agent_role = self.agent.role
+
+        crewai_event_bus.emit(
+            self.agent,
+            event=StepObservationStartedEvent(
+                agent_role=agent_role,
+                step_number=completed_step.step_number,
+                step_description=completed_step.description,
+                from_task=self.task,
+                from_agent=self.agent,
+            ),
+        )
+
+        messages = self._build_observation_messages(
+            completed_step, result, all_completed, remaining_todos
+        )
+
+        try:
+            response = self.llm.call(
+                messages,
+                response_model=StepObservation,
+                from_task=self.task,
+                from_agent=self.agent,
+            )
+
+            observation = self._parse_observation_response(response)
+
+            refinement_summaries = (
+                [
+                    f"Step {r.step_number}: {r.new_description}"
+                    for r in observation.suggested_refinements
+                ]
+                if observation.suggested_refinements
+                else None
+            )
+
+            crewai_event_bus.emit(
+                self.agent,
+                event=StepObservationCompletedEvent(
+                    agent_role=agent_role,
+                    step_number=completed_step.step_number,
+                    step_description=completed_step.description,
+                    step_completed_successfully=observation.step_completed_successfully,
+                    key_information_learned=observation.key_information_learned,
+                    remaining_plan_still_valid=observation.remaining_plan_still_valid,
+                    needs_full_replan=observation.needs_full_replan,
+                    replan_reason=observation.replan_reason,
+                    goal_already_achieved=observation.goal_already_achieved,
+                    suggested_refinements=refinement_summaries,
+                    from_task=self.task,
+                    from_agent=self.agent,
+                ),
+            )
+
+            return observation
+
+        except Exception as e:
+            logger.warning(
+                f"Observation LLM call failed: {e}. Defaulting to conservative replan."
+            )
+
+            crewai_event_bus.emit(
+                self.agent,
+                event=StepObservationFailedEvent(
+                    agent_role=agent_role,
+                    step_number=completed_step.step_number,
+                    step_description=completed_step.description,
+                    error=str(e),
+                    from_task=self.task,
+                    from_agent=self.agent,
+                ),
+            )
+
+            # Don't force a full replan — the step may have succeeded even if the
+            # observer LLM failed to parse the result. Defaulting to "continue" is
+            # far less disruptive than wiping the entire plan on every observer error.
+            return StepObservation(
+                step_completed_successfully=True,
+                key_information_learned="",
+                remaining_plan_still_valid=True,
+                needs_full_replan=False,
+            )
+
+    def _extract_task_section(self, text: str) -> str:
+        """Extract the ## Task body from a structured enriched instruction.
+
+        Falls back to the full text (capped at 2000 chars) for plain inputs.
+        """
+        for marker in ("\n## Task\n", "\n## Task:", "## Task\n"):
+            idx = text.find(marker)
+            if idx >= 0:
+                start = idx + len(marker)
+                for end_marker in ("\n---\n", "\n## "):
+                    end = text.find(end_marker, start)
+                    if end > 0:
+                        return text[start:end].strip()
+                return text[start : start + 2000].strip()
+        return text[:2000] if len(text) > 2000 else text
+
+    def apply_refinements(
+        self,
+        observation: StepObservation,
+        remaining_todos: list[TodoItem],
+    ) -> list[TodoItem]:
+        """Apply structured refinements from the observation directly to todo descriptions.
+
+        No LLM call needed — refinements are already structured StepRefinement
+        objects produced by the observation call. This is a pure in-memory update.
+
+        Args:
+            observation: The observation containing structured refinements.
+            remaining_todos: The pending todos to update in-place.
+
+        Returns:
+            The same todo list with updated descriptions where refinements applied.
+        """
+        if not observation.suggested_refinements:
+            return remaining_todos
+
+        todo_by_step: dict[int, TodoItem] = {t.step_number: t for t in remaining_todos}
+        for refinement in observation.suggested_refinements:
+            if refinement.step_number in todo_by_step and refinement.new_description:
+                todo_by_step[refinement.step_number].description = refinement.new_description
+
+        return remaining_todos
+
+    # ------------------------------------------------------------------
+    # Internal: Message building
+    # ------------------------------------------------------------------
+
+    def _build_observation_messages(
+        self,
+        completed_step: TodoItem,
+        result: str,
+        all_completed: list[TodoItem],
+        remaining_todos: list[TodoItem],
+    ) -> list[LLMMessage]:
+        """Build messages for the observation LLM call."""
+        task_desc = ""
+        task_goal = ""
+        if self.task:
+            task_desc = self.task.description or ""
+            task_goal = self.task.expected_output or ""
+        elif self.kickoff_input:
+            # Standalone kickoff path — no Task object, but we have the raw input.
+            # Extract just the ## Task section so the observer sees the actual goal,
+            # not the full enriched instruction with env/tools/verification noise.
+            task_desc = self._extract_task_section(self.kickoff_input)
+            task_goal = "Complete the task successfully"
+
+        system_prompt = self._i18n.retrieve("planning", "observation_system_prompt")
+
+        # Build context of what's been done
+        completed_summary = ""
+        if all_completed:
+            completed_lines = []
+            for todo in all_completed:
+                result_preview = (todo.result or "")[:200]
+                completed_lines.append(
+                    f"  Step {todo.step_number}: {todo.description}\n"
+                    f"    Result: {result_preview}"
+                )
+            completed_summary = "\n## Previously completed steps:\n" + "\n".join(
+                completed_lines
+            )
+
+        # Build remaining plan
+        remaining_summary = ""
+        if remaining_todos:
+            remaining_lines = [
+                f"  Step {todo.step_number}: {todo.description}"
+                for todo in remaining_todos
+            ]
+            remaining_summary = "\n## Remaining plan steps:\n" + "\n".join(
+                remaining_lines
+            )
+
+        user_prompt = self._i18n.retrieve("planning", "observation_user_prompt").format(
+            task_description=task_desc,
+            task_goal=task_goal,
+            completed_summary=completed_summary,
+            step_number=completed_step.step_number,
+            step_description=completed_step.description,
+            step_result=result,
+            remaining_summary=remaining_summary,
+        )
+
+        return [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+
+    @staticmethod
+    def _parse_observation_response(response: Any) -> StepObservation:
+        """Parse the LLM response into a StepObservation.
+
+        The LLM may return:
+        - A StepObservation instance directly (streaming + litellm path)
+        - A JSON string (non-streaming path serialises model_dump_json())
+        - A dict (some provider paths)
+        - Something else (unexpected)
+
+        We handle all cases to avoid silently falling back to a
+        hardcoded success default.
+        """
+
+        if isinstance(response, StepObservation):
+            return response
+
+        # JSON string path — most common miss before this fix
+        if isinstance(response, str):
+            text = response.strip()
+            try:
+                return StepObservation.model_validate_json(text)
+            except Exception:  # noqa: S110
+                pass
+            # Some LLMs wrap the JSON in markdown fences
+            if text.startswith("```"):
+                lines = text.split("\n")
+                # Strip first and last lines (``` markers)
+                inner = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:])
+                try:
+                    return StepObservation.model_validate_json(inner.strip())
+                except Exception:  # noqa: S110
+                    pass
+
+        # Dict path
+        if isinstance(response, dict):
+            try:
+                return StepObservation.model_validate(response)
+            except Exception:  # noqa: S110
+                pass
+
+        # Last resort — log what we got so it's diagnosable
+        logger.warning(
+            "Could not parse observation response (type=%s). "
+            "Falling back to default success observation. Preview: %.200s",
+            type(response).__name__,
+            str(response),
+        )
+        return StepObservation(
+            step_completed_successfully=True,
+            key_information_learned=str(response) if response else "",
+            remaining_plan_still_valid=True,
+        )
--- a/lib/crewai/src/crewai/agents/step_executor.py
+++ b/lib/crewai/src/crewai/agents/step_executor.py
@@ -0,0 +1,648 @@
+"""StepExecutor: Isolated executor for a single plan step.
+
+Implements the direct-action execution pattern from Plan-and-Act
+(arxiv 2503.09572): the Executor receives one step description,
+makes a single LLM call, executes any tool call returned, and
+returns the result immediately.
+
+There is no inner loop. Recovery from failure (retry, replan) is
+the responsibility of PlannerObserver and AgentExecutor — keeping
+this class single-purpose and fast.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from datetime import datetime
+import json
+import time
+from typing import TYPE_CHECKING, Any, cast
+
+from pydantic import BaseModel
+
+from crewai.agents.parser import AgentAction, AgentFinish
+from crewai.events.event_bus import crewai_event_bus
+from crewai.events.types.tool_usage_events import (
+    ToolUsageErrorEvent,
+    ToolUsageFinishedEvent,
+    ToolUsageStartedEvent,
+)
+from crewai.utilities.agent_utils import (
+    build_tool_calls_assistant_message,
+    check_native_tool_support,
+    enforce_rpm_limit,
+    execute_single_native_tool_call,
+    format_message_for_llm,
+    is_tool_call_list,
+    process_llm_response,
+    setup_native_tools,
+)
+from crewai.utilities.i18n import I18N, get_i18n
+from crewai.utilities.planning_types import TodoItem
+from crewai.utilities.printer import Printer
+from crewai.utilities.step_execution_context import StepExecutionContext, StepResult
+from crewai.utilities.string_utils import sanitize_tool_name
+from crewai.utilities.tool_utils import execute_tool_and_check_finality
+from crewai.utilities.types import LLMMessage
+
+
+if TYPE_CHECKING:
+    from crewai.agent import Agent
+    from crewai.agents.tools_handler import ToolsHandler
+    from crewai.crew import Crew
+    from crewai.llms.base_llm import BaseLLM
+    from crewai.task import Task
+    from crewai.tools.base_tool import BaseTool
+    from crewai.tools.structured_tool import CrewStructuredTool
+
+
+class StepExecutor:
+    """Executes a SINGLE todo item using direct-action execution.
+
+    The StepExecutor owns its own message list per invocation. It never reads
+    or writes the AgentExecutor's state. Results flow back via StepResult.
+
+    Execution pattern (per Plan-and-Act, arxiv 2503.09572):
+        1. Build messages from todo + context
+        2. Call LLM once (with or without native tools)
+        3. If tool call → execute it → return tool result
+        4. If text answer → return it directly
+        No inner loop — recovery is PlannerObserver's responsibility.
+
+    Args:
+        llm: The language model to use for execution.
+        tools: Structured tools available to the executor.
+        agent: The agent instance (for role/goal/verbose/config).
+        original_tools: Original BaseTool instances (needed for native tool schema).
+        tools_handler: Optional tools handler for caching and delegation tracking.
+        task: Optional task context.
+        crew: Optional crew context.
+        function_calling_llm: Optional separate LLM for function calling.
+        request_within_rpm_limit: Optional RPM limit function.
+        callbacks: Optional list of callbacks.
+        i18n: Optional i18n instance.
+    """
+
+    def __init__(
+        self,
+        llm: BaseLLM,
+        tools: list[CrewStructuredTool],
+        agent: Agent,
+        original_tools: list[BaseTool] | None = None,
+        tools_handler: ToolsHandler | None = None,
+        task: Task | None = None,
+        crew: Crew | None = None,
+        function_calling_llm: BaseLLM | Any | None = None,
+        request_within_rpm_limit: Callable[[], bool] | None = None,
+        callbacks: list[Any] | None = None,
+        i18n: I18N | None = None,
+    ) -> None:
+        self.llm = llm
+        self.tools = tools
+        self.agent = agent
+        self.original_tools = original_tools or []
+        self.tools_handler = tools_handler
+        self.task = task
+        self.crew = crew
+        self.function_calling_llm = function_calling_llm
+        self.request_within_rpm_limit = request_within_rpm_limit
+        self.callbacks = callbacks or []
+        self._i18n: I18N = i18n or get_i18n()
+        self._printer: Printer = Printer()
+
+        # Native tool support — set up once
+        self._use_native_tools = check_native_tool_support(
+            self.llm, self.original_tools
+        )
+        self._openai_tools: list[dict[str, Any]] = []
+        self._available_functions: dict[str, Callable[..., Any]] = {}
+        if self._use_native_tools and self.original_tools:
+            (
+                self._openai_tools,
+                self._available_functions,
+                _,
+            ) = setup_native_tools(self.original_tools)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def execute(
+        self,
+        todo: TodoItem,
+        context: StepExecutionContext,
+        max_step_iterations: int = 15,
+        step_timeout: int | None = None,
+    ) -> StepResult:
+        """Execute a single todo item using a multi-turn action loop.
+
+        Enforces the RPM limit, builds a fresh message list, then iterates
+        LLM call → tool execution → observation until the LLM signals it is
+        done (text answer) or max_step_iterations is reached.  Never touches
+        external AgentExecutor state.
+
+        Args:
+            todo: The todo item to execute.
+            context: Immutable context with task info and dependency results.
+            max_step_iterations: Maximum LLM iterations in the multi-turn loop.
+            step_timeout: Maximum wall-clock seconds for this step. None = no limit.
+
+        Returns:
+            StepResult with the outcome.
+        """
+        start_time = time.monotonic()
+        tool_calls_made: list[str] = []
+
+        try:
+            enforce_rpm_limit(self.request_within_rpm_limit)
+            messages = self._build_isolated_messages(todo, context)
+
+            if self._use_native_tools:
+                result_text = self._execute_native(
+                    messages, tool_calls_made,
+                    max_step_iterations=max_step_iterations,
+                    step_timeout=step_timeout,
+                    start_time=start_time,
+                )
+            else:
+                result_text = self._execute_text_parsed(
+                    messages, tool_calls_made,
+                    max_step_iterations=max_step_iterations,
+                    step_timeout=step_timeout,
+                    start_time=start_time,
+                )
+            self._validate_expected_tool_usage(todo, tool_calls_made)
+
+            elapsed = time.monotonic() - start_time
+            return StepResult(
+                success=True,
+                result=result_text,
+                tool_calls_made=tool_calls_made,
+                execution_time=elapsed,
+            )
+        except Exception as e:
+            elapsed = time.monotonic() - start_time
+            return StepResult(
+                success=False,
+                result="",
+                error=str(e),
+                tool_calls_made=tool_calls_made,
+                execution_time=elapsed,
+            )
+
+    # ------------------------------------------------------------------
+    # Internal: Message building
+    # ------------------------------------------------------------------
+
+    def _build_isolated_messages(
+        self, todo: TodoItem, context: StepExecutionContext
+    ) -> list[LLMMessage]:
+        """Build a fresh message list for this step's execution.
+
+        System prompt tells the LLM it is an Executor focused on one step.
+        User prompt provides the step description, dependencies, and tools.
+        """
+        system_prompt = self._build_system_prompt()
+        user_prompt = self._build_user_prompt(todo, context)
+
+        return [
+            format_message_for_llm(system_prompt, role="system"),
+            format_message_for_llm(user_prompt, role="user"),
+        ]
+
+    def _build_system_prompt(self) -> str:
+        """Build the Executor's system prompt."""
+        role = self.agent.role if self.agent else "Assistant"
+        goal = self.agent.goal if self.agent else "Complete tasks efficiently"
+        backstory = getattr(self.agent, "backstory", "") or ""
+
+        tools_section = ""
+        if self.tools and not self._use_native_tools:
+            tool_names = ", ".join(sanitize_tool_name(t.name) for t in self.tools)
+            tools_section = self._i18n.retrieve(
+                "planning", "step_executor_tools_section"
+            ).format(tool_names=tool_names)
+
+        return self._i18n.retrieve("planning", "step_executor_system_prompt").format(
+            role=role,
+            backstory=backstory,
+            goal=goal,
+            tools_section=tools_section,
+        )
+
+    def _extract_task_section(self, task_description: str) -> str:
+        """Extract the most relevant portion of the task description.
+
+        For structured descriptions (e.g. harbor_agent-style with ## Task
+        and ## Instructions sections), extracts just the task body so the
+        executor sees the requirements without duplicating tool/verification
+        instructions that are already in the system prompt.
+
+        For plain descriptions, returns the full text (up to 2000 chars).
+        """
+        # Try to extract between "## Task" and the next "---" separator
+        # or next "##" heading — this isolates the task spec from env/tool noise.
+        for marker in ("\n## Task\n", "\n## Task:", "## Task\n"):
+            idx = task_description.find(marker)
+            if idx >= 0:
+                start = idx + len(marker)
+                # End at the first horizontal rule or next top-level ## section
+                for end_marker in ("\n---\n", "\n## "):
+                    end = task_description.find(end_marker, start)
+                    if end > 0:
+                        return task_description[start:end].strip()
+                # No end marker — take up to 2000 chars
+                return task_description[start : start + 2000].strip()
+
+        # No structured format — use the full description, reasonably truncated
+        if len(task_description) > 2000:
+            return task_description[:2000] + "\n... [truncated]"
+        return task_description
+
+    def _build_user_prompt(self, todo: TodoItem, context: StepExecutionContext) -> str:
+        """Build the user prompt for this specific step."""
+        parts: list[str] = []
+
+        # Include overall task context so the executor knows the full goal and
+        # required output format/location — critical for knowing WHAT to produce.
+        # We extract only the task body (not tool instructions or verification
+        # sections) to avoid duplicating directives already in the system prompt.
+        if context.task_description:
+            task_section = self._extract_task_section(context.task_description)
+            if task_section:
+                parts.append(
+                    self._i18n.retrieve(
+                        "planning", "step_executor_task_context"
+                    ).format(
+                        task_context=task_section,
+                    )
+                )
+
+        parts.append(
+            self._i18n.retrieve("planning", "step_executor_user_prompt").format(
+                step_description=todo.description,
+            )
+        )
+
+        if todo.tool_to_use:
+            parts.append(
+                self._i18n.retrieve("planning", "step_executor_suggested_tool").format(
+                    tool_to_use=todo.tool_to_use,
+                )
+            )
+
+        # Include dependency results (final results only, no traces)
+        if context.dependency_results:
+            parts.append(
+                self._i18n.retrieve("planning", "step_executor_context_header")
+            )
+            for step_num, result in sorted(context.dependency_results.items()):
+                parts.append(
+                    self._i18n.retrieve(
+                        "planning", "step_executor_context_entry"
+                    ).format(step_number=step_num, result=result)
+                )
+
+        parts.append(self._i18n.retrieve("planning", "step_executor_complete_step"))
+
+        return "\n".join(parts)
+
+    # ------------------------------------------------------------------
+    # Internal: Multi-turn execution loop
+    # ------------------------------------------------------------------
+
+    def _execute_text_parsed(
+        self,
+        messages: list[LLMMessage],
+        tool_calls_made: list[str],
+        max_step_iterations: int = 15,
+        step_timeout: int | None = None,
+        start_time: float | None = None,
+    ) -> str:
+        """Execute step using text-parsed tool calling with a multi-turn loop.
+
+        Iterates LLM call → tool execution → observation until the LLM
+        produces a Final Answer or max_step_iterations is reached.
+        This allows the agent to: run a command, see the output, adjust its
+        approach, and run another command — all within a single plan step.
+        """
+        use_stop_words = self.llm.supports_stop_words() if self.llm else False
+        last_tool_result = ""
+
+        for _ in range(max_step_iterations):
+            # Check step timeout
+            if step_timeout and start_time:
+                elapsed = time.monotonic() - start_time
+                if elapsed >= step_timeout:
+                    return last_tool_result or f"Step timed out after {elapsed:.0f}s"
+            answer = self.llm.call(
+                messages,
+                callbacks=self.callbacks,
+                from_task=self.task,
+                from_agent=self.agent,
+            )
+
+            if not answer:
+                raise ValueError("Empty response from LLM")
+
+            answer_str = str(answer)
+            formatted = process_llm_response(answer_str, use_stop_words)
+
+            if isinstance(formatted, AgentFinish):
+                return str(formatted.output)
+
+            if isinstance(formatted, AgentAction):
+                tool_calls_made.append(formatted.tool)
+                tool_result = self._execute_text_tool_with_events(formatted)
+                last_tool_result = tool_result
+                # Append the assistant's reasoning + action, then the observation.
+                # _build_observation_message handles vision sentinels so the LLM
+                # receives an image content block instead of raw base64 text.
+                messages.append({"role": "assistant", "content": answer_str})
+                messages.append(self._build_observation_message(tool_result))
+                continue
+
+            # Raw text response with no Final Answer marker — treat as done
+            return answer_str
+
+        # Max iterations reached — return the last tool result we accumulated
+        return last_tool_result
+
+    def _execute_text_tool_with_events(self, formatted: AgentAction) -> str:
+        """Execute text-parsed tool calls with tool usage events."""
+        args_dict = self._parse_tool_args(formatted.tool_input)
+        agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"
+        started_at = datetime.now()
+        crewai_event_bus.emit(
+            self,
+            event=ToolUsageStartedEvent(
+                tool_name=formatted.tool,
+                tool_args=args_dict,
+                from_agent=self.agent,
+                from_task=self.task,
+                agent_key=agent_key,
+            ),
+        )
+
+        try:
+            fingerprint_context = {}
+            if (
+                self.agent
+                and hasattr(self.agent, "security_config")
+                and hasattr(self.agent.security_config, "fingerprint")
+            ):
+                fingerprint_context = {
+                    "agent_fingerprint": str(self.agent.security_config.fingerprint)
+                }
+
+            tool_result = execute_tool_and_check_finality(
+                agent_action=formatted,
+                fingerprint_context=fingerprint_context,
+                tools=self.tools,
+                i18n=self._i18n,
+                agent_key=self.agent.key if self.agent else None,
+                agent_role=self.agent.role if self.agent else None,
+                tools_handler=self.tools_handler,
+                task=self.task,
+                agent=self.agent,
+                function_calling_llm=self.function_calling_llm,
+                crew=self.crew,
+            )
+        except Exception as e:
+            crewai_event_bus.emit(
+                self,
+                event=ToolUsageErrorEvent(
+                    tool_name=formatted.tool,
+                    tool_args=args_dict,
+                    from_agent=self.agent,
+                    from_task=self.task,
+                    agent_key=agent_key,
+                    error=e,
+                ),
+            )
+            raise
+
+        crewai_event_bus.emit(
+            self,
+            event=ToolUsageFinishedEvent(
+                output=str(tool_result.result),
+                tool_name=formatted.tool,
+                tool_args=args_dict,
+                from_agent=self.agent,
+                from_task=self.task,
+                agent_key=agent_key,
+                started_at=started_at,
+                finished_at=datetime.now(),
+            ),
+        )
+        return str(tool_result.result)
+
+    def _parse_tool_args(self, tool_input: Any) -> dict[str, Any]:
+        """Parse tool args from the parser output into a dict payload for events."""
+        if isinstance(tool_input, dict):
+            return tool_input
+        if isinstance(tool_input, str):
+            stripped_input = tool_input.strip()
+            if not stripped_input:
+                return {}
+            try:
+                parsed = json.loads(stripped_input)
+                if isinstance(parsed, dict):
+                    return parsed
+                return {"input": parsed}
+            except json.JSONDecodeError:
+                return {"input": stripped_input}
+        return {"input": str(tool_input)}
+
+    # ------------------------------------------------------------------
+    # Internal: Vision support
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _parse_vision_sentinel(raw: str) -> tuple[str, str] | None:
+        """Parse a VISION_IMAGE sentinel into (media_type, base64_data), or None."""
+        prefix = "VISION_IMAGE:"
+        if not raw.startswith(prefix):
+            return None
+        rest = raw[len(prefix) :]
+        sep = rest.find(":")
+        if sep <= 0:
+            return None
+        return rest[:sep], rest[sep + 1 :]
+
+    @staticmethod
+    def _build_observation_message(tool_result: str) -> LLMMessage:
+        """Build an observation message, converting vision sentinels to image blocks.
+
+        When a tool returns a VISION_IMAGE sentinel (e.g. from read_image),
+        we build a multimodal content block so the LLM can actually *see*
+        the image rather than receiving a wall of base64 text.
+
+        Uses the standard image_url / data-URI format so each LLM provider's
+        SDK (OpenAI, LiteLLM, etc.) handles the provider-specific conversion.
+
+        Format: ``VISION_IMAGE:<media_type>:<base64_data>``
+        """
+        parsed = StepExecutor._parse_vision_sentinel(tool_result)
+        if parsed:
+            media_type, b64_data = parsed
+            return {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Observation: Here is the image:"},
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:{media_type};base64,{b64_data}",
+                        },
+                    },
+                ],
+            }
+        return {"role": "user", "content": f"Observation: {tool_result}"}
+
+    def _validate_expected_tool_usage(
+        self,
+        todo: TodoItem,
+        tool_calls_made: list[str],
+    ) -> None:
+        """Fail step execution when a required tool is configured but not called."""
+        expected_tool = getattr(todo, "tool_to_use", None)
+        if not expected_tool:
+            return
+        expected_tool_name = sanitize_tool_name(expected_tool)
+        available_tool_names = {
+            sanitize_tool_name(tool.name)
+            for tool in self.tools
+            if getattr(tool, "name", "")
+        } | set(self._available_functions.keys())
+        if expected_tool_name not in available_tool_names:
+            return
+        called_names = {sanitize_tool_name(name) for name in tool_calls_made}
+        if expected_tool_name not in called_names:
+            raise ValueError(
+                f"Expected tool '{expected_tool_name}' was not called "
+                f"for step {todo.step_number}."
+            )
+
+    def _execute_native(
+        self,
+        messages: list[LLMMessage],
+        tool_calls_made: list[str],
+        max_step_iterations: int = 15,
+        step_timeout: int | None = None,
+        start_time: float | None = None,
+    ) -> str:
+        """Execute step using native function calling with a multi-turn loop.
+
+        Iterates LLM call → tool execution → appended results until the LLM
+        returns a text answer (no more tool calls) or max_step_iterations is
+        reached.  This lets the agent run a shell command, observe the output,
+        correct mistakes, and issue follow-up commands — all within one step.
+        """
+        accumulated_results: list[str] = []
+
+        for _ in range(max_step_iterations):
+            # Check step timeout
+            if step_timeout and start_time:
+                elapsed = time.monotonic() - start_time
+                if elapsed >= step_timeout:
+                    return "\n\n".join(accumulated_results) if accumulated_results else f"Step timed out after {elapsed:.0f}s"
+            answer = self.llm.call(
+                messages,
+                tools=self._openai_tools,
+                callbacks=self.callbacks,
+                from_task=self.task,
+                from_agent=self.agent,
+            )
+
+            if not answer:
+                raise ValueError("Empty response from LLM")
+
+            if isinstance(answer, BaseModel):
+                return answer.model_dump_json()
+
+            if isinstance(answer, list) and answer and is_tool_call_list(answer):
+                # _execute_native_tool_calls appends assistant + tool messages
+                # to `messages` as a side-effect, so the next LLM call will
+                # see the full conversation history including tool outputs.
+                result = self._execute_native_tool_calls(
+                    answer, messages, tool_calls_made
+                )
+                accumulated_results.append(result)
+                continue
+
+            # Text answer → LLM decided the step is done
+            return str(answer)
+
+        # Max iterations reached — return everything we accumulated
+        return "\n".join(filter(None, accumulated_results))
+
+    def _execute_native_tool_calls(
+        self,
+        tool_calls: list[Any],
+        messages: list[LLMMessage],
+        tool_calls_made: list[str],
+    ) -> str:
+        """Execute a batch of native tool calls and return their results.
+
+        Returns the result of the first tool marked result_as_answer if any,
+        otherwise returns all tool results concatenated.
+        """
+        assistant_message, _reports = build_tool_calls_assistant_message(tool_calls)
+        if assistant_message:
+            messages.append(assistant_message)
+
+        tool_results: list[str] = []
+        for tool_call in tool_calls:
+            call_result = execute_single_native_tool_call(
+                tool_call,
+                available_functions=self._available_functions,
+                original_tools=self.original_tools,
+                structured_tools=self.tools,
+                tools_handler=self.tools_handler,
+                agent=self.agent,
+                task=self.task,
+                crew=self.crew,
+                event_source=self,
+                printer=self._printer,
+                verbose=bool(self.agent and self.agent.verbose),
+            )
+
+            if call_result.func_name:
+                tool_calls_made.append(call_result.func_name)
+
+            if call_result.result_as_answer:
+                return str(call_result.result)
+
+            if call_result.tool_message:
+                raw_content = call_result.tool_message.get("content", "")
+                if isinstance(raw_content, str):
+                    parsed = self._parse_vision_sentinel(raw_content)
+                    if parsed:
+                        media_type, b64_data = parsed
+                        # Replace the sentinel with a standard image_url content block.
+                        # Each provider's _format_messages handles conversion to
+                        # its native format (e.g. Anthropic image blocks).
+                        modified: LLMMessage = cast(
+                            LLMMessage, dict(call_result.tool_message)
+                        )
+                        modified["content"] = [
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:{media_type};base64,{b64_data}",
+                                },
+                            }
+                        ]
+                        messages.append(modified)
+                        tool_results.append("[image]")
+                    else:
+                        messages.append(call_result.tool_message)
+                        if raw_content:
+                            tool_results.append(raw_content)
+                else:
+                    messages.append(call_result.tool_message)
+                    if raw_content:
+                        tool_results.append(str(raw_content))
+
+        return "\n".join(tool_results) if tool_results else ""
--- a/lib/crewai/src/crewai/cli/authentication/init.py
+++ b/lib/crewai/src/crewai/cli/authentication/init.py
@@ -1,5 +1,4 @@
 from crewai.cli.authentication.main import AuthenticationCommand


-
 __all__ = ["AuthenticationCommand"]
--- a/lib/crewai/src/crewai/cli/create_crew.py
+++ b/lib/crewai/src/crewai/cli/create_crew.py
@@ -143,7 +143,7 @@ def create_folder_structure(
        (folder_path / "src" / folder_name).mkdir(parents=True)
        (folder_path / "src" / folder_name / "tools").mkdir(parents=True)
        (folder_path / "src" / folder_name / "config").mkdir(parents=True)
-        
+
        # Copy AGENTS.md to project root (top-level projects only)
        package_dir = Path(__file__).parent
        agents_md_src = package_dir / "templates" / "AGENTS.md"
--- a/lib/crewai/src/crewai/cli/create_flow.py
+++ b/lib/crewai/src/crewai/cli/create_flow.py
@@ -1,5 +1,5 @@
-import shutil
 from pathlib import Path
+import shutil

 import click

--- a/lib/crewai/src/crewai/cli/plus_api.py
+++ b/lib/crewai/src/crewai/cli/plus_api.py
@@ -49,8 +49,13 @@ class PlusAPI:
        with httpx.Client(trust_env=False, verify=verify) as client:
            return client.request(method, url, headers=self.headers, **kwargs)

-    def login_to_tool_repository(self) -> httpx.Response:
-        return self._make_request("POST", f"{self.TOOLS_RESOURCE}/login")
+    def login_to_tool_repository(
+        self, user_identifier: str | None = None
+    ) -> httpx.Response:
+        payload = {}
+        if user_identifier:
+            payload["user_identifier"] = user_identifier
+        return self._make_request("POST", f"{self.TOOLS_RESOURCE}/login", json=payload)

    def get_tool(self, handle: str) -> httpx.Response:
        return self._make_request("GET", f"{self.TOOLS_RESOURCE}/{handle}")
--- a/lib/crewai/src/crewai/cli/templates/crew/pyproject.toml
+++ b/lib/crewai/src/crewai/cli/templates/crew/pyproject.toml
@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
 authors = [{ name = "Your Name", email = "you@example.com" }]
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]==1.10.1a1"
+    "crewai[tools]==1.10.2a1"
 ]

 [project.scripts]
--- a/lib/crewai/src/crewai/cli/templates/flow/pyproject.toml
+++ b/lib/crewai/src/crewai/cli/templates/flow/pyproject.toml
@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
 authors = [{ name = "Your Name", email = "you@example.com" }]
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]==1.10.1a1"
+    "crewai[tools]==1.10.2a1"
 ]

 [project.scripts]
--- a/lib/crewai/src/crewai/cli/templates/tool/pyproject.toml
+++ b/lib/crewai/src/crewai/cli/templates/tool/pyproject.toml
@@ -5,7 +5,7 @@ description = "Power up your crews with {{folder_name}}"
 readme = "README.md"
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]==1.10.1a1"
+    "crewai[tools]==1.10.2a1"
 ]

 [tool.crewai]
--- a/lib/crewai/src/crewai/cli/tools/main.py
+++ b/lib/crewai/src/crewai/cli/tools/main.py
@@ -23,6 +23,7 @@ from crewai.cli.utils import (
    tree_copy,
    tree_find_and_replace,
 )
+from crewai.events.listeners.tracing.utils import get_user_id


 console = Console()
@@ -169,7 +170,9 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
        console.print(f"Successfully installed {handle}", style="bold green")

    def login(self) -> None:
-        login_response = self.plus_api_client.login_to_tool_repository()
+        login_response = self.plus_api_client.login_to_tool_repository(
+            user_identifier=get_user_id()
+        )

        if login_response.status_code != 200:
            console.print(
--- a/lib/crewai/src/crewai/crews/init.py
+++ b/lib/crewai/src/crewai/crews/init.py
@@ -1,5 +1,4 @@
 from crewai.crews.crew_output import CrewOutput


-
 __all__ = ["CrewOutput"]
--- a/lib/crewai/src/crewai/events/base_event_listener.py
+++ b/lib/crewai/src/crewai/events/base_event_listener.py
@@ -23,4 +23,3 @@ class BaseEventListener(ABC):
        Args:
            crewai_event_bus: The event bus to register listeners on.
        """
-        pass
--- a/lib/crewai/src/crewai/events/event_listener.py
+++ b/lib/crewai/src/crewai/events/event_listener.py
@@ -75,6 +75,14 @@ from crewai.events.types.mcp_events import (
    MCPToolExecutionFailedEvent,
    MCPToolExecutionStartedEvent,
 )
+from crewai.events.types.observation_events import (
+    GoalAchievedEarlyEvent,
+    PlanRefinementEvent,
+    PlanReplanTriggeredEvent,
+    StepObservationCompletedEvent,
+    StepObservationFailedEvent,
+    StepObservationStartedEvent,
+)
 from crewai.events.types.reasoning_events import (
    AgentReasoningCompletedEvent,
    AgentReasoningFailedEvent,
@@ -535,6 +543,64 @@ class EventListener(BaseEventListener):
                event.error,
            )

+        # ----------- OBSERVATION EVENTS (Plan-and-Execute) -----------
+
+        @crewai_event_bus.on(StepObservationStartedEvent)
+        def on_step_observation_started(
+            _: Any, event: StepObservationStartedEvent
+        ) -> None:
+            self.formatter.handle_observation_started(
+                event.agent_role,
+                event.step_number,
+                event.step_description,
+            )
+
+        @crewai_event_bus.on(StepObservationCompletedEvent)
+        def on_step_observation_completed(
+            _: Any, event: StepObservationCompletedEvent
+        ) -> None:
+            self.formatter.handle_observation_completed(
+                event.agent_role,
+                event.step_number,
+                event.step_completed_successfully,
+                event.remaining_plan_still_valid,
+                event.key_information_learned,
+                event.needs_full_replan,
+                event.goal_already_achieved,
+            )
+
+        @crewai_event_bus.on(StepObservationFailedEvent)
+        def on_step_observation_failed(
+            _: Any, event: StepObservationFailedEvent
+        ) -> None:
+            self.formatter.handle_observation_failed(
+                event.step_number,
+                event.error,
+            )
+
+        @crewai_event_bus.on(PlanRefinementEvent)
+        def on_plan_refinement(_: Any, event: PlanRefinementEvent) -> None:
+            self.formatter.handle_plan_refinement(
+                event.step_number,
+                event.refined_step_count,
+                event.refinements,
+            )
+
+        @crewai_event_bus.on(PlanReplanTriggeredEvent)
+        def on_plan_replan_triggered(_: Any, event: PlanReplanTriggeredEvent) -> None:
+            self.formatter.handle_plan_replan(
+                event.replan_reason,
+                event.replan_count,
+                event.completed_steps_preserved,
+            )
+
+        @crewai_event_bus.on(GoalAchievedEarlyEvent)
+        def on_goal_achieved_early(_: Any, event: GoalAchievedEarlyEvent) -> None:
+            self.formatter.handle_goal_achieved_early(
+                event.steps_completed,
+                event.steps_remaining,
+            )
+
        # ----------- AGENT LOGGING EVENTS -----------

        @crewai_event_bus.on(AgentLogsStartedEvent)
--- a/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py
+++ b/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py
@@ -15,6 +15,7 @@ from crewai.cli.plus_api import PlusAPI
 from crewai.cli.version import get_crewai_version
 from crewai.events.listeners.tracing.types import TraceEvent
 from crewai.events.listeners.tracing.utils import (
+    get_user_id,
    is_tracing_enabled_in_context,
    should_auto_collect_first_time_traces,
 )
@@ -120,7 +121,6 @@ class TraceBatchManager:
            payload = {
                "trace_id": self.current_batch.batch_id,
                "execution_type": execution_metadata.get("execution_type", "crew"),
-                "user_identifier": execution_metadata.get("user_context", None),
                "execution_context": {
                    "crew_fingerprint": execution_metadata.get("crew_fingerprint"),
                    "crew_name": execution_metadata.get("crew_name", None),
@@ -140,6 +140,7 @@ class TraceBatchManager:
            }
            if use_ephemeral:
                payload["ephemeral_trace_id"] = self.current_batch.batch_id
+                payload["user_identifier"] = get_user_id()

            response = (
                self.plus_api.initialize_ephemeral_trace_batch(payload)
--- a/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py
+++ b/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py
@@ -93,6 +93,14 @@ from crewai.events.types.memory_events import (
    MemorySaveFailedEvent,
    MemorySaveStartedEvent,
 )
+from crewai.events.types.observation_events import (
+    GoalAchievedEarlyEvent,
+    PlanRefinementEvent,
+    PlanReplanTriggeredEvent,
+    StepObservationCompletedEvent,
+    StepObservationFailedEvent,
+    StepObservationStartedEvent,
+)
 from crewai.events.types.reasoning_events import (
    AgentReasoningCompletedEvent,
    AgentReasoningFailedEvent,
@@ -437,6 +445,39 @@ class TraceCollectionListener(BaseEventListener):
        ) -> None:
            self._handle_action_event("agent_reasoning_failed", source, event)

+        # Observation events (Plan-and-Execute)
+        @event_bus.on(StepObservationStartedEvent)
+        def on_step_observation_started(
+            source: Any, event: StepObservationStartedEvent
+        ) -> None:
+            self._handle_action_event("step_observation_started", source, event)
+
+        @event_bus.on(StepObservationCompletedEvent)
+        def on_step_observation_completed(
+            source: Any, event: StepObservationCompletedEvent
+        ) -> None:
+            self._handle_action_event("step_observation_completed", source, event)
+
+        @event_bus.on(StepObservationFailedEvent)
+        def on_step_observation_failed(
+            source: Any, event: StepObservationFailedEvent
+        ) -> None:
+            self._handle_action_event("step_observation_failed", source, event)
+
+        @event_bus.on(PlanRefinementEvent)
+        def on_plan_refinement(source: Any, event: PlanRefinementEvent) -> None:
+            self._handle_action_event("plan_refinement", source, event)
+
+        @event_bus.on(PlanReplanTriggeredEvent)
+        def on_plan_replan_triggered(
+            source: Any, event: PlanReplanTriggeredEvent
+        ) -> None:
+            self._handle_action_event("plan_replan_triggered", source, event)
+
+        @event_bus.on(GoalAchievedEarlyEvent)
+        def on_goal_achieved_early(source: Any, event: GoalAchievedEarlyEvent) -> None:
+            self._handle_action_event("goal_achieved_early", source, event)
+
        @event_bus.on(KnowledgeRetrievalStartedEvent)
        def on_knowledge_retrieval_started(
            source: Any, event: KnowledgeRetrievalStartedEvent
--- a/lib/crewai/src/crewai/events/types/llm_events.py
+++ b/lib/crewai/src/crewai/events/types/llm_events.py
@@ -86,3 +86,11 @@ class LLMStreamChunkEvent(LLMEventBase):
    tool_call: ToolCall | None = None
    call_type: LLMCallType | None = None
    response_id: str | None = None
+
+
+class LLMThinkingChunkEvent(LLMEventBase):
+    """Event emitted when a thinking/reasoning chunk is received from a thinking model"""
+
+    type: str = "llm_thinking_chunk"
+    chunk: str
+    response_id: str | None = None
--- a/lib/crewai/src/crewai/events/types/observation_events.py
+++ b/lib/crewai/src/crewai/events/types/observation_events.py
@@ -0,0 +1,99 @@
+"""Observation events for the Plan-and-Execute architecture.
+
+Emitted during the Observation phase (PLAN-AND-ACT Section 3.3) when the
+PlannerObserver analyzes step execution results and decides on plan
+continuation, refinement, or replanning.
+"""
+
+from typing import Any
+
+from crewai.events.base_events import BaseEvent
+
+
+class ObservationEvent(BaseEvent):
+    """Base event for observation phase events."""
+
+    type: str
+    agent_role: str
+    step_number: int
+    step_description: str = ""
+    from_task: Any | None = None
+    from_agent: Any | None = None
+
+    def __init__(self, **data: Any) -> None:
+        super().__init__(**data)
+        self._set_task_params(data)
+        self._set_agent_params(data)
+
+
+class StepObservationStartedEvent(ObservationEvent):
+    """Emitted when the Planner begins observing a step's result.
+
+    Fires after every step execution, before the observation LLM call.
+    """
+
+    type: str = "step_observation_started"
+
+
+class StepObservationCompletedEvent(ObservationEvent):
+    """Emitted when the Planner finishes observing a step's result.
+
+    Contains the full observation analysis: what was learned, whether
+    the plan is still valid, and what action to take next.
+    """
+
+    type: str = "step_observation_completed"
+    step_completed_successfully: bool = True
+    key_information_learned: str = ""
+    remaining_plan_still_valid: bool = True
+    needs_full_replan: bool = False
+    replan_reason: str | None = None
+    goal_already_achieved: bool = False
+    suggested_refinements: list[str] | None = None
+
+
+class StepObservationFailedEvent(ObservationEvent):
+    """Emitted when the observation LLM call itself fails.
+
+    The system defaults to continuing the plan when this happens,
+    but the event allows monitoring/alerting on observation failures.
+    """
+
+    type: str = "step_observation_failed"
+    error: str = ""
+
+
+class PlanRefinementEvent(ObservationEvent):
+    """Emitted when the Planner refines upcoming step descriptions.
+
+    This is the lightweight refinement path — no full replan, just
+    sharpening pending todo descriptions based on new information.
+    """
+
+    type: str = "plan_refinement"
+    refined_step_count: int = 0
+    refinements: list[str] | None = None
+
+
+class PlanReplanTriggeredEvent(ObservationEvent):
+    """Emitted when the Planner triggers a full replan.
+
+    The remaining plan was deemed fundamentally wrong and will be
+    regenerated from scratch, preserving completed step results.
+    """
+
+    type: str = "plan_replan_triggered"
+    replan_reason: str = ""
+    replan_count: int = 0
+    completed_steps_preserved: int = 0
+
+
+class GoalAchievedEarlyEvent(ObservationEvent):
+    """Emitted when the Planner detects the goal was achieved early.
+
+    Remaining steps will be skipped and execution will finalize.
+    """
+
+    type: str = "goal_achieved_early"
+    steps_remaining: int = 0
+    steps_completed: int = 0
--- a/lib/crewai/src/crewai/events/types/reasoning_events.py
+++ b/lib/crewai/src/crewai/events/types/reasoning_events.py
@@ -9,7 +9,7 @@ class ReasoningEvent(BaseEvent):
    type: str
    attempt: int = 1
    agent_role: str
-    task_id: str
+    task_id: str | None = None
    task_name: str | None = None
    from_task: Any | None = None
    agent_id: str | None = None
--- a/lib/crewai/src/crewai/events/utils/console_formatter.py
+++ b/lib/crewai/src/crewai/events/utils/console_formatter.py
@@ -936,6 +936,152 @@ To enable tracing, do any one of these:
        )
        self.print_panel(error_content, "❌ Reasoning Error", "red")

+    # ----------- OBSERVATION EVENTS (Plan-and-Execute) -----------
+
+    def handle_observation_started(
+        self,
+        agent_role: str,
+        step_number: int,
+        step_description: str,
+    ) -> None:
+        """Handle step observation started event."""
+        if not self.verbose:
+            return
+
+        content = Text()
+        content.append("Observation Started\n", style="cyan bold")
+        content.append("Agent: ", style="white")
+        content.append(f"{agent_role}\n", style="cyan")
+        content.append("Step: ", style="white")
+        content.append(f"{step_number}\n", style="cyan")
+        if step_description:
+            desc_preview = step_description[:80] + (
+                "..." if len(step_description) > 80 else ""
+            )
+            content.append("Description: ", style="white")
+            content.append(f"{desc_preview}\n", style="cyan")
+
+        self.print_panel(content, "🔍 Observing Step Result", "cyan")
+
+    def handle_observation_completed(
+        self,
+        agent_role: str,
+        step_number: int,
+        step_completed: bool,
+        plan_valid: bool,
+        key_info: str,
+        needs_replan: bool,
+        goal_achieved: bool,
+    ) -> None:
+        """Handle step observation completed event."""
+        if not self.verbose:
+            return
+
+        if goal_achieved:
+            style = "green"
+            status = "Goal Achieved Early"
+        elif needs_replan:
+            style = "yellow"
+            status = "Replan Needed"
+        elif plan_valid:
+            style = "green"
+            status = "Plan Valid — Continue"
+        else:
+            style = "red"
+            status = "Step Failed"
+
+        content = Text()
+        content.append("Observation Complete\n", style=f"{style} bold")
+        content.append("Step: ", style="white")
+        content.append(f"{step_number}\n", style=style)
+        content.append("Status: ", style="white")
+        content.append(f"{status}\n", style=style)
+        if key_info:
+            info_preview = key_info[:120] + ("..." if len(key_info) > 120 else "")
+            content.append("Learned: ", style="white")
+            content.append(f"{info_preview}\n", style=style)
+
+        self.print_panel(content, "🔍 Observation Result", style)
+
+    def handle_observation_failed(
+        self,
+        step_number: int,
+        error: str,
+    ) -> None:
+        """Handle step observation failure event."""
+        if not self.verbose:
+            return
+
+        error_content = self.create_status_content(
+            "Observation Failed",
+            "Error",
+            "red",
+            Step=str(step_number),
+            Error=error,
+        )
+        self.print_panel(error_content, "❌ Observation Error", "red")
+
+    def handle_plan_refinement(
+        self,
+        step_number: int,
+        refined_count: int,
+        refinements: list[str] | None,
+    ) -> None:
+        """Handle plan refinement event."""
+        if not self.verbose:
+            return
+
+        content = Text()
+        content.append("Plan Refined\n", style="cyan bold")
+        content.append("After Step: ", style="white")
+        content.append(f"{step_number}\n", style="cyan")
+        content.append("Steps Updated: ", style="white")
+        content.append(f"{refined_count}\n", style="cyan")
+        if refinements:
+            for r in refinements[:3]:
+                content.append(f"  • {r[:80]}\n", style="white")
+
+        self.print_panel(content, "✏️ Plan Refinement", "cyan")
+
+    def handle_plan_replan(
+        self,
+        reason: str,
+        replan_count: int,
+        preserved_count: int,
+    ) -> None:
+        """Handle plan replan triggered event."""
+        if not self.verbose:
+            return
+
+        content = Text()
+        content.append("Full Replan Triggered\n", style="yellow bold")
+        content.append("Reason: ", style="white")
+        content.append(f"{reason}\n", style="yellow")
+        content.append("Replan #: ", style="white")
+        content.append(f"{replan_count}\n", style="yellow")
+        content.append("Preserved Steps: ", style="white")
+        content.append(f"{preserved_count}\n", style="yellow")
+
+        self.print_panel(content, "🔄 Dynamic Replan", "yellow")
+
+    def handle_goal_achieved_early(
+        self,
+        steps_completed: int,
+        steps_remaining: int,
+    ) -> None:
+        """Handle goal achieved early event."""
+        if not self.verbose:
+            return
+
+        content = Text()
+        content.append("Goal Achieved Early!\n", style="green bold")
+        content.append("Completed: ", style="white")
+        content.append(f"{steps_completed} steps\n", style="green")
+        content.append("Skipped: ", style="white")
+        content.append(f"{steps_remaining} remaining steps\n", style="green")
+
+        self.print_panel(content, "🎯 Early Goal Achievement", "green")
+
    # ----------- AGENT LOGGING EVENTS -----------

    def handle_agent_logs_started(
--- a/lib/crewai/src/crewai/experimental/agent_executor.py
+++ b/lib/crewai/src/crewai/experimental/agent_executor.py
--- a/lib/crewai/src/crewai/flow/flow.py
+++ b/lib/crewai/src/crewai/flow/flow.py
@@ -497,6 +497,50 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
    def __bool__(self) -> bool:
        return bool(self._list)

+    def index(self, value: T, start: SupportsIndex = 0, stop: SupportsIndex | None = None) -> int:  # type: ignore[override]
+        if stop is None:
+            return self._list.index(value, start)
+        return self._list.index(value, start, stop)
+
+    def count(self, value: T) -> int:
+        return self._list.count(value)
+
+    def sort(self, *, key: Any = None, reverse: bool = False) -> None:
+        with self._lock:
+            self._list.sort(key=key, reverse=reverse)
+
+    def reverse(self) -> None:
+        with self._lock:
+            self._list.reverse()
+
+    def copy(self) -> list[T]:
+        return self._list.copy()
+
+    def __add__(self, other: list[T]) -> list[T]:
+        return self._list + other
+
+    def __radd__(self, other: list[T]) -> list[T]:
+        return other + self._list
+
+    def __iadd__(self, other: Iterable[T]) -> LockedListProxy[T]:
+        with self._lock:
+            self._list += list(other)
+        return self
+
+    def __mul__(self, n: SupportsIndex) -> list[T]:
+        return self._list * n
+
+    def __rmul__(self, n: SupportsIndex) -> list[T]:
+        return self._list * n
+
+    def __imul__(self, n: SupportsIndex) -> LockedListProxy[T]:
+        with self._lock:
+            self._list *= n
+        return self
+
+    def __reversed__(self) -> Iterator[T]:
+        return reversed(self._list)
+
    def __eq__(self, other: object) -> bool:
        """Compare based on the underlying list contents."""
        if isinstance(other, LockedListProxy):
@@ -579,6 +623,23 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
    def __bool__(self) -> bool:
        return bool(self._dict)

+    def copy(self) -> dict[str, T]:
+        return self._dict.copy()
+
+    def __or__(self, other: dict[str, T]) -> dict[str, T]:
+        return self._dict | other
+
+    def __ror__(self, other: dict[str, T]) -> dict[str, T]:
+        return other | self._dict
+
+    def __ior__(self, other: dict[str, T]) -> LockedDictProxy[T]:
+        with self._lock:
+            self._dict |= other
+        return self
+
+    def __reversed__(self) -> Iterator[str]:
+        return reversed(self._dict)
+
    def __eq__(self, other: object) -> bool:
        """Compare based on the underlying dict contents."""
        if isinstance(other, LockedDictProxy):
@@ -620,6 +681,10 @@ class StateProxy(Generic[T]):
        if name in ("_proxy_state", "_proxy_lock"):
            object.__setattr__(self, name, value)
        else:
+            if isinstance(value, LockedListProxy):
+                value = value._list
+            elif isinstance(value, LockedDictProxy):
+                value = value._dict
            with object.__getattribute__(self, "_proxy_lock"):
                setattr(object.__getattribute__(self, "_proxy_state"), name, value)

@@ -692,6 +757,7 @@ class FlowMeta(type):
                    condition_type = getattr(
                        attr_value, "__condition_type__", OR_CONDITION
                    )
+
                    if (
                        hasattr(attr_value, "__trigger_condition__")
                        and attr_value.__trigger_condition__ is not None
@@ -769,6 +835,7 @@ class Flow(Generic[T], metaclass=FlowMeta):
        persistence: FlowPersistence | None = None,
        tracing: bool | None = None,
        suppress_flow_events: bool = False,
+        max_method_calls: int = 100,
        **kwargs: Any,
    ) -> None:
        """Initialize a new Flow instance.
@@ -777,6 +844,7 @@ class Flow(Generic[T], metaclass=FlowMeta):
            persistence: Optional persistence backend for storing flow states
            tracing: Whether to enable tracing. True=always enable, False=always disable, None=check environment/user settings
            suppress_flow_events: Whether to suppress flow event emissions (internal use)
+            max_method_calls: Maximum times a single method can be called per execution before raising RecursionError
            **kwargs: Additional state values to initialize or override
        """
        # Initialize basic instance attributes
@@ -792,6 +860,8 @@ class Flow(Generic[T], metaclass=FlowMeta):
        self._completed_methods: set[FlowMethodName] = (
            set()
        )  # Track completed methods for reload
+        self._method_call_counts: dict[FlowMethodName, int] = {}
+        self._max_method_calls = max_method_calls
        self._persistence: FlowPersistence | None = persistence
        self._is_execution_resuming: bool = False
        self._event_futures: list[Future[None]] = []
@@ -1828,6 +1898,7 @@ class Flow(Generic[T], metaclass=FlowMeta):
                self._method_outputs.clear()
                self._pending_and_listeners.clear()
                self._clear_or_listeners()
+                self._method_call_counts.clear()
            else:
                # Only enter resumption mode if there are completed methods to
                # replay.  When _completed_methods is empty (e.g. a pure
@@ -2569,6 +2640,16 @@ class Flow(Generic[T], metaclass=FlowMeta):
            - Skips execution if method was already completed (e.g., after reload)
            - Catches and logs any exceptions during execution, preventing individual listener failures from breaking the entire flow
        """
+        count = self._method_call_counts.get(listener_name, 0) + 1
+        if count > self._max_method_calls:
+            raise RecursionError(
+                f"Method '{listener_name}' has been called {self._max_method_calls} times in "
+                f"this flow execution, which indicates an infinite loop. "
+                f"This commonly happens when a @listen label matches the "
+                f"method's own name."
+            )
+        self._method_call_counts[listener_name] = count
+
        if listener_name in self._completed_methods:
            if self._is_execution_resuming:
                # During resumption, skip execution but continue listeners
--- a/lib/crewai/src/crewai/flow/human_feedback.py
+++ b/lib/crewai/src/crewai/flow/human_feedback.py
@@ -408,7 +408,7 @@ def human_feedback(
                emit=list(emit) if emit else None,
                default_outcome=default_outcome,
                metadata=metadata or {},
-                llm=llm if isinstance(llm, str) else None,
+                llm=llm if isinstance(llm, str) else getattr(llm, "model", None),
            )

            # Determine effective provider:
--- a/lib/crewai/src/crewai/flow/persistence/sqlite.py
+++ b/lib/crewai/src/crewai/flow/persistence/sqlite.py
@@ -72,7 +72,8 @@ class SQLiteFlowPersistence(FlowPersistence):

    def init_db(self) -> None:
        """Create the necessary tables if they don't exist."""
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(self.db_path, timeout=30) as conn:
+            conn.execute("PRAGMA journal_mode=WAL")
            # Main state table
            conn.execute(
                """
@@ -136,7 +137,7 @@ class SQLiteFlowPersistence(FlowPersistence):
                f"state_data must be either a Pydantic BaseModel or dict, got {type(state_data)}"
            )

-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(self.db_path, timeout=30) as conn:
            conn.execute(
                """
            INSERT INTO flow_states (
@@ -163,7 +164,7 @@ class SQLiteFlowPersistence(FlowPersistence):
        Returns:
            The most recent state as a dictionary, or None if no state exists
        """
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(self.db_path, timeout=30) as conn:
            cursor = conn.execute(
                """
            SELECT state_json
@@ -213,7 +214,7 @@ class SQLiteFlowPersistence(FlowPersistence):
        self.save_state(flow_uuid, context.method_name, state_data)

        # Save pending feedback context
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(self.db_path, timeout=30) as conn:
            # Use INSERT OR REPLACE to handle re-triggering feedback on same flow
            conn.execute(
                """
@@ -248,7 +249,7 @@ class SQLiteFlowPersistence(FlowPersistence):
        # Import here to avoid circular imports
        from crewai.flow.async_feedback.types import PendingFeedbackContext

-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(self.db_path, timeout=30) as conn:
            cursor = conn.execute(
                """
            SELECT state_json, context_json
@@ -272,7 +273,7 @@ class SQLiteFlowPersistence(FlowPersistence):
        Args:
            flow_uuid: Unique identifier for the flow instance
        """
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(self.db_path, timeout=30) as conn:
            conn.execute(
                """
            DELETE FROM pending_feedback
--- a/lib/crewai/src/crewai/lite_agent.py
+++ b/lib/crewai/src/crewai/lite_agent.py
@@ -600,7 +600,7 @@ class LiteAgent(FlowTrackable, BaseModel):

    def _save_to_memory(self, output_text: str) -> None:
        """Extract discrete memories from the run and remember each. No-op if _memory is None or read-only."""
-        if self._memory is None or getattr(self._memory, "_read_only", False):
+        if self._memory is None or self._memory.read_only:
            return
        input_str = self._get_last_user_content() or "User request"
        try:
--- a/lib/crewai/src/crewai/lite_agent_output.py
+++ b/lib/crewai/src/crewai/lite_agent_output.py
@@ -6,9 +6,27 @@ from typing import Any

 from pydantic import BaseModel, Field

+from crewai.utilities.planning_types import TodoItem
 from crewai.utilities.types import LLMMessage


+class TodoExecutionResult(BaseModel):
+    """Summary of a single todo execution."""
+
+    step_number: int = Field(description="Step number in the plan")
+    description: str = Field(description="What the todo was supposed to do")
+    tool_used: str | None = Field(
+        default=None, description="Tool that was used for this step"
+    )
+    status: str = Field(description="Final status: completed, failed, pending")
+    result: str | None = Field(
+        default=None, description="Result or error message from execution"
+    )
+    depends_on: list[int] = Field(
+        default_factory=list, description="Step numbers this depended on"
+    )
+
+
 class LiteAgentOutput(BaseModel):
    """Class that represents the result of a LiteAgent execution."""

@@ -24,12 +42,75 @@ class LiteAgentOutput(BaseModel):
    )
    messages: list[LLMMessage] = Field(description="Messages of the agent", default=[])

+    plan: str | None = Field(
+        default=None, description="The execution plan that was generated, if any"
+    )
+    todos: list[TodoExecutionResult] = Field(
+        default_factory=list,
+        description="List of todos that were executed with their results",
+    )
+    replan_count: int = Field(
+        default=0, description="Number of times the plan was regenerated"
+    )
+    last_replan_reason: str | None = Field(
+        default=None, description="Reason for the last replan, if any"
+    )
+
+    @classmethod
+    def from_todo_items(cls, todo_items: list[TodoItem]) -> list[TodoExecutionResult]:
+        """Convert TodoItem objects to TodoExecutionResult summaries.
+
+        Args:
+            todo_items: List of TodoItem objects from execution.
+
+        Returns:
+            List of TodoExecutionResult summaries.
+        """
+        return [
+            TodoExecutionResult(
+                step_number=item.step_number,
+                description=item.description,
+                tool_used=item.tool_to_use,
+                status=item.status,
+                result=item.result,
+                depends_on=item.depends_on,
+            )
+            for item in todo_items
+        ]
+
    def to_dict(self) -> dict[str, Any]:
        """Convert pydantic_output to a dictionary."""
        if self.pydantic:
            return self.pydantic.model_dump()
        return {}

+    @property
+    def completed_todos(self) -> list[TodoExecutionResult]:
+        """Get only the completed todos."""
+        return [t for t in self.todos if t.status == "completed"]
+
+    @property
+    def failed_todos(self) -> list[TodoExecutionResult]:
+        """Get only the failed todos."""
+        return [t for t in self.todos if t.status == "failed"]
+
+    @property
+    def had_plan(self) -> bool:
+        """Check if the agent executed with a plan."""
+        return self.plan is not None or len(self.todos) > 0
+
    def __str__(self) -> str:
        """Return the raw output as a string."""
        return self.raw
+
+    def __repr__(self) -> str:
+        """Return a detailed representation including todo summary."""
+        parts = [f"LiteAgentOutput(role={self.agent_role!r}"]
+        if self.todos:
+            completed = len(self.completed_todos)
+            total = len(self.todos)
+            parts.append(f", todos={completed}/{total} completed")
+        if self.replan_count > 0:
+            parts.append(f", replans={self.replan_count}")
+        parts.append(")")
+        return "".join(parts)
--- a/lib/crewai/src/crewai/llms/base_llm.py
+++ b/lib/crewai/src/crewai/llms/base_llm.py
@@ -26,6 +26,7 @@ from crewai.events.types.llm_events import (
    LLMCallStartedEvent,
    LLMCallType,
    LLMStreamChunkEvent,
+    LLMThinkingChunkEvent,
 )
 from crewai.events.types.tool_usage_events import (
    ToolUsageErrorEvent,
@@ -368,9 +369,6 @@ class BaseLLM(ABC):
        """Emit LLM call started event."""
        from crewai.utilities.serialization import to_serializable

-        if not hasattr(crewai_event_bus, "emit"):
-            raise ValueError("crewai_event_bus does not have an emit method") from None
-
        crewai_event_bus.emit(
            self,
            event=LLMCallStartedEvent(
@@ -416,9 +414,6 @@ class BaseLLM(ABC):
        from_agent: Agent | None = None,
    ) -> None:
        """Emit LLM call failed event."""
-        if not hasattr(crewai_event_bus, "emit"):
-            raise ValueError("crewai_event_bus does not have an emit method") from None
-
        crewai_event_bus.emit(
            self,
            event=LLMCallFailedEvent(
@@ -449,9 +444,6 @@ class BaseLLM(ABC):
            call_type: The type of LLM call (LLM_CALL or TOOL_CALL).
            response_id: Unique ID for a particular LLM response, chunks have same response_id.
        """
-        if not hasattr(crewai_event_bus, "emit"):
-            raise ValueError("crewai_event_bus does not have an emit method") from None
-
        crewai_event_bus.emit(
            self,
            event=LLMStreamChunkEvent(
@@ -465,6 +457,32 @@ class BaseLLM(ABC):
            ),
        )

+    def _emit_thinking_chunk_event(
+        self,
+        chunk: str,
+        from_task: Task | None = None,
+        from_agent: Agent | None = None,
+        response_id: str | None = None,
+    ) -> None:
+        """Emit thinking/reasoning chunk event from a thinking model.
+
+        Args:
+            chunk: The thinking text content.
+            from_task: The task that initiated the call.
+            from_agent: The agent that initiated the call.
+            response_id: Unique ID for a particular LLM response.
+        """
+        crewai_event_bus.emit(
+            self,
+            event=LLMThinkingChunkEvent(
+                chunk=chunk,
+                from_task=from_task,
+                from_agent=from_agent,
+                response_id=response_id,
+                call_id=get_current_call_id(),
+            ),
+        )
+
    def _handle_tool_execution(
        self,
        function_name: str,
--- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
@@ -22,7 +22,12 @@ if TYPE_CHECKING:

 try:
    from anthropic import Anthropic, AsyncAnthropic, transform_schema
-    from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
+    from anthropic.types import (
+        Message,
+        TextBlock,
+        ThinkingBlock,
+        ToolUseBlock,
+    )
    from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock
    import httpx
 except ImportError:
@@ -31,6 +36,11 @@ except ImportError:
    ) from None


+TOOL_SEARCH_TOOL_TYPES: Final[tuple[str, ...]] = (
+    "tool_search_tool_regex_20251119",
+    "tool_search_tool_bm25_20251119",
+)
+
 ANTHROPIC_FILES_API_BETA: Final = "files-api-2025-04-14"
 ANTHROPIC_STRUCTURED_OUTPUTS_BETA: Final = "structured-outputs-2025-11-13"

@@ -117,6 +127,22 @@ class AnthropicThinkingConfig(BaseModel):
    budget_tokens: int | None = None


+class AnthropicToolSearchConfig(BaseModel):
+    """Configuration for Anthropic's server-side tool search.
+
+    When enabled, tools marked with defer_loading=True are not loaded into
+    context immediately. Instead, Claude uses the tool search tool to
+    dynamically discover and load relevant tools on-demand.
+
+    Attributes:
+        type: The tool search variant to use.
+            - "regex": Claude constructs regex patterns to search tool names/descriptions.
+            - "bm25": Claude uses natural language queries to search tools.
+    """
+
+    type: Literal["regex", "bm25"] = "bm25"
+
+
 class AnthropicCompletion(BaseLLM):
    """Anthropic native completion implementation.

@@ -140,6 +166,7 @@ class AnthropicCompletion(BaseLLM):
        interceptor: BaseInterceptor[httpx.Request, httpx.Response] | None = None,
        thinking: AnthropicThinkingConfig | None = None,
        response_format: type[BaseModel] | None = None,
+        tool_search: AnthropicToolSearchConfig | bool | None = None,
        **kwargs: Any,
    ):
        """Initialize Anthropic chat completion client.
@@ -159,6 +186,10 @@ class AnthropicCompletion(BaseLLM):
            interceptor: HTTP interceptor for modifying requests/responses at transport level.
            response_format: Pydantic model for structured output. When provided, responses
                will be validated against this model schema.
+            tool_search: Enable Anthropic's server-side tool search. When True, uses "bm25"
+                variant by default. Pass an AnthropicToolSearchConfig to choose "regex" or
+                "bm25". When enabled, tools are automatically marked with defer_loading=True
+                and a tool search tool is injected into the tools list.
            **kwargs: Additional parameters
        """
        super().__init__(
@@ -190,6 +221,13 @@ class AnthropicCompletion(BaseLLM):
        self.thinking = thinking
        self.previous_thinking_blocks: list[ThinkingBlock] = []
        self.response_format = response_format
+        # Tool search config
+        if tool_search is True:
+            self.tool_search = AnthropicToolSearchConfig()
+        elif isinstance(tool_search, AnthropicToolSearchConfig):
+            self.tool_search = tool_search
+        else:
+            self.tool_search = None
        # Model-specific settings
        self.is_claude_3 = "claude-3" in model.lower()
        self.supports_tools = True
@@ -432,10 +470,23 @@ class AnthropicCompletion(BaseLLM):
        # Handle tools for Claude 3+
        if tools and self.supports_tools:
            converted_tools = self._convert_tools_for_interference(tools)
+
+            # When tool_search is enabled and there are 2+ regular tools,
+            # inject the search tool and mark regular tools with defer_loading.
+            # With only 1 tool there's nothing to search — skip tool search
+            # entirely so the normal forced tool_choice optimisation still works.
+            regular_tools = [
+                t
+                for t in converted_tools
+                if t.get("type", "") not in TOOL_SEARCH_TOOL_TYPES
+            ]
+            if self.tool_search is not None and len(regular_tools) >= 2:
+                converted_tools = self._apply_tool_search(converted_tools)
+
            params["tools"] = converted_tools

-            if available_functions and len(converted_tools) == 1:
-                tool_name = converted_tools[0].get("name")
+            if available_functions and len(regular_tools) == 1:
+                tool_name = regular_tools[0].get("name")
                if tool_name and tool_name in available_functions:
                    params["tool_choice"] = {"type": "tool", "name": tool_name}

@@ -454,6 +505,12 @@ class AnthropicCompletion(BaseLLM):
        anthropic_tools = []

        for tool in tools:
+            # Pass through tool search tool definitions unchanged
+            tool_type = tool.get("type", "")
+            if tool_type in TOOL_SEARCH_TOOL_TYPES:
+                anthropic_tools.append(tool)
+                continue
+
            if "input_schema" in tool and "name" in tool and "description" in tool:
                anthropic_tools.append(tool)
                continue
@@ -466,15 +523,15 @@ class AnthropicCompletion(BaseLLM):
                logging.error(f"Error converting tool to Anthropic format: {e}")
                raise e

-            anthropic_tool = {
+            anthropic_tool: dict[str, Any] = {
                "name": name,
                "description": description,
            }

            if parameters and isinstance(parameters, dict):
-                anthropic_tool["input_schema"] = parameters  # type: ignore[assignment]
+                anthropic_tool["input_schema"] = parameters
            else:
-                anthropic_tool["input_schema"] = {  # type: ignore[assignment]
+                anthropic_tool["input_schema"] = {
                    "type": "object",
                    "properties": {},
                    "required": [],
@@ -484,6 +541,55 @@ class AnthropicCompletion(BaseLLM):

        return anthropic_tools

+    def _apply_tool_search(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Inject tool search tool and mark regular tools with defer_loading.
+
+        When tool_search is enabled, this method:
+        1. Adds the appropriate tool search tool definition (regex or bm25)
+        2. Marks all regular tools with defer_loading=True so they are only
+           loaded when Claude discovers them via search
+
+        Args:
+            tools: Converted tool definitions in Anthropic format.
+
+        Returns:
+            Updated tools list with tool search tool prepended and
+            regular tools marked as deferred.
+        """
+        if self.tool_search is None:
+            return tools
+
+        # Check if a tool search tool is already present (user passed one manually)
+        has_search_tool = any(
+            t.get("type", "") in TOOL_SEARCH_TOOL_TYPES for t in tools
+        )
+
+        result: list[dict[str, Any]] = []
+
+        if not has_search_tool:
+            # Map config type to API type identifier
+            type_map = {
+                "regex": "tool_search_tool_regex_20251119",
+                "bm25": "tool_search_tool_bm25_20251119",
+            }
+            tool_type = type_map[self.tool_search.type]
+            # Tool search tool names follow the convention: tool_search_tool_{variant}
+            tool_name = f"tool_search_tool_{self.tool_search.type}"
+            result.append({"type": tool_type, "name": tool_name})
+
+        for tool in tools:
+            # Don't modify tool search tools
+            if tool.get("type", "") in TOOL_SEARCH_TOOL_TYPES:
+                result.append(tool)
+                continue
+
+            # Mark regular tools as deferred if not already set
+            if "defer_loading" not in tool:
+                tool = {**tool, "defer_loading": True}
+            result.append(tool)
+
+        return result
+
    def _extract_thinking_block(
        self, content_block: Any
    ) -> ThinkingBlock | dict[str, Any] | None:
@@ -512,6 +618,50 @@ class AnthropicCompletion(BaseLLM):
            return redacted_block
        return None

+    @staticmethod
+    def _convert_image_blocks(content: Any) -> Any:
+        """Convert OpenAI-style image_url blocks to Anthropic image blocks.
+
+        Upstream code (e.g. StepExecutor) uses the standard ``image_url``
+        format with a ``data:`` URI.  Anthropic rejects that — it requires
+        ``{"type": "image", "source": {"type": "base64", ...}}``.
+
+        Non-list content and blocks that are not ``image_url`` are passed
+        through unchanged.
+        """
+        if not isinstance(content, list):
+            return content
+
+        converted: list[dict[str, Any]] = []
+        for block in content:
+            if not isinstance(block, dict) or block.get("type") != "image_url":
+                converted.append(block)
+                continue
+
+            image_info = block.get("image_url", {})
+            url = image_info.get("url", "") if isinstance(image_info, dict) else ""
+            if url.startswith("data:") and ";base64," in url:
+                # Parse  data:<media_type>;base64,<data>
+                header, b64_data = url.split(";base64,", 1)
+                media_type = (
+                    header.split("data:", 1)[1] if "data:" in header else "image/png"
+                )
+                converted.append(
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": media_type,
+                            "data": b64_data,
+                        },
+                    }
+                )
+            else:
+                # Non-data URI — pass through as-is (Anthropic supports url source)
+                converted.append(block)
+
+        return converted
+
    def _format_messages_for_anthropic(
        self, messages: str | list[LLMMessage]
    ) -> tuple[list[LLMMessage], str | None]:
@@ -550,10 +700,11 @@ class AnthropicCompletion(BaseLLM):
                tool_call_id = message.get("tool_call_id", "")
                if not tool_call_id:
                    raise ValueError("Tool message missing required tool_call_id")
+                tool_content = self._convert_image_blocks(content) if content else ""
                tool_result = {
                    "type": "tool_result",
                    "tool_use_id": tool_call_id,
-                    "content": content if content else "",
+                    "content": tool_content,
                }
                pending_tool_results.append(tool_result)
            elif role == "assistant":
@@ -612,7 +763,12 @@ class AnthropicCompletion(BaseLLM):

                role_str = role if role is not None else "user"
                if isinstance(content, list):
-                    formatted_messages.append({"role": role_str, "content": content})
+                    formatted_messages.append(
+                        {
+                            "role": role_str,
+                            "content": self._convert_image_blocks(content),
+                        }
+                    )
                else:
                    content_str = content if content is not None else ""
                    formatted_messages.append(
--- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
@@ -1781,6 +1781,7 @@ class BedrockCompletion(BaseLLM):

        converse_messages: list[LLMMessage] = []
        system_message: str | None = None
+        pending_tool_results: list[dict[str, Any]] = []

        for message in formatted_messages:
            role = message.get("role")
@@ -1794,56 +1795,62 @@ class BedrockCompletion(BaseLLM):
                    system_message += f"\n\n{content}"
                else:
                    system_message = cast(str, content)
-            elif role == "assistant" and tool_calls:
-                # Convert OpenAI-style tool_calls to Bedrock toolUse format
-                bedrock_content = []
-                for tc in tool_calls:
-                    func = tc.get("function", {})
-                    tool_use_block = {
-                        "toolUse": {
-                            "toolUseId": tc.get("id", f"call_{id(tc)}"),
-                            "name": func.get("name", ""),
-                            "input": func.get("arguments", {})
-                            if isinstance(func.get("arguments"), dict)
-                            else json.loads(func.get("arguments", "{}") or "{}"),
-                        }
-                    }
-                    bedrock_content.append(tool_use_block)
-                converse_messages.append(
-                    {"role": "assistant", "content": bedrock_content}
-                )
            elif role == "tool":
                if not tool_call_id:
                    raise ValueError("Tool message missing required tool_call_id")
-                converse_messages.append(
+                pending_tool_results.append(
                    {
-                        "role": "user",
-                        "content": [
-                            {
-                                "toolResult": {
-                                    "toolUseId": tool_call_id,
-                                    "content": [
-                                        {"text": str(content) if content else ""}
-                                    ],
-                                }
-                            }
-                        ],
+                        "toolResult": {
+                            "toolUseId": tool_call_id,
+                            "content": [{"text": str(content) if content else ""}],
+                        }
                    }
                )
            else:
-                # Convert to Converse API format with proper content structure
-                if isinstance(content, list):
-                    # Already formatted as multimodal content blocks
-                    converse_messages.append({"role": role, "content": content})
-                else:
-                    # String content - wrap in text block
-                    text_content = content if content else ""
+                if pending_tool_results:
                    converse_messages.append(
-                        {"role": role, "content": [{"text": text_content}]}
+                        {"role": "user", "content": pending_tool_results}
                    )
+                    pending_tool_results = []
+
+                if role == "assistant" and tool_calls:
+                    # Convert OpenAI-style tool_calls to Bedrock toolUse format
+                    bedrock_content = []
+                    for tc in tool_calls:
+                        func = tc.get("function", {})
+                        tool_use_block = {
+                            "toolUse": {
+                                "toolUseId": tc.get("id", f"call_{id(tc)}"),
+                                "name": func.get("name", ""),
+                                "input": func.get("arguments", {})
+                                if isinstance(func.get("arguments"), dict)
+                                else json.loads(func.get("arguments", "{}") or "{}"),
+                            }
+                        }
+                        bedrock_content.append(tool_use_block)
+                    converse_messages.append(
+                        {"role": "assistant", "content": bedrock_content}
+                    )
+                else:
+                    # Convert to Converse API format with proper content structure
+                    if isinstance(content, list):
+                        # Already formatted as multimodal content blocks
+                        converse_messages.append({"role": role, "content": content})
+                    else:
+                        # String content - wrap in text block
+                        text_content = content if content else ""
+                        converse_messages.append(
+                            {"role": role, "content": [{"text": text_content}]}
+                        )
+
+        if pending_tool_results:
+            converse_messages.append({"role": "user", "content": pending_tool_results})

        # CRITICAL: Handle model-specific conversation requirements
-        # Cohere and some other models require conversation to end with user message
+        # Cohere and some other models require conversation to end with user message.
+        # Anthropic models on Bedrock also reject assistant messages in the final
+        # position when tools are present ("pre-filling the assistant response is
+        # not supported").
        if converse_messages:
            last_message = converse_messages[-1]
            if last_message["role"] == "assistant":
@@ -1870,6 +1877,20 @@ class BedrockCompletion(BaseLLM):
                            "content": [{"text": "Continue your response."}],
                        }
                    )
+                # Anthropic (Claude) models reject assistant-last messages when
+                # tools are in the request. Append a user message so the
+                # Converse API accepts the payload.
+                elif "anthropic" in self.model.lower() or "claude" in self.model.lower():
+                    converse_messages.append(
+                        {
+                            "role": "user",
+                            "content": [
+                                {
+                                    "text": "Please continue and provide your final answer."
+                                }
+                            ],
+                        }
+                    )

        # Ensure first message is from user (required by Converse API)
        if not converse_messages:
--- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
@@ -61,6 +61,7 @@ class GeminiCompletion(BaseLLM):
        interceptor: BaseInterceptor[Any, Any] | None = None,
        use_vertexai: bool | None = None,
        response_format: type[BaseModel] | None = None,
+        thinking_config: types.ThinkingConfig | None = None,
        **kwargs: Any,
    ):
        """Initialize Google Gemini chat completion client.
@@ -93,6 +94,10 @@ class GeminiCompletion(BaseLLM):
                         api_version="v1" is automatically configured.
            response_format: Pydantic model for structured output. Used as default when
                           response_model is not passed to call()/acall() methods.
+            thinking_config: ThinkingConfig for thinking models (gemini-2.5+, gemini-3+).
+                           Controls thought output via include_thoughts, thinking_budget,
+                           and thinking_level. When None, thinking models automatically
+                           get include_thoughts=True so thought content is surfaced.
            **kwargs: Additional parameters
        """
        if interceptor is not None:
@@ -139,6 +144,14 @@ class GeminiCompletion(BaseLLM):
            version_match and float(version_match.group(1)) >= 2.0
        )

+        self.thinking_config = thinking_config
+        if (
+            self.thinking_config is None
+            and version_match
+            and float(version_match.group(1)) >= 2.5
+        ):
+            self.thinking_config = types.ThinkingConfig(include_thoughts=True)
+
    @property
    def stop(self) -> list[str]:
        """Get stop sequences sent to the API."""
@@ -520,6 +533,9 @@ class GeminiCompletion(BaseLLM):
        if self.safety_settings:
            config_params["safety_settings"] = self.safety_settings

+        if self.thinking_config is not None:
+            config_params["thinking_config"] = self.thinking_config
+
        return types.GenerateContentConfig(**config_params)

    def _convert_tools_for_interference(  # type: ignore[override]
@@ -618,9 +634,17 @@ class GeminiCompletion(BaseLLM):
                function_response_part = types.Part.from_function_response(
                    name=tool_name, response=response_data
                )
-                contents.append(
-                    types.Content(role="user", parts=[function_response_part])
-                )
+                if (
+                    contents
+                    and contents[-1].role == "user"
+                    and contents[-1].parts
+                    and contents[-1].parts[-1].function_response is not None
+                ):
+                    contents[-1].parts.append(function_response_part)
+                else:
+                    contents.append(
+                        types.Content(role="user", parts=[function_response_part])
+                    )
            elif role == "assistant" and message.get("tool_calls"):
                raw_parts: list[Any] | None = message.get("raw_tool_call_parts")
                if raw_parts and all(isinstance(p, types.Part) for p in raw_parts):
@@ -931,15 +955,6 @@ class GeminiCompletion(BaseLLM):
        if chunk.usage_metadata:
            usage_data = self._extract_token_usage(chunk)

-        if chunk.text:
-            full_response += chunk.text
-            self._emit_stream_chunk_event(
-                chunk=chunk.text,
-                from_task=from_task,
-                from_agent=from_agent,
-                response_id=response_id,
-            )
-
        if chunk.candidates:
            candidate = chunk.candidates[0]
            if candidate.content and candidate.content.parts:
@@ -976,6 +991,21 @@ class GeminiCompletion(BaseLLM):
                            call_type=LLMCallType.TOOL_CALL,
                            response_id=response_id,
                        )
+                    elif part.thought and part.text:
+                        self._emit_thinking_chunk_event(
+                            chunk=part.text,
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            response_id=response_id,
+                        )
+                    elif part.text:
+                        full_response += part.text
+                        self._emit_stream_chunk_event(
+                            chunk=part.text,
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            response_id=response_id,
+                        )

        return full_response, function_calls, usage_data

@@ -1329,7 +1359,7 @@ class GeminiCompletion(BaseLLM):
        text_parts = [
            part.text
            for part in candidate.content.parts
-            if hasattr(part, "text") and part.text
+            if part.text and not part.thought
        ]

        return "".join(text_parts)
--- a/lib/crewai/src/crewai/mcp/client.py
+++ b/lib/crewai/src/crewai/mcp/client.py
@@ -95,7 +95,7 @@ class MCPClient:
        self.discovery_timeout = discovery_timeout
        self.max_retries = max_retries
        self.cache_tools_list = cache_tools_list
-        self._logger = logger or logging.getLogger(__name__)
+        # self._logger = logger or logging.getLogger(__name__)
        self._session: Any = None
        self._initialized = False
        self._exit_stack = AsyncExitStack()
@@ -358,12 +358,10 @@ class MCPClient:
        """Cleanup resources when an error occurs during connection."""
        try:
            await self._exit_stack.aclose()
-        except (RuntimeError, BaseExceptionGroup) as e:
-            error_msg = str(e).lower()
-            if "cancel scope" not in error_msg and "task" not in error_msg:
-                raise RuntimeError(f"Error during MCP client cleanup: {e}") from e
-        except Exception:
-            self._logger.debug("Suppressed error during MCP cleanup", exc_info=True)
+
+        except Exception as e:
+            # Best effort cleanup - ignore all other errors
+            raise RuntimeError(f"Error during MCP client cleanup: {e}") from e
        finally:
            self._session = None
            self._initialized = False
@@ -376,12 +374,8 @@ class MCPClient:

        try:
            await self._exit_stack.aclose()
-        except (RuntimeError, BaseExceptionGroup) as e:
-            error_msg = str(e).lower()
-            if "cancel scope" not in error_msg and "task" not in error_msg:
-                raise RuntimeError(f"Error during MCP client disconnect: {e}") from e
-        except Exception:
-            self._logger.debug("Suppressed error during MCP disconnect", exc_info=True)
+        except Exception as e:
+            raise RuntimeError(f"Error during MCP client disconnect: {e}") from e
        finally:
            self._session = None
            self._initialized = False
--- a/lib/crewai/src/crewai/mcp/tool_resolver.py
+++ b/lib/crewai/src/crewai/mcp/tool_resolver.py
@@ -22,6 +22,7 @@ from crewai.mcp.config import (
    MCPServerSSE,
    MCPServerStdio,
 )
+from crewai.utilities.string_utils import sanitize_tool_name
 from crewai.mcp.transports.http import HTTPTransport
 from crewai.mcp.transports.sse import SSETransport
 from crewai.mcp.transports.stdio import StdioTransport
@@ -74,10 +75,9 @@ class MCPToolResolver:
            elif isinstance(mcp_config, str):
                amp_refs.append(self._parse_amp_ref(mcp_config))
            else:
-                tools, client = self._resolve_native(mcp_config)
+                tools, clients = self._resolve_native(mcp_config)
                all_tools.extend(tools)
-                if client:
-                    self._clients.append(client)
+                self._clients.extend(clients)

        if amp_refs:
            tools, clients = self._resolve_amp(amp_refs)
@@ -87,12 +87,7 @@ class MCPToolResolver:
        return all_tools

    def cleanup(self) -> None:
-        """Disconnect all MCP client connections.
-
-        Submits the disconnect coroutines to the persistent MCP event loop
-        so that transport context managers are exited on the same loop they
-        were entered on.
-        """
+        """Disconnect all MCP client connections."""
        if not self._clients:
            return

@@ -102,11 +97,7 @@ class MCPToolResolver:
                    await client.disconnect()

        try:
-            from crewai.tools.mcp_native_tool import _get_mcp_event_loop
-
-            loop = _get_mcp_event_loop()
-            future = asyncio.run_coroutine_threadsafe(_disconnect_all(), loop)
-            future.result(timeout=30)
+            asyncio.run(_disconnect_all())
        except Exception as e:
            self._logger.log("error", f"Error during MCP client cleanup: {e}")
        finally:
@@ -140,7 +131,7 @@ class MCPToolResolver:
        all_tools: list[BaseTool] = []
        all_clients: list[Any] = []

-        resolved_cache: dict[str, tuple[list[BaseTool], Any | None]] = {}
+        resolved_cache: dict[str, tuple[list[BaseTool], list[Any]]] = {}

        for slug in unique_slugs:
            config_dict = amp_configs_map.get(slug)
@@ -158,10 +149,9 @@ class MCPToolResolver:
            mcp_server_config = self._build_mcp_config_from_dict(config_dict)

            try:
-                tools, client = self._resolve_native(mcp_server_config)
-                resolved_cache[slug] = (tools, client)
-                if client:
-                    all_clients.append(client)
+                tools, clients = self._resolve_native(mcp_server_config)
+                resolved_cache[slug] = (tools, clients)
+                all_clients.extend(clients)
            except Exception as e:
                crewai_event_bus.emit(
                    self,
@@ -179,8 +169,9 @@ class MCPToolResolver:

            slug_tools, _ = cached
            if specific_tool:
+                sanitized = sanitize_tool_name(specific_tool)
                all_tools.extend(
-                    t for t in slug_tools if t.name.endswith(f"_{specific_tool}")
+                    t for t in slug_tools if t.name.endswith(f"_{sanitized}")
                )
            else:
                all_tools.extend(slug_tools)
@@ -207,7 +198,6 @@ class MCPToolResolver:

            plus_api = PlusAPI(api_key=get_platform_integration_token())
            response = plus_api.get_mcp_configs(slugs)
-
            if response.status_code == 200:
                configs: dict[str, dict[str, Any]] = response.json().get("configs", {})
                return configs
@@ -227,6 +217,7 @@ class MCPToolResolver:

    def _resolve_external(self, mcp_ref: str) -> list[BaseTool]:
        """Resolve an HTTPS MCP server URL into tools."""
+        from crewai.tools.base_tool import BaseTool
        from crewai.tools.mcp_tool_wrapper import MCPToolWrapper

        if "#" in mcp_ref:
@@ -236,6 +227,7 @@ class MCPToolResolver:

        server_params = {"url": server_url}
        server_name = self._extract_server_name(server_url)
+        sanitized_specific_tool = sanitize_tool_name(specific_tool) if specific_tool else None

        try:
            tool_schemas = self._get_mcp_tool_schemas(server_params)
@@ -248,7 +240,7 @@ class MCPToolResolver:

            tools = []
            for tool_name, schema in tool_schemas.items():
-                if specific_tool and tool_name != specific_tool:
+                if sanitized_specific_tool and tool_name != sanitized_specific_tool:
                    continue

                try:
@@ -280,14 +272,16 @@ class MCPToolResolver:
            )
            return []

-    def _resolve_native(
-        self, mcp_config: MCPServerConfig
-    ) -> tuple[list[BaseTool], Any | None]:
-        """Resolve an ``MCPServerConfig`` into tools, returning the client for cleanup."""
-        from crewai.tools.base_tool import BaseTool
-        from crewai.tools.mcp_native_tool import MCPNativeTool
+    @staticmethod
+    def _create_transport(
+        mcp_config: MCPServerConfig,
+    ) -> tuple[StdioTransport | HTTPTransport | SSETransport, str]:
+        """Create a fresh transport instance from an MCP server config.

-        transport: StdioTransport | HTTPTransport | SSETransport
+        Returns a ``(transport, server_name)`` tuple. Each call produces an
+        independent transport so that parallel tool executions never share
+        state.
+        """
        if isinstance(mcp_config, MCPServerStdio):
            transport = StdioTransport(
                command=mcp_config.command,
@@ -301,65 +295,84 @@ class MCPToolResolver:
                headers=mcp_config.headers,
                streamable=mcp_config.streamable,
            )
-            server_name = self._extract_server_name(mcp_config.url)
+            server_name = MCPToolResolver._extract_server_name(mcp_config.url)
        elif isinstance(mcp_config, MCPServerSSE):
            transport = SSETransport(
                url=mcp_config.url,
                headers=mcp_config.headers,
            )
-            server_name = self._extract_server_name(mcp_config.url)
+            server_name = MCPToolResolver._extract_server_name(mcp_config.url)
        else:
            raise ValueError(f"Unsupported MCP server config type: {type(mcp_config)}")
+        return transport, server_name

-        client = MCPClient(
-            transport=transport,
+    def _resolve_native(
+        self, mcp_config: MCPServerConfig
+    ) -> tuple[list[BaseTool], list[Any]]:
+        """Resolve an ``MCPServerConfig`` into tools.
+
+        Returns ``(tools, clients)`` where *clients* is always empty for
+        native tools (clients are now created on-demand per invocation).
+        A ``client_factory`` closure is passed to each ``MCPNativeTool`` so
+        every call -- even concurrent calls to the *same* tool -- gets its
+        own ``MCPClient`` + transport with no shared mutable state.
+        """
+        from crewai.tools.base_tool import BaseTool
+        from crewai.tools.mcp_native_tool import MCPNativeTool
+
+        discovery_transport, server_name = self._create_transport(mcp_config)
+        discovery_client = MCPClient(
+            transport=discovery_transport,
            cache_tools_list=mcp_config.cache_tools_list,
        )

        async def _setup_client_and_list_tools() -> list[dict[str, Any]]:
            try:
-                if not client.connected:
-                    await client.connect()
+                if not discovery_client.connected:
+                    await discovery_client.connect()

-                tools_list = await client.list_tools()
+                tools_list = await discovery_client.list_tools()

                try:
-                    await client.disconnect()
+                    await discovery_client.disconnect()
                    await asyncio.sleep(0.1)
                except Exception as e:
                    self._logger.log("error", f"Error during disconnect: {e}")

                return tools_list
            except Exception as e:
-                if client.connected:
-                    await client.disconnect()
+                if discovery_client.connected:
+                    await discovery_client.disconnect()
                    await asyncio.sleep(0.1)
                raise RuntimeError(
                    f"Error during setup client and list tools: {e}"
                ) from e

        try:
-            from crewai.tools.mcp_native_tool import _get_mcp_event_loop
-
-            loop = _get_mcp_event_loop()
-            future = asyncio.run_coroutine_threadsafe(
-                _setup_client_and_list_tools(), loop
-            )
            try:
-                tools_list = future.result(timeout=60)
-            except RuntimeError as e:
-                error_msg = str(e).lower()
-                if "cancel scope" in error_msg or "task" in error_msg:
+                asyncio.get_running_loop()
+                import concurrent.futures
+
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    future = executor.submit(
+                        asyncio.run, _setup_client_and_list_tools()
+                    )
+                    tools_list = future.result()
+            except RuntimeError:
+                try:
+                    tools_list = asyncio.run(_setup_client_and_list_tools())
+                except RuntimeError as e:
+                    error_msg = str(e).lower()
+                    if "cancel scope" in error_msg or "task" in error_msg:
+                        raise ConnectionError(
+                            "MCP connection failed due to event loop cleanup issues. "
+                            "This may be due to authentication errors or server unavailability."
+                        ) from e
+                except asyncio.CancelledError as e:
                    raise ConnectionError(
-                        "MCP connection failed due to event loop cleanup issues. "
-                        "This may be due to authentication errors or server unavailability."
+                        "MCP connection was cancelled. This may indicate an authentication "
+                        "error or server unavailability."
                    ) from e
-                raise
-            except asyncio.CancelledError as e:
-                raise ConnectionError(
-                    "MCP connection was cancelled. This may indicate an authentication "
-                    "error or server unavailability."
-                ) from e

            if mcp_config.tool_filter:
                filtered_tools = []
@@ -382,6 +395,13 @@ class MCPToolResolver:
                        filtered_tools.append(tool)
                tools_list = filtered_tools

+            def _client_factory() -> MCPClient:
+                transport, _ = self._create_transport(mcp_config)
+                return MCPClient(
+                    transport=transport,
+                    cache_tools_list=mcp_config.cache_tools_list,
+                )
+
            tools = []
            for tool_def in tools_list:
                tool_name = tool_def.get("name", "")
@@ -402,7 +422,7 @@ class MCPToolResolver:

                try:
                    native_tool = MCPNativeTool(
-                        mcp_client=client,
+                        client_factory=_client_factory,
                        tool_name=tool_name,
                        tool_schema=tool_schema,
                        server_name=server_name,
@@ -413,16 +433,10 @@ class MCPToolResolver:
                    self._logger.log("error", f"Failed to create native MCP tool: {e}")
                    continue

-            return cast(list[BaseTool], tools), client
+            return cast(list[BaseTool], tools), []
        except Exception as e:
-            if client.connected:
-                try:
-                    fut = asyncio.run_coroutine_threadsafe(
-                        client.disconnect(), loop
-                    )
-                    fut.result(timeout=10)
-                except Exception:
-                    self._logger.log("debug", "Suppressed error during MCP client disconnect on cleanup")
+            if discovery_client.connected:
+                asyncio.run(discovery_client.disconnect())

            raise RuntimeError(f"Failed to get native MCP tools: {e}") from e

--- a/lib/crewai/src/crewai/memory/init.py
+++ b/lib/crewai/src/crewai/memory/init.py
@@ -19,6 +19,7 @@ from crewai.memory.types import (
    embed_texts,
 )

+
 _LAZY_IMPORTS: dict[str, tuple[str, str]] = {
    "Memory": ("crewai.memory.unified_memory", "Memory"),
    "EncodingFlow": ("crewai.memory.encoding_flow", "EncodingFlow"),
--- a/lib/crewai/src/crewai/memory/memory_scope.py
+++ b/lib/crewai/src/crewai/memory/memory_scope.py
@@ -3,11 +3,9 @@
 from __future__ import annotations

 from datetime import datetime
-from typing import TYPE_CHECKING, Any
+from typing import Any, Literal

-
-if TYPE_CHECKING:
-    from crewai.memory.unified_memory import Memory
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator

 from crewai.memory.types import (
    _RECALL_OVERSAMPLE_FACTOR,
@@ -15,22 +13,38 @@ from crewai.memory.types import (
    MemoryRecord,
    ScopeInfo,
 )
+from crewai.memory.unified_memory import Memory


-class MemoryScope:
+class MemoryScope(BaseModel):
    """View of Memory restricted to a root path. All operations are scoped under that path."""

-    def __init__(self, memory: Memory, root_path: str) -> None:
-        """Initialize scope.
+    model_config = ConfigDict(arbitrary_types_allowed=True)

-        Args:
-            memory: The underlying Memory instance.
-            root_path: Root path for this scope (e.g. /agent/1).
-        """
-        self._memory = memory
-        self._root = root_path.rstrip("/") or ""
-        if self._root and not self._root.startswith("/"):
-            self._root = "/" + self._root
+    root_path: str = Field(default="/")
+
+    _memory: Memory = PrivateAttr()
+    _root: str = PrivateAttr()
+
+    @model_validator(mode="wrap")
+    @classmethod
+    def _accept_memory(cls, data: Any, handler: Any) -> MemoryScope:
+        """Extract memory dependency and normalize root path before validation."""
+        if isinstance(data, MemoryScope):
+            return data
+        memory = data.pop("memory")
+        instance: MemoryScope = handler(data)
+        instance._memory = memory
+        root = instance.root_path.rstrip("/") or ""
+        if root and not root.startswith("/"):
+            root = "/" + root
+        instance._root = root
+        return instance
+
+    @property
+    def read_only(self) -> bool:
+        """Whether the underlying memory is read-only."""
+        return self._memory.read_only

    def _scope_path(self, scope: str | None) -> str:
        if not scope or scope == "/":
@@ -52,7 +66,7 @@ class MemoryScope:
        importance: float | None = None,
        source: str | None = None,
        private: bool = False,
-    ) -> MemoryRecord:
+    ) -> MemoryRecord | None:
        """Remember content; scope is relative to this scope's root."""
        path = self._scope_path(scope)
        return self._memory.remember(
@@ -71,7 +85,7 @@ class MemoryScope:
        scope: str | None = None,
        categories: list[str] | None = None,
        limit: int = 10,
-        depth: str = "deep",
+        depth: Literal["shallow", "deep"] = "deep",
        source: str | None = None,
        include_private: bool = False,
    ) -> list[MemoryMatch]:
@@ -138,34 +152,34 @@ class MemoryScope:
        """Return a narrower scope under this scope."""
        child = path.strip("/")
        if not child:
-            return MemoryScope(self._memory, self._root or "/")
+            return MemoryScope(memory=self._memory, root_path=self._root or "/")
        base = self._root.rstrip("/") or ""
        new_root = f"{base}/{child}" if base else f"/{child}"
-        return MemoryScope(self._memory, new_root)
+        return MemoryScope(memory=self._memory, root_path=new_root)


-class MemorySlice:
+class MemorySlice(BaseModel):
    """View over multiple scopes: recall searches all, remember is a no-op when read_only."""

-    def __init__(
-        self,
-        memory: Memory,
-        scopes: list[str],
-        categories: list[str] | None = None,
-        read_only: bool = True,
-    ) -> None:
-        """Initialize slice.
+    model_config = ConfigDict(arbitrary_types_allowed=True)

-        Args:
-            memory: The underlying Memory instance.
-            scopes: List of scope paths to include.
-            categories: Optional category filter for recall.
-            read_only: If True, remember() is a silent no-op.
-        """
-        self._memory = memory
-        self._scopes = [s.rstrip("/") or "/" for s in scopes]
-        self._categories = categories
-        self._read_only = read_only
+    scopes: list[str] = Field(default_factory=list)
+    categories: list[str] | None = Field(default=None)
+    read_only: bool = Field(default=True)
+
+    _memory: Memory = PrivateAttr()
+
+    @model_validator(mode="wrap")
+    @classmethod
+    def _accept_memory(cls, data: Any, handler: Any) -> MemorySlice:
+        """Extract memory dependency and normalize scopes before validation."""
+        if isinstance(data, MemorySlice):
+            return data
+        memory = data.pop("memory")
+        data["scopes"] = [s.rstrip("/") or "/" for s in data.get("scopes", [])]
+        instance: MemorySlice = handler(data)
+        instance._memory = memory
+        return instance

    def remember(
        self,
@@ -178,7 +192,7 @@ class MemorySlice:
        private: bool = False,
    ) -> MemoryRecord | None:
        """Remember into an explicit scope. No-op when read_only=True."""
-        if self._read_only:
+        if self.read_only:
            return None
        return self._memory.remember(
            content,
@@ -196,14 +210,14 @@ class MemorySlice:
        scope: str | None = None,
        categories: list[str] | None = None,
        limit: int = 10,
-        depth: str = "deep",
+        depth: Literal["shallow", "deep"] = "deep",
        source: str | None = None,
        include_private: bool = False,
    ) -> list[MemoryMatch]:
        """Recall across all slice scopes; results merged and re-ranked."""
-        cats = categories or self._categories
+        cats = categories or self.categories
        all_matches: list[MemoryMatch] = []
-        for sc in self._scopes:
+        for sc in self.scopes:
            matches = self._memory.recall(
                query,
                scope=sc,
@@ -231,7 +245,7 @@ class MemorySlice:
    def list_scopes(self, path: str = "/") -> list[str]:
        """List scopes across all slice roots."""
        out: list[str] = []
-        for sc in self._scopes:
+        for sc in self.scopes:
            full = f"{sc.rstrip('/')}{path}" if sc != "/" else path
            out.extend(self._memory.list_scopes(full))
        return sorted(set(out))
@@ -243,15 +257,23 @@ class MemorySlice:
        oldest: datetime | None = None
        newest: datetime | None = None
        children: list[str] = []
-        for sc in self._scopes:
+        for sc in self.scopes:
            full = f"{sc.rstrip('/')}{path}" if sc != "/" else path
            inf = self._memory.info(full)
            total_records += inf.record_count
            all_categories.update(inf.categories)
            if inf.oldest_record:
-                oldest = inf.oldest_record if oldest is None else min(oldest, inf.oldest_record)
+                oldest = (
+                    inf.oldest_record
+                    if oldest is None
+                    else min(oldest, inf.oldest_record)
+                )
            if inf.newest_record:
-                newest = inf.newest_record if newest is None else max(newest, inf.newest_record)
+                newest = (
+                    inf.newest_record
+                    if newest is None
+                    else max(newest, inf.newest_record)
+                )
            children.extend(inf.child_scopes)
        return ScopeInfo(
            path=path,
@@ -265,7 +287,7 @@ class MemorySlice:
    def list_categories(self, path: str | None = None) -> dict[str, int]:
        """Categories and counts across slice scopes."""
        counts: dict[str, int] = {}
-        for sc in self._scopes:
+        for sc in self.scopes:
            full = (f"{sc.rstrip('/')}{path}" if sc != "/" else path) if path else sc
            for k, v in self._memory.list_categories(full).items():
                counts[k] = counts.get(k, 0) + v
--- a/lib/crewai/src/crewai/memory/recall_flow.py
+++ b/lib/crewai/src/crewai/memory/recall_flow.py
@@ -2,7 +2,6 @@

 Implements adaptive-depth retrieval with:
 - LLM query distillation into targeted sub-queries
- Keyword-driven category filtering
 - Time-based filtering from temporal hints
 - Parallel multi-query, multi-scope search
 - Confidence-based routing with iterative deepening (budget loop)
@@ -37,7 +36,6 @@ class RecallState(BaseModel):
    query: str = ""
    scope: str | None = None
    categories: list[str] | None = None
-    inferred_categories: list[str] = Field(default_factory=list)
    time_cutoff: datetime | None = None
    source: str | None = None
    include_private: bool = False
@@ -82,11 +80,8 @@ class RecallFlow(Flow[RecallState]):
    # ------------------------------------------------------------------

    def _merged_categories(self) -> list[str] | None:
-        """Merge caller-supplied and LLM-inferred categories."""
-        merged = list(
-            set((self.state.categories or []) + self.state.inferred_categories)
-        )
-        return merged or None
+        """Return caller-supplied categories, or None if empty."""
+        return self.state.categories or None

    def _do_search(self) -> list[dict[str, Any]]:
        """Run parallel search across (embeddings x scopes) with filters.
@@ -212,10 +207,6 @@ class RecallFlow(Flow[RecallState]):
            )
            self.state.query_analysis = analysis

-            # Wire keywords -> category filter
-            if analysis.keywords:
-                self.state.inferred_categories = analysis.keywords
-
            # Parse time_filter into a datetime cutoff
            if analysis.time_filter:
                try:
--- a/lib/crewai/src/crewai/memory/storage/kickoff_task_outputs_storage.py
+++ b/lib/crewai/src/crewai/memory/storage/kickoff_task_outputs_storage.py
@@ -38,7 +38,8 @@ class KickoffTaskOutputsSQLiteStorage:
            DatabaseOperationError: If database initialization fails due to SQLite errors.
        """
        try:
-            with sqlite3.connect(self.db_path) as conn:
+            with sqlite3.connect(self.db_path, timeout=30) as conn:
+                conn.execute("PRAGMA journal_mode=WAL")
                cursor = conn.cursor()
                cursor.execute(
                    """
@@ -82,7 +83,7 @@ class KickoffTaskOutputsSQLiteStorage:
        """
        inputs = inputs or {}
        try:
-            with sqlite3.connect(self.db_path) as conn:
+            with sqlite3.connect(self.db_path, timeout=30) as conn:
                conn.execute("BEGIN TRANSACTION")
                cursor = conn.cursor()
                cursor.execute(
@@ -125,7 +126,7 @@ class KickoffTaskOutputsSQLiteStorage:
            DatabaseOperationError: If updating the task output fails due to SQLite errors.
        """
        try:
-            with sqlite3.connect(self.db_path) as conn:
+            with sqlite3.connect(self.db_path, timeout=30) as conn:
                conn.execute("BEGIN TRANSACTION")
                cursor = conn.cursor()

@@ -166,7 +167,7 @@ class KickoffTaskOutputsSQLiteStorage:
            DatabaseOperationError: If loading task outputs fails due to SQLite errors.
        """
        try:
-            with sqlite3.connect(self.db_path) as conn:
+            with sqlite3.connect(self.db_path, timeout=30) as conn:
                cursor = conn.cursor()
                cursor.execute("""
                SELECT *
@@ -205,7 +206,7 @@ class KickoffTaskOutputsSQLiteStorage:
            DatabaseOperationError: If deleting task outputs fails due to SQLite errors.
        """
        try:
-            with sqlite3.connect(self.db_path) as conn:
+            with sqlite3.connect(self.db_path, timeout=30) as conn:
                conn.execute("BEGIN TRANSACTION")
                cursor = conn.cursor()
                cursor.execute("DELETE FROM latest_kickoff_task_outputs")
--- a/lib/crewai/src/crewai/memory/storage/lancedb_storage.py
+++ b/lib/crewai/src/crewai/memory/storage/lancedb_storage.py
@@ -2,6 +2,7 @@

 from __future__ import annotations

+from contextlib import AbstractContextManager
 from datetime import datetime
 import json
 import logging
@@ -14,6 +15,7 @@ from typing import Any, ClassVar
 import lancedb

 from crewai.memory.types import MemoryRecord, ScopeInfo
+from crewai.utilities.lock_store import lock as store_lock


 _logger = logging.getLogger(__name__)
@@ -90,6 +92,7 @@ class LanceDBStorage:
        # Raise it proactively so scans on large tables never hit OS error 24.
        try:
            import resource
+
            soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
            if soft < 4096:
                resource.setrlimit(resource.RLIMIT_NOFILE, (min(hard, 4096), hard))
@@ -99,7 +102,8 @@ class LanceDBStorage:
        self._compact_every = compact_every
        self._save_count = 0

-        # Get or create a shared write lock for this database path.
+        self._lock_name = f"lancedb:{self._path.resolve()}"
+
        resolved = str(self._path.resolve())
        with LanceDBStorage._path_locks_guard:
            if resolved not in LanceDBStorage._path_locks:
@@ -110,10 +114,13 @@ class LanceDBStorage:
        # If no table exists yet, defer creation until the first save so the
        # dimension can be auto-detected from the embedder's actual output.
        try:
-            self._table: lancedb.table.Table | None = self._db.open_table(self._table_name)
+            self._table: lancedb.table.Table | None = self._db.open_table(
+                self._table_name
+            )
            self._vector_dim: int = self._infer_dim_from_table(self._table)
            # Best-effort: create the scope index if it doesn't exist yet.
-            self._ensure_scope_index()
+            with self._file_lock():
+                self._ensure_scope_index()
            # Compact in the background if the table has accumulated many
            # fragments from previous runs (each save() creates one).
            self._compact_if_needed()
@@ -124,7 +131,8 @@ class LanceDBStorage:
        # Explicit dim provided: create the table immediately if it doesn't exist.
        if self._table is None and vector_dim is not None:
            self._vector_dim = vector_dim
-            self._table = self._create_table(vector_dim)
+            with self._file_lock():
+                self._table = self._create_table(vector_dim)

    @property
    def write_lock(self) -> threading.RLock:
@@ -149,18 +157,14 @@ class LanceDBStorage:
                    break
        return DEFAULT_VECTOR_DIM

-    def _retry_write(self, op: str, *args: Any, **kwargs: Any) -> Any:
-        """Execute a table operation with retry on LanceDB commit conflicts.
+    def _file_lock(self) -> AbstractContextManager[None]:
+        """Return a cross-process lock for serialising writes."""
+        return store_lock(self._lock_name)

-        Args:
-            op: Method name on the table object (e.g. "add", "delete").
-            *args, **kwargs: Passed to the table method.
+    def _do_write(self, op: str, *args: Any, **kwargs: Any) -> Any:
+        """Execute a single table write with retry on commit conflicts.

-        LanceDB uses optimistic concurrency: if two transactions overlap,
-        the second to commit fails with an ``OSError`` containing
-        "Commit conflict". This helper retries with exponential backoff,
-        refreshing the table reference before each retry so the retried
-        call uses the latest committed version (not a stale reference).
+        Caller must already hold the cross-process file lock.
        """
        delay = _RETRY_BASE_DELAY
        for attempt in range(_MAX_RETRIES + 1):
@@ -171,20 +175,24 @@ class LanceDBStorage:
                    raise
                _logger.debug(
                    "LanceDB commit conflict on %s (attempt %d/%d), retrying in %.1fs",
-                    op, attempt + 1, _MAX_RETRIES, delay,
+                    op,
+                    attempt + 1,
+                    _MAX_RETRIES,
+                    delay,
                )
-                # Refresh table to pick up the latest version before retrying.
-                # The next getattr(self._table, op) will use the fresh table.
                try:
                    self._table = self._db.open_table(self._table_name)
                except Exception:  # noqa: S110
-                    pass  # table refresh is best-effort
+                    pass
                time.sleep(delay)
                delay *= 2
        return None  # unreachable, but satisfies type checker

    def _create_table(self, vector_dim: int) -> lancedb.table.Table:
-        """Create a new table with the given vector dimension."""
+        """Create a new table with the given vector dimension.
+
+        Caller must already hold the cross-process file lock.
+        """
        placeholder = [
            {
                "id": "__schema_placeholder__",
@@ -200,8 +208,12 @@ class LanceDBStorage:
                "vector": [0.0] * vector_dim,
            }
        ]
-        table = self._db.create_table(self._table_name, placeholder)
-        table.delete("id = '__schema_placeholder__'")
+        try:
+            table = self._db.create_table(self._table_name, placeholder)
+        except ValueError:
+            table = self._db.open_table(self._table_name)
+        else:
+            table.delete("id = '__schema_placeholder__'")
        return table

    def _ensure_scope_index(self) -> None:
@@ -248,9 +260,9 @@ class LanceDBStorage:
        """Run ``table.optimize()`` in a background thread, absorbing errors."""
        try:
            if self._table is not None:
-                self._table.optimize()
-                # Refresh the scope index so new fragments are covered.
-                self._ensure_scope_index()
+                with self._file_lock():
+                    self._table.optimize()
+                    self._ensure_scope_index()
        except Exception:
            _logger.debug("LanceDB background compaction failed", exc_info=True)

@@ -280,7 +292,9 @@ class LanceDBStorage:
            "last_accessed": record.last_accessed.isoformat(),
            "source": record.source or "",
            "private": record.private,
-            "vector": record.embedding if record.embedding else [0.0] * self._vector_dim,
+            "vector": record.embedding
+            if record.embedding
+            else [0.0] * self._vector_dim,
        }

    def _row_to_record(self, row: dict[str, Any]) -> MemoryRecord:
@@ -296,7 +310,9 @@ class LanceDBStorage:
            id=str(row["id"]),
            content=str(row["content"]),
            scope=str(row["scope"]),
-            categories=json.loads(row["categories_str"]) if row.get("categories_str") else [],
+            categories=json.loads(row["categories_str"])
+            if row.get("categories_str")
+            else [],
            metadata=json.loads(row["metadata_str"]) if row.get("metadata_str") else {},
            importance=float(row.get("importance", 0.5)),
            created_at=_parse_dt(row.get("created_at")),
@@ -316,16 +332,15 @@ class LanceDBStorage:
                dim = len(r.embedding)
                break
        is_new_table = self._table is None
-        with self._write_lock:
+        with self._write_lock, self._file_lock():
            self._ensure_table(vector_dim=dim)
            rows = [self._record_to_row(r) for r in records]
            for r in rows:
                if r["vector"] is None or len(r["vector"]) != self._vector_dim:
                    r["vector"] = [0.0] * self._vector_dim
-            self._retry_write("add", rows)
-        # Create the scope index on the first save so it covers the initial dataset.
-        if is_new_table:
-            self._ensure_scope_index()
+            self._do_write("add", rows)
+            if is_new_table:
+                self._ensure_scope_index()
        # Auto-compact every N saves so fragment files don't pile up.
        self._save_count += 1
        if self._compact_every > 0 and self._save_count % self._compact_every == 0:
@@ -333,14 +348,14 @@ class LanceDBStorage:

    def update(self, record: MemoryRecord) -> None:
        """Update a record by ID. Preserves created_at, updates last_accessed."""
-        with self._write_lock:
+        with self._write_lock, self._file_lock():
            self._ensure_table()
            safe_id = str(record.id).replace("'", "''")
-            self._retry_write("delete", f"id = '{safe_id}'")
+            self._do_write("delete", f"id = '{safe_id}'")
            row = self._record_to_row(record)
            if row["vector"] is None or len(row["vector"]) != self._vector_dim:
                row["vector"] = [0.0] * self._vector_dim
-            self._retry_write("add", [row])
+            self._do_write("add", [row])

    def touch_records(self, record_ids: list[str]) -> None:
        """Update last_accessed to now for the given record IDs.
@@ -354,11 +369,11 @@ class LanceDBStorage:
        """
        if not record_ids or self._table is None:
            return
-        with self._write_lock:
+        with self._write_lock, self._file_lock():
            now = datetime.utcnow().isoformat()
            safe_ids = [str(rid).replace("'", "''") for rid in record_ids]
            ids_expr = ", ".join(f"'{rid}'" for rid in safe_ids)
-            self._retry_write(
+            self._do_write(
                "update",
                where=f"id IN ({ids_expr})",
                values={"last_accessed": now},
@@ -390,13 +405,17 @@ class LanceDBStorage:
            prefix = scope_prefix.rstrip("/")
            like_val = prefix + "%"
            query = query.where(f"scope LIKE '{like_val}'")
-        results = query.limit(limit * 3 if (categories or metadata_filter) else limit).to_list()
+        results = query.limit(
+            limit * 3 if (categories or metadata_filter) else limit
+        ).to_list()
        out: list[tuple[MemoryRecord, float]] = []
        for row in results:
            record = self._row_to_record(row)
            if categories and not any(c in record.categories for c in categories):
                continue
-            if metadata_filter and not all(record.metadata.get(k) == v for k, v in metadata_filter.items()):
+            if metadata_filter and not all(
+                record.metadata.get(k) == v for k, v in metadata_filter.items()
+            ):
                continue
            distance = row.get("_distance", 0.0)
            score = 1.0 / (1.0 + float(distance)) if distance is not None else 1.0
@@ -416,20 +435,24 @@ class LanceDBStorage:
    ) -> int:
        if self._table is None:
            return 0
-        with self._write_lock:
+        with self._write_lock, self._file_lock():
            if record_ids and not (categories or metadata_filter):
                before = self._table.count_rows()
                ids_expr = ", ".join(f"'{rid}'" for rid in record_ids)
-                self._retry_write("delete", f"id IN ({ids_expr})")
+                self._do_write("delete", f"id IN ({ids_expr})")
                return before - self._table.count_rows()
            if categories or metadata_filter:
                rows = self._scan_rows(scope_prefix)
                to_delete: list[str] = []
                for row in rows:
                    record = self._row_to_record(row)
-                    if categories and not any(c in record.categories for c in categories):
+                    if categories and not any(
+                        c in record.categories for c in categories
+                    ):
                        continue
-                    if metadata_filter and not all(record.metadata.get(k) == v for k, v in metadata_filter.items()):
+                    if metadata_filter and not all(
+                        record.metadata.get(k) == v for k, v in metadata_filter.items()
+                    ):
                        continue
                    if older_than and record.created_at >= older_than:
                        continue
@@ -438,7 +461,7 @@ class LanceDBStorage:
                    return 0
                before = self._table.count_rows()
                ids_expr = ", ".join(f"'{rid}'" for rid in to_delete)
-                self._retry_write("delete", f"id IN ({ids_expr})")
+                self._do_write("delete", f"id IN ({ids_expr})")
                return before - self._table.count_rows()
            conditions = []
            if scope_prefix is not None and scope_prefix.strip("/"):
@@ -450,11 +473,11 @@ class LanceDBStorage:
                conditions.append(f"created_at < '{older_than.isoformat()}'")
            if not conditions:
                before = self._table.count_rows()
-                self._retry_write("delete", "id != ''")
+                self._do_write("delete", "id != ''")
                return before - self._table.count_rows()
            where_expr = " AND ".join(conditions)
            before = self._table.count_rows()
-            self._retry_write("delete", where_expr)
+            self._do_write("delete", where_expr)
            return before - self._table.count_rows()

    def _scan_rows(
@@ -528,7 +551,7 @@ class LanceDBStorage:
        for row in rows:
            sc = str(row.get("scope", ""))
            if child_prefix and sc.startswith(child_prefix):
-                rest = sc[len(child_prefix):]
+                rest = sc[len(child_prefix) :]
                first_component = rest.split("/", 1)[0]
                if first_component:
                    children.add(child_prefix + first_component)
@@ -539,7 +562,11 @@ class LanceDBStorage:
                pass
            created = row.get("created_at")
            if created:
-                dt = datetime.fromisoformat(str(created).replace("Z", "+00:00")) if isinstance(created, str) else created
+                dt = (
+                    datetime.fromisoformat(str(created).replace("Z", "+00:00"))
+                    if isinstance(created, str)
+                    else created
+                )
                if isinstance(dt, datetime):
                    if oldest is None or dt < oldest:
                        oldest = dt
@@ -562,7 +589,7 @@ class LanceDBStorage:
        for row in rows:
            sc = str(row.get("scope", ""))
            if sc.startswith(prefix) and sc != (prefix.rstrip("/") or "/"):
-                rest = sc[len(prefix):]
+                rest = sc[len(prefix) :]
                first_component = rest.split("/", 1)[0]
                if first_component:
                    children.add(prefix + first_component)
@@ -590,17 +617,19 @@ class LanceDBStorage:
        return info.record_count

    def reset(self, scope_prefix: str | None = None) -> None:
-        if scope_prefix is None or scope_prefix.strip("/") == "":
-            if self._table is not None:
-                self._db.drop_table(self._table_name)
-            self._table = None
-            # Dimension is preserved; table will be recreated on next save.
-            return
-        if self._table is None:
-            return
-        prefix = scope_prefix.rstrip("/")
-        if prefix:
-            self._table.delete(f"scope >= '{prefix}' AND scope < '{prefix}/\uFFFF'")
+        with self._write_lock, self._file_lock():
+            if scope_prefix is None or scope_prefix.strip("/") == "":
+                if self._table is not None:
+                    self._db.drop_table(self._table_name)
+                self._table = None
+                return
+            if self._table is None:
+                return
+            prefix = scope_prefix.rstrip("/")
+            if prefix:
+                self._do_write(
+                    "delete", f"scope >= '{prefix}' AND scope < '{prefix}/\uffff'"
+                )

    def optimize(self) -> None:
        """Compact the table synchronously and refresh the scope index.
@@ -614,8 +643,9 @@ class LanceDBStorage:
        """
        if self._table is None:
            return
-        self._table.optimize()
-        self._ensure_scope_index()
+        with self._write_lock, self._file_lock():
+            self._table.optimize()
+            self._ensure_scope_index()

    async def asave(self, records: list[MemoryRecord]) -> None:
        self.save(records)
--- a/lib/crewai/src/crewai/memory/unified_memory.py
+++ b/lib/crewai/src/crewai/memory/unified_memory.py
@@ -6,7 +6,9 @@ from concurrent.futures import Future, ThreadPoolExecutor
 from datetime import datetime
 import threading
 import time
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Annotated, Any, Literal
+
+from pydantic import BaseModel, ConfigDict, Field, PlainValidator, PrivateAttr

 from crewai.events.event_bus import crewai_event_bus
 from crewai.events.types.memory_events import (
@@ -39,13 +41,18 @@ if TYPE_CHECKING:
    )


+def _passthrough(v: Any) -> Any:
+    """PlainValidator that accepts any value, bypassing strict union discrimination."""
+    return v
+
+
 def _default_embedder() -> OpenAIEmbeddingFunction:
    """Build default OpenAI embedder for memory."""
    spec: OpenAIProviderSpec = {"provider": "openai", "config": {}}
    return build_embedder(spec)


-class Memory:
+class Memory(BaseModel):
    """Unified memory: standalone, LLM-analyzed, with intelligent recall flow.

    Works without agent/crew. Uses LLM to infer scope, categories, importance on save.
@@ -53,116 +60,119 @@ class Memory:
    pluggable storage (LanceDB default).
    """

-    def __init__(
-        self,
-        llm: BaseLLM | str = "gpt-4o-mini",
-        storage: StorageBackend | str = "lancedb",
-        embedder: Any = None,
-        # -- Scoring weights --
-        # These three weights control how recall results are ranked.
-        # The composite score is: semantic_weight * similarity + recency_weight * decay + importance_weight * importance.
-        # They should sum to ~1.0 for intuitive scoring.
-        recency_weight: float = 0.3,
-        semantic_weight: float = 0.5,
-        importance_weight: float = 0.2,
-        # How quickly old memories lose relevance. The recency score halves every
-        # N days (exponential decay). Lower = faster forgetting; higher = longer relevance.
-        recency_half_life_days: int = 30,
-        # -- Consolidation --
-        # When remembering new content, if an existing record has similarity >= this
-        # threshold, the LLM is asked to merge/update/delete. Set to 1.0 to disable.
-        consolidation_threshold: float = 0.85,
-        # Max existing records to compare against when checking for consolidation.
-        consolidation_limit: int = 5,
-        # -- Save defaults --
-        # Importance assigned to new memories when no explicit value is given and
-        # the LLM analysis path is skipped (all fields provided by the caller).
-        default_importance: float = 0.5,
-        # -- Recall depth control --
-        # These thresholds govern the RecallFlow router that decides between
-        # returning results immediately ("synthesize") vs. doing an extra
-        # LLM-driven exploration round ("explore_deeper").
-        #   confidence >= confidence_threshold_high  => always synthesize
-        #   confidence <  confidence_threshold_low   => explore deeper (if budget > 0)
-        #   complex query + confidence < complex_query_threshold => explore deeper
-        confidence_threshold_high: float = 0.8,
-        confidence_threshold_low: float = 0.5,
-        complex_query_threshold: float = 0.7,
-        # How many LLM-driven exploration rounds the RecallFlow is allowed to run.
-        # 0 = always shallow (vector search only); higher = more thorough but slower.
-        exploration_budget: int = 1,
-        # Queries shorter than this skip LLM analysis (saving ~1-3s).
-        # Longer queries (full task descriptions) benefit from LLM distillation.
-        query_analysis_threshold: int = 200,
-        # When True, all write operations (remember, remember_many) are silently
-        # skipped. Useful for sharing a read-only view of memory across agents
-        # without any of them persisting new memories.
-        read_only: bool = False,
-    ) -> None:
-        """Initialize Memory.
+    model_config = ConfigDict(arbitrary_types_allowed=True)

-        Args:
-            llm: LLM for analysis (model name or BaseLLM instance).
-            storage: Backend: "lancedb" or a StorageBackend instance.
-            embedder: Embedding callable, provider config dict, or None (default OpenAI).
-            recency_weight: Weight for recency in the composite relevance score.
-            semantic_weight: Weight for semantic similarity in the composite relevance score.
-            importance_weight: Weight for importance in the composite relevance score.
-            recency_half_life_days: Recency score halves every N days (exponential decay).
-            consolidation_threshold: Similarity above which consolidation is triggered on save.
-            consolidation_limit: Max existing records to compare during consolidation.
-            default_importance: Default importance when not provided or inferred.
-            confidence_threshold_high: Recall confidence above which results are returned directly.
-            confidence_threshold_low: Recall confidence below which deeper exploration is triggered.
-            complex_query_threshold: For complex queries, explore deeper below this confidence.
-            exploration_budget: Number of LLM-driven exploration rounds during deep recall.
-            query_analysis_threshold: Queries shorter than this skip LLM analysis during deep recall.
-            read_only: If True, remember() and remember_many() are silent no-ops.
-        """
-        self._read_only = read_only
+    llm: Annotated[BaseLLM | str, PlainValidator(_passthrough)] = Field(
+        default="gpt-4o-mini",
+        description="LLM for analysis (model name or BaseLLM instance).",
+    )
+    storage: Annotated[StorageBackend | str, PlainValidator(_passthrough)] = Field(
+        default="lancedb",
+        description="Storage backend instance or path string.",
+    )
+    embedder: Any = Field(
+        default=None,
+        description="Embedding callable, provider config dict, or None for default OpenAI.",
+    )
+    recency_weight: float = Field(
+        default=0.3,
+        description="Weight for recency in the composite relevance score.",
+    )
+    semantic_weight: float = Field(
+        default=0.5,
+        description="Weight for semantic similarity in the composite relevance score.",
+    )
+    importance_weight: float = Field(
+        default=0.2,
+        description="Weight for importance in the composite relevance score.",
+    )
+    recency_half_life_days: int = Field(
+        default=30,
+        description="Recency score halves every N days (exponential decay).",
+    )
+    consolidation_threshold: float = Field(
+        default=0.85,
+        description="Similarity above which consolidation is triggered on save.",
+    )
+    consolidation_limit: int = Field(
+        default=5,
+        description="Max existing records to compare during consolidation.",
+    )
+    default_importance: float = Field(
+        default=0.5,
+        description="Default importance when not provided or inferred.",
+    )
+    confidence_threshold_high: float = Field(
+        default=0.8,
+        description="Recall confidence above which results are returned directly.",
+    )
+    confidence_threshold_low: float = Field(
+        default=0.5,
+        description="Recall confidence below which deeper exploration is triggered.",
+    )
+    complex_query_threshold: float = Field(
+        default=0.7,
+        description="For complex queries, explore deeper below this confidence.",
+    )
+    exploration_budget: int = Field(
+        default=1,
+        description="Number of LLM-driven exploration rounds during deep recall.",
+    )
+    query_analysis_threshold: int = Field(
+        default=200,
+        description="Queries shorter than this skip LLM analysis during deep recall.",
+    )
+    read_only: bool = Field(
+        default=False,
+        description="If True, remember() and remember_many() are silent no-ops.",
+    )
+
+    _config: MemoryConfig = PrivateAttr()
+    _llm_instance: BaseLLM | None = PrivateAttr(default=None)
+    _embedder_instance: Any = PrivateAttr(default=None)
+    _storage: StorageBackend = PrivateAttr()
+    _save_pool: ThreadPoolExecutor = PrivateAttr(
+        default_factory=lambda: ThreadPoolExecutor(
+            max_workers=1, thread_name_prefix="memory-save"
+        )
+    )
+    _pending_saves: list[Future[Any]] = PrivateAttr(default_factory=list)
+    _pending_lock: threading.Lock = PrivateAttr(default_factory=threading.Lock)
+
+    def model_post_init(self, __context: Any) -> None:
+        """Initialize runtime state from field values."""
        self._config = MemoryConfig(
-            recency_weight=recency_weight,
-            semantic_weight=semantic_weight,
-            importance_weight=importance_weight,
-            recency_half_life_days=recency_half_life_days,
-            consolidation_threshold=consolidation_threshold,
-            consolidation_limit=consolidation_limit,
-            default_importance=default_importance,
-            confidence_threshold_high=confidence_threshold_high,
-            confidence_threshold_low=confidence_threshold_low,
-            complex_query_threshold=complex_query_threshold,
-            exploration_budget=exploration_budget,
-            query_analysis_threshold=query_analysis_threshold,
+            recency_weight=self.recency_weight,
+            semantic_weight=self.semantic_weight,
+            importance_weight=self.importance_weight,
+            recency_half_life_days=self.recency_half_life_days,
+            consolidation_threshold=self.consolidation_threshold,
+            consolidation_limit=self.consolidation_limit,
+            default_importance=self.default_importance,
+            confidence_threshold_high=self.confidence_threshold_high,
+            confidence_threshold_low=self.confidence_threshold_low,
+            complex_query_threshold=self.complex_query_threshold,
+            exploration_budget=self.exploration_budget,
+            query_analysis_threshold=self.query_analysis_threshold,
        )

-        # Store raw config for lazy initialization. LLM and embedder are only
-        # built on first access so that Memory() never fails at construction
-        # time (e.g. when auto-created by Flow without an API key set).
-        self._llm_config: BaseLLM | str = llm
-        self._llm_instance: BaseLLM | None = None if isinstance(llm, str) else llm
-        self._embedder_config: Any = embedder
-        self._embedder_instance: Any = (
-            embedder
-            if (embedder is not None and not isinstance(embedder, dict))
+        self._llm_instance = None if isinstance(self.llm, str) else self.llm
+        self._embedder_instance = (
+            self.embedder
+            if (self.embedder is not None and not isinstance(self.embedder, dict))
            else None
        )

-        if isinstance(storage, str):
+        if isinstance(self.storage, str):
            from crewai.memory.storage.lancedb_storage import LanceDBStorage

-            self._storage = LanceDBStorage() if storage == "lancedb" else LanceDBStorage(path=storage)
+            self._storage = (
+                LanceDBStorage()
+                if self.storage == "lancedb"
+                else LanceDBStorage(path=self.storage)
+            )
        else:
-            self._storage = storage
-
-        # Background save queue. max_workers=1 serializes saves to avoid
-        # concurrent storage mutations (two saves finding the same similar
-        # record and both trying to update/delete it). Within each save,
-        # the parallel LLM calls still run on their own thread pool.
-        self._save_pool = ThreadPoolExecutor(
-            max_workers=1, thread_name_prefix="memory-save"
-        )
-        self._pending_saves: list[Future[Any]] = []
-        self._pending_lock = threading.Lock()
+            self._storage = self.storage

    _MEMORY_DOCS_URL = "https://docs.crewai.com/concepts/memory"

@@ -173,11 +183,7 @@ class Memory:
            from crewai.llm import LLM

            try:
-                model_name = (
-                    self._llm_config
-                    if isinstance(self._llm_config, str)
-                    else str(self._llm_config)
-                )
+                model_name = self.llm if isinstance(self.llm, str) else str(self.llm)
                self._llm_instance = LLM(model=model_name)
            except Exception as e:
                raise RuntimeError(
@@ -197,8 +203,8 @@ class Memory:
        """Lazy embedder initialization -- only created when first needed."""
        if self._embedder_instance is None:
            try:
-                if isinstance(self._embedder_config, dict):
-                    self._embedder_instance = build_embedder(self._embedder_config)
+                if isinstance(self.embedder, dict):
+                    self._embedder_instance = build_embedder(self.embedder)
                else:
                    self._embedder_instance = _default_embedder()
            except Exception as e:
@@ -356,7 +362,7 @@ class Memory:
        Raises:
            Exception: On save failure (events emitted).
        """
-        if self._read_only:
+        if self.read_only:
            return None
        _source_type = "unified_memory"
        try:
@@ -444,7 +450,7 @@ class Memory:
        Returns:
            Empty list (records are not available until the background save completes).
        """
-        if not contents or self._read_only:
+        if not contents or self.read_only:
            return []

        self._submit_save(
--- a/lib/crewai/src/crewai/rag/chromadb/factory.py
+++ b/lib/crewai/src/crewai/rag/chromadb/factory.py
@@ -1,13 +1,12 @@
 """Factory functions for creating ChromaDB clients."""

-from hashlib import md5
 import os

 from chromadb import PersistentClient
-import portalocker

 from crewai.rag.chromadb.client import ChromaDBClient
 from crewai.rag.chromadb.config import ChromaDBConfig
+from crewai.utilities.lock_store import lock


 def create_client(config: ChromaDBConfig) -> ChromaDBClient:
@@ -25,10 +24,8 @@ def create_client(config: ChromaDBConfig) -> ChromaDBClient:

    persist_dir = config.settings.persist_directory
    os.makedirs(persist_dir, exist_ok=True)
-    lock_id = md5(persist_dir.encode(), usedforsecurity=False).hexdigest()
-    lockfile = os.path.join(persist_dir, f"chromadb-{lock_id}.lock")

-    with portalocker.Lock(lockfile):
+    with lock(f"chromadb:{persist_dir}"):
        client = PersistentClient(
            path=persist_dir,
            settings=config.settings,
--- a/lib/crewai/src/crewai/telemetry/init.py
+++ b/lib/crewai/src/crewai/telemetry/init.py
@@ -1,5 +1,4 @@
 from crewai.telemetry.telemetry import Telemetry


-
 __all__ = ["Telemetry"]
--- a/lib/crewai/src/crewai/tools/init.py
+++ b/lib/crewai/src/crewai/tools/init.py
@@ -1,7 +1,6 @@
 from crewai.tools.base_tool import BaseTool, EnvVar, tool


-
 __all__ = [
    "BaseTool",
    "EnvVar",
--- a/lib/crewai/src/crewai/tools/mcp_native_tool.py
+++ b/lib/crewai/src/crewai/tools/mcp_native_tool.py
@@ -1,55 +1,30 @@
 """Native MCP tool wrapper for CrewAI agents.

-This module provides a tool wrapper that reuses existing MCP client sessions
-for better performance and connection management.
+This module provides a tool wrapper that creates a fresh MCP client for every
+invocation, ensuring safe parallel execution even when the same tool is called
+concurrently by the executor.
 """

 import asyncio
-import threading
+from collections.abc import Callable
 from typing import Any

 from crewai.tools import BaseTool


-_mcp_loop: asyncio.AbstractEventLoop | None = None
-_mcp_loop_thread: threading.Thread | None = None
-_mcp_loop_lock = threading.Lock()
-
-
-def _get_mcp_event_loop() -> asyncio.AbstractEventLoop:
-    """Return (and lazily start) a persistent event loop for MCP operations.
-
-    All MCP SDK transports use anyio task groups whose cancel scopes must be
-    entered and exited on the same event loop / task.  By funnelling every
-    MCP call through one long-lived loop we avoid the "exit cancel scope in
-    a different task" errors that happen when asyncio.run() creates a
-    throwaway loop per call.
-    """
-    global _mcp_loop, _mcp_loop_thread
-    with _mcp_loop_lock:
-        if _mcp_loop is None or _mcp_loop.is_closed():
-            _mcp_loop = asyncio.new_event_loop()
-            _mcp_loop_thread = threading.Thread(
-                target=_mcp_loop.run_forever, daemon=True, name="mcp-event-loop"
-            )
-            _mcp_loop_thread.start()
-    return _mcp_loop
-
-
 class MCPNativeTool(BaseTool):
-    """Native MCP tool that reuses client sessions.
+    """Native MCP tool that creates a fresh client per invocation.

-    This tool wrapper is used when agents connect to MCP servers using
-    structured configurations. It reuses existing client sessions for
-    better performance and proper connection lifecycle management.
-
-    Unlike MCPToolWrapper which connects on-demand, this tool uses
-    a shared MCP client instance that maintains a persistent connection.
+    A ``client_factory`` callable produces an independent ``MCPClient`` +
+    transport for every ``_run_async`` call.  This guarantees that parallel
+    invocations -- whether of the *same* tool or *different* tools from the
+    same server -- never share mutable connection state (which would cause
+    anyio cancel-scope errors).
    """

    def __init__(
        self,
-        mcp_client: Any,
+        client_factory: Callable[[], Any],
        tool_name: str,
        tool_schema: dict[str, Any],
        server_name: str,
@@ -58,7 +33,7 @@ class MCPNativeTool(BaseTool):
        """Initialize native MCP tool.

        Args:
-            mcp_client: MCPClient instance with active session.
+            client_factory: Zero-arg callable that returns a new MCPClient.
            tool_name: Name of the tool (may be prefixed).
            tool_schema: Schema information for the tool.
            server_name: Name of the MCP server for prefixing.
@@ -80,15 +55,10 @@ class MCPNativeTool(BaseTool):

        super().__init__(**kwargs)

-        self._mcp_client = mcp_client
+        self._client_factory = client_factory
        self._original_tool_name = original_tool_name or tool_name
        self._server_name = server_name

-    @property
-    def mcp_client(self) -> Any:
-        """Get the MCP client instance."""
-        return self._mcp_client
-
    @property
    def original_tool_name(self) -> str:
        """Get the original tool name."""
@@ -102,21 +72,25 @@ class MCPNativeTool(BaseTool):
    def _run(self, **kwargs) -> str:
        """Execute tool using the MCP client session.

-        Submits work to a persistent background event loop so that all MCP
-        transport context managers (which rely on anyio cancel scopes) stay
-        on the same loop and task throughout their lifecycle.
-
        Args:
            **kwargs: Arguments to pass to the MCP tool.

        Returns:
            Result from the MCP tool execution.
        """
-        loop = _get_mcp_event_loop()
-        timeout = self._mcp_client.connect_timeout + self._mcp_client.execution_timeout
        try:
-            future = asyncio.run_coroutine_threadsafe(self._run_async(**kwargs), loop)
-            return future.result(timeout=timeout)
+            try:
+                asyncio.get_running_loop()
+
+                import concurrent.futures
+
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    coro = self._run_async(**kwargs)
+                    future = executor.submit(asyncio.run, coro)
+                    return future.result()
+            except RuntimeError:
+                return asyncio.run(self._run_async(**kwargs))
+
        except Exception as e:
            raise RuntimeError(
                f"Error executing MCP tool {self.original_tool_name}: {e!s}"
@@ -125,31 +99,22 @@ class MCPNativeTool(BaseTool):
    async def _run_async(self, **kwargs) -> str:
        """Async implementation of tool execution.

+        A fresh ``MCPClient`` is created for every invocation so that
+        concurrent calls never share transport or session state.
+
        Args:
            **kwargs: Arguments to pass to the MCP tool.

        Returns:
            Result from the MCP tool execution.
        """
-        if not self._mcp_client.connected:
-            await self._mcp_client.connect()
+        client = self._client_factory()
+        await client.connect()

        try:
-            result = await self._mcp_client.call_tool(self.original_tool_name, kwargs)
-        except Exception as e:
-            error_str = str(e).lower()
-            if (
-                "not connected" in error_str
-                or "connection" in error_str
-                or "send" in error_str
-            ):
-                await self._mcp_client.disconnect()
-                await self._mcp_client.connect()
-                result = await self._mcp_client.call_tool(
-                    self.original_tool_name, kwargs
-                )
-            else:
-                raise
+            result = await client.call_tool(self.original_tool_name, kwargs)
+        finally:
+            await client.disconnect()

        if isinstance(result, str):
            return result
--- a/lib/crewai/src/crewai/tools/memory_tools.py
+++ b/lib/crewai/src/crewai/tools/memory_tools.py
@@ -49,7 +49,7 @@ class RecallMemoryTool(BaseTool):
        all_lines: list[str] = []
        seen_ids: set[str] = set()
        for query in queries:
-            matches = self.memory.recall(query)
+            matches = self.memory.recall(query, limit=20)
            for m in matches:
                if m.record.id not in seen_ids:
                    seen_ids.add(m.record.id)
@@ -121,7 +121,7 @@ def create_memory_tools(memory: Any) -> list[BaseTool]:
            description=i18n.tools("recall_memory"),
        ),
    ]
-    if not getattr(memory, "_read_only", False):
+    if not memory.read_only:
        tools.append(
            RememberTool(
                memory=memory,
--- a/lib/crewai/src/crewai/translations/en.json
+++ b/lib/crewai/src/crewai/translations/en.json
@@ -7,7 +7,7 @@
  "slices": {
    "observation": "\nObservation:",
    "task": "\nCurrent Task: {input}\n\nBegin! This is VERY important to you, use the tools available and give your best Final Answer, your job depends on it!\n\nThought:",
-    "memory": "\n\n# Useful context: \n{memory}",
+    "memory": "\n\n# Memories from past conversations:\n{memory}\n\nIMPORTANT: The memories above are an automatic selection and may be INCOMPLETE. If the task involves counting, listing, or summing items (e.g. 'how many', 'total', 'list all'), you MUST use the Search memory tool with several different queries before answering — do NOT rely solely on the memories shown above. Enumerate each distinct item you find before giving a final count.",
    "role_playing": "You are {role}. {backstory}\nYour personal goal is: {goal}",
    "tools": "\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n{tools}\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: you should always think about what to do\nAction: the action to take, only one name of [{tool_names}], just the name, exactly as it's written.\nAction Input: the input to the action, just a simple JSON object, enclosed in curly braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce all necessary information is gathered, return the following format:\n\n```\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n```",
    "no_tools": "",
@@ -60,12 +60,12 @@
      "description": "See image to understand its content, you can optionally ask a question about the image",
      "default_action": "Please provide a detailed description of this image, including all visual elements, context, and any notable details you can observe."
    },
-    "recall_memory": "Search through the team's shared memory for relevant information. Pass one or more queries to search for multiple things at once. Use this when you need to find facts, decisions, preferences, or past results that may have been stored previously.",
+    "recall_memory": "Search through the team's shared memory for relevant information. Pass one or more queries to search for multiple things at once. Use this when you need to find facts, decisions, preferences, or past results that may have been stored previously. IMPORTANT: For questions that require counting, summing, or listing items across multiple conversations (e.g. 'how many X', 'total Y', 'list all Z'), you MUST search multiple times with different phrasings to ensure you find ALL relevant items before giving a final count or total. Do not rely on a single search — items may be described differently across conversations.",
    "save_to_memory": "Store one or more important facts, decisions, observations, or lessons in memory so they can be recalled later by you or other agents. Pass multiple items at once when you have several things worth remembering."
  },
  "memory": {
    "query_system": "You analyze a query for searching memory.\nGiven the query and available scopes, output:\n1. keywords: Key entities or keywords that can be used to filter by category.\n2. suggested_scopes: Which available scopes are most relevant (empty for all).\n3. complexity: 'simple' or 'complex'.\n4. recall_queries: 1-3 short, targeted search phrases distilled from the query. Each should be a concise phrase optimized for semantic vector search. If the query is already short and focused, return it as-is in a single-item list. For long task descriptions, extract the distinct things worth searching for.\n5. time_filter: If the query references a time period (like 'last week', 'yesterday', 'in January'), return an ISO 8601 date string for the earliest relevant date (e.g. '2026-02-01'). Return null if no time constraint is implied.",
-    "extract_memories_system": "You extract discrete, reusable memory statements from raw content (e.g. a task description and its result).\n\nFor the given content, output a list of memory statements. Each memory must:\n- Be one clear sentence or short statement\n- Be understandable without the original context\n- Capture a decision, fact, outcome, preference, lesson, or observation worth remembering\n- NOT be a vague summary or a restatement of the task description\n- NOT duplicate the same idea in different words\n\nIf there is nothing worth remembering (e.g. empty result, no decisions or facts), return an empty list.\nOutput a JSON object with a single key \"memories\" whose value is a list of strings.",
+    "extract_memories_system": "You extract discrete, reusable memory statements from raw content (e.g. a task description and its result, or a conversation between a user and an assistant).\n\nFor the given content, output a list of memory statements. Each memory must:\n- Be one clear sentence or short statement\n- Be understandable without the original context\n- Capture a decision, fact, outcome, preference, lesson, or observation worth remembering\n- NOT be a vague summary or a restatement of the task description\n- NOT duplicate the same idea in different words\n\nWhen the content is a conversation, pay special attention to facts stated by the user (first-person statements). These personal facts are HIGH PRIORITY and must always be extracted:\n- What the user did, bought, made, visited, attended, or completed\n- Names of people, pets, places, brands, and specific items the user mentions\n- Quantities, durations, dates, and measurements the user states\n- Subordinate clauses and casual asides often contain important personal details (e.g. \"by the way, it took me 4 hours\" or \"my Golden Retriever Max\")\n\nPreserve exact names and numbers — never generalize (e.g. keep \"lavender gin fizz\" not just \"cocktail\", keep \"12 largemouth bass\" not just \"fish caught\", keep \"Golden Retriever\" not just \"dog\").\n\nAdditional extraction rules:\n- Presupposed facts: When the user reveals a fact indirectly in a question (e.g. \"What collar suits a Golden Retriever like Max?\" presupposes Max is a Golden Retriever), extract that fact as a separate memory.\n- Date precision: Always preserve the full date including day-of-month when stated (e.g. \"February 14th\" not just \"February\", \"March 5\" not just \"March\").\n- Life events in passing: When the user mentions a life event (birth, wedding, graduation, move, adoption) while discussing something else, extract the life event as its own memory (e.g. \"my friend David had a baby boy named Jasper\" is a birth fact, even if mentioned while planning to send congratulations).\n\nIf there is nothing worth remembering (e.g. empty result, no decisions or facts), return an empty list.\nOutput a JSON object with a single key \"memories\" whose value is a list of strings.",
    "extract_memories_user": "Content:\n{content}\n\nExtract memory statements as described. Return structured output.",
    "query_user": "Query: {query}\n\nAvailable scopes: {available_scopes}\n{scope_desc}\n\nReturn the analysis as structured output.",
    "save_system": "You analyze content to be stored in a hierarchical memory system.\nGiven the content and the existing scopes and categories, output:\n1. suggested_scope: The best matching existing scope path, or a new path if none fit (use / for root).\n2. categories: A list of categories (reuse existing when relevant, add new ones if needed).\n3. importance: A number from 0.0 to 1.0 indicating how significant this memory is.\n4. extracted_metadata: A JSON object with any entities, dates, or topics you can extract.",
@@ -74,9 +74,28 @@
    "consolidation_user": "New content to consider storing:\n{new_content}\n\nExisting similar memories:\n{records_summary}\n\nReturn the consolidation plan as structured output."
  },
  "reasoning": {
-    "initial_plan": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are creating a strategic plan for a task that requires your expertise and unique perspective.",
-    "refine_plan": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are refining a strategic plan for a task that requires your expertise and unique perspective.",
-    "create_plan_prompt": "You are {role} with this background: {backstory}\n\nYour primary goal is: {goal}\n\nYou have been assigned the following task:\n{description}\n\nExpected output:\n{expected_output}\n\nAvailable tools: {tools}\n\nBefore executing this task, create a detailed plan that leverages your expertise as {role} and outlines:\n1. Your understanding of the task from your professional perspective\n2. The key steps you'll take to complete it, drawing on your background and skills\n3. How you'll approach any challenges that might arise, considering your expertise\n4. How you'll strategically use the available tools based on your experience, exactly what tools to use and how to use them\n5. The expected outcome and how it aligns with your goal\n\nAfter creating your plan, assess whether you feel ready to execute the task or if you could do better.\nConclude with one of these statements:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan because [specific reason].\"",
-    "refine_plan_prompt": "You are {role} with this background: {backstory}\n\nYour primary goal is: {goal}\n\nYou created the following plan for this task:\n{current_plan}\n\nHowever, you indicated that you're not ready to execute the task yet.\n\nPlease refine your plan further, drawing on your expertise as {role} to address any gaps or uncertainties. As you refine your plan, be specific about which available tools you will use, how you will use them, and why they are the best choices for each step. Clearly outline your tool usage strategy as part of your improved plan.\n\nAfter refining your plan, assess whether you feel ready to execute the task.\nConclude with one of these statements:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan further because [specific reason].\""
+    "initial_plan": "You are {role}. Create a focused execution plan using only the essential steps needed.",
+    "refine_plan": "You are {role}. Refine your plan to address the specific gap while keeping it minimal.",
+    "create_plan_prompt": "You are {role}.\n\nTask: {description}\n\nExpected output: {expected_output}\n\nAvailable tools: {tools}\n\nCreate a focused plan with ONLY the essential steps needed. Most tasks require just 2-5 steps. Do NOT pad with unnecessary steps like \"review\", \"verify\", \"document\", or \"finalize\" unless explicitly required.\n\nFor each step, specify the action and which tool to use (if any).\n\nConclude with:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan because [specific reason].\"",
+    "refine_plan_prompt": "Your plan:\n{current_plan}\n\nYou indicated you're not ready. Address the specific gap while keeping the plan minimal.\n\nConclude with READY or NOT READY."
+  },
+  "planning": {
+    "system_prompt": "You are a strategic planning assistant. Create concrete, executable plans where every step produces a verifiable result.",
+    "create_plan_prompt": "Create an execution plan for the following task:\n\n## Task\n{description}\n\n## Expected Output\n{expected_output}\n\n## Available Tools\n{tools}\n\n## Planning Principles\nFocus on CONCRETE, EXECUTABLE steps. Each step must clearly state WHAT ACTION to take and HOW to verify it succeeded. The number of steps should match the task complexity. Hard limit: {max_steps} steps.\n\n## Rules:\n- Each step must have a clear DONE criterion\n- Do NOT group unrelated actions: if steps can fail independently, keep them separate\n- NO standalone \"thinking\" or \"planning\" steps — act, don't just observe\n- The last step must produce the required output\n\nAfter your plan, state READY or NOT READY.",
+    "refine_plan_prompt": "Your previous plan:\n{current_plan}\n\nYou indicated you weren't ready. Refine your plan to address the specific gap.\n\nKeep the plan minimal - only add steps that directly address the issue.\n\nConclude with READY or NOT READY as before.",
+    "observation_system_prompt": "You are a Planning Agent observing execution progress. After each step completes, you analyze what happened and decide whether the remaining plan is still valid.\n\nReason step-by-step about:\n1. Did this step produce a concrete, verifiable result? (file created, command succeeded, service running, etc.) — or did it only explore without acting?\n2. What new information was learned from this step's result?\n3. Whether the remaining steps still make sense given this new information\n4. What refinements, if any, are needed for upcoming steps\n5. Whether the overall goal has already been achieved\n\nCritical: mark `step_completed_successfully=false` if:\n- The step result is only exploratory (ls, pwd, cat) without producing the required artifact or action\n- A command returned a non-zero exit code and the error was not recovered\n- The step description required creating/building/starting something and the result shows it was not done\n\nBe conservative about triggering full replans — only do so when the remaining plan is fundamentally wrong, not just suboptimal.\n\nIMPORTANT: Set step_completed_successfully=false if:\n- The step's stated goal was NOT achieved (even if other things were done)\n- The first meaningful action returned an error (file not found, command not found, etc.)\n- The result is exploration/discovery output rather than the concrete action the step required\n- The step ran out of attempts without producing the required output\nSet needs_full_replan=true if the current plan's remaining steps reference paths or state that don't exist yet and need to be created first.",
+    "observation_user_prompt": "## Original task\n{task_description}\n\n## Expected output\n{task_goal}\n{completed_summary}\n\n## Just completed step {step_number}\nDescription: {step_description}\nResult: {step_result}\n{remaining_summary}\n\nAnalyze this step's result and provide your observation.",
+    "step_executor_system_prompt": "You are {role}. {backstory}\n\nYour goal: {goal}\n\nYou are executing ONE specific step in a larger plan. Your ONLY job is to fully complete this step — not to plan ahead.\n\nKey rules:\n- **ACT FIRST.** Execute the primary action of this step immediately. Do NOT read or explore files before attempting the main action unless exploration IS the step's goal.\n- If the step says 'run X', run X NOW. If it says 'write file Y', write Y NOW.\n- If the step requires producing an output file (e.g. /app/move.txt, report.jsonl, summary.csv), you MUST write that file using a tool call — do NOT just state the answer in text.\n- You may use tools MULTIPLE TIMES. After each tool use, check the result. If it failed, try a different approach.\n- Only output your Final Answer AFTER the concrete outcome is verified (file written, build succeeded, command exited 0).\n- If a command is not found or a path does not exist, fix it (different PATH, install missing deps, use absolute paths).\n- Do NOT spend more than 3 tool calls on exploration/analysis before attempting the primary action.{tools_section}",
+    "step_executor_tools_section": "\n\nAvailable tools: {tool_names}\n\nYou may call tools multiple times in sequence. Use this format for EACH tool call:\nThought: <what you observed and what you will try next>\nAction: <tool_name>\nAction Input: <input>\n\nAfter observing each result, decide: is the step complete? If yes:\nThought: The step is done because <evidence>\nFinal Answer: <concise summary of what was accomplished and the key result>",
+    "step_executor_user_prompt": "## Current Step\n{step_description}",
+    "step_executor_suggested_tool": "\nSuggested tool: {tool_to_use}",
+    "step_executor_context_header": "\n## Context from previous steps:",
+    "step_executor_context_entry": "Step {step_number} result: {result}",
+    "step_executor_complete_step": "\n**Execute the primary action of this step NOW.** If the step requires writing a file, write it. If it requires running a command, run it. Verify the outcome with a follow-up tool call, then give your Final Answer. Your Final Answer must confirm what was DONE (file created at path X, command succeeded), not just what should be done.",
+    "todo_system_prompt": "You are {role}. Your goal: {goal}\n\nYou are executing a specific step in a multi-step plan. Focus only on completing the current step. Use the suggested tool if one is provided. Be concise and provide clear results that can be used by subsequent steps.",
+    "synthesis_system_prompt": "You are {role}. You have completed a multi-step task. Synthesize the results from all steps into a single, coherent final response that directly addresses the original task. Do NOT list step numbers or say 'Step 1 result'. Produce a clean, polished answer as if you did it all at once.",
+    "synthesis_user_prompt": "## Original Task\n{task_description}\n\n## Results from each step\n{combined_steps}\n\nSynthesize these results into a single, coherent final answer.",
+    "replan_enhancement_prompt": "\n\nIMPORTANT: Previous execution attempt did not fully succeed. Please create a revised plan that accounts for the following context from the previous attempt:\n\n{previous_context}\n\nConsider:\n1. What steps succeeded and can be built upon\n2. What steps failed and why they might have failed\n3. Alternative approaches that might work better\n4. Whether dependencies need to be restructured",
+    "step_executor_task_context": "## Task Context\nThe following is the full task you are helping complete. Keep this in mind — especially any required output files, exact filenames, and expected formats.\n\n{task_context}\n\n---\n"
  }
-}
+}
--- a/lib/crewai/src/crewai/utilities/agent_utils.py
+++ b/lib/crewai/src/crewai/utilities/agent_utils.py
@@ -3,6 +3,8 @@ from __future__ import annotations
 import asyncio
 from collections.abc import Callable, Sequence
 import concurrent.futures
+from dataclasses import dataclass, field
+from datetime import datetime
 import inspect
 import json
 import re
@@ -39,6 +41,7 @@ from crewai.utilities.types import LLMMessage
 if TYPE_CHECKING:
    from crewai.agent import Agent
    from crewai.agents.crew_agent_executor import CrewAgentExecutor
+    from crewai.agents.tools_handler import ToolsHandler
    from crewai.experimental.agent_executor import AgentExecutor
    from crewai.lite_agent import LiteAgent
    from crewai.llm import LLM
@@ -335,6 +338,66 @@ def enforce_rpm_limit(
        request_within_rpm_limit()


+def _prepare_llm_call(
+    executor_context: CrewAgentExecutor | AgentExecutor | LiteAgent | None,
+    messages: list[LLMMessage],
+    printer: Printer,
+    verbose: bool = True,
+) -> list[LLMMessage]:
+    """Shared pre-call logic: run before hooks and resolve messages.
+
+    Args:
+        executor_context: Optional executor context for hook invocation.
+        messages: The messages to send to the LLM.
+        printer: Printer instance for output.
+        verbose: Whether to print output.
+
+    Returns:
+        The resolved messages list (may come from executor_context).
+
+    Raises:
+        ValueError: If a before hook blocks the call.
+    """
+    if executor_context is not None:
+        if not _setup_before_llm_call_hooks(executor_context, printer, verbose=verbose):
+            raise ValueError("LLM call blocked by before_llm_call hook")
+        messages = executor_context.messages
+    return messages
+
+
+def _validate_and_finalize_llm_response(
+    answer: Any,
+    executor_context: CrewAgentExecutor | AgentExecutor | LiteAgent | None,
+    printer: Printer,
+    verbose: bool = True,
+) -> str | BaseModel | Any:
+    """Shared post-call logic: validate response and run after hooks.
+
+    Args:
+        answer: The raw LLM response.
+        executor_context: Optional executor context for hook invocation.
+        printer: Printer instance for output.
+        verbose: Whether to print output.
+
+    Returns:
+        The potentially modified response.
+
+    Raises:
+        ValueError: If the response is None or empty.
+    """
+    if not answer:
+        if verbose:
+            printer.print(
+                content="Received None or empty response from LLM call.",
+                color="red",
+            )
+        raise ValueError("Invalid response from LLM call - None or empty.")
+
+    return _setup_after_llm_call_hooks(
+        executor_context, answer, printer, verbose=verbose
+    )
+
+
 def get_llm_response(
    llm: LLM | BaseLLM,
    messages: list[LLMMessage],
@@ -371,11 +434,7 @@ def get_llm_response(
        Exception: If an error occurs.
        ValueError: If the response is None or empty.
    """
-
-    if executor_context is not None:
-        if not _setup_before_llm_call_hooks(executor_context, printer, verbose=verbose):
-            raise ValueError("LLM call blocked by before_llm_call hook")
-        messages = executor_context.messages
+    messages = _prepare_llm_call(executor_context, messages, printer, verbose=verbose)

    try:
        answer = llm.call(
@@ -389,16 +448,9 @@ def get_llm_response(
        )
    except Exception as e:
        raise e
-    if not answer:
-        if verbose:
-            printer.print(
-                content="Received None or empty response from LLM call.",
-                color="red",
-            )
-        raise ValueError("Invalid response from LLM call - None or empty.")

-    return _setup_after_llm_call_hooks(
-        executor_context, answer, printer, verbose=verbose
+    return _validate_and_finalize_llm_response(
+        answer, executor_context, printer, verbose=verbose
    )


@@ -428,6 +480,7 @@ async def aget_llm_response(
        from_agent: Optional agent context for the LLM call.
        response_model: Optional Pydantic model for structured outputs.
        executor_context: Optional executor context for hook invocation.
+        verbose: Whether to print output.

    Returns:
        The response from the LLM as a string, Pydantic model (when response_model is provided),
@@ -437,10 +490,7 @@ async def aget_llm_response(
        Exception: If an error occurs.
        ValueError: If the response is None or empty.
    """
-    if executor_context is not None:
-        if not _setup_before_llm_call_hooks(executor_context, printer, verbose=verbose):
-            raise ValueError("LLM call blocked by before_llm_call hook")
-        messages = executor_context.messages
+    messages = _prepare_llm_call(executor_context, messages, printer, verbose=verbose)

    try:
        answer = await llm.acall(
@@ -454,16 +504,9 @@ async def aget_llm_response(
        )
    except Exception as e:
        raise e
-    if not answer:
-        if verbose:
-            printer.print(
-                content="Received None or empty response from LLM call.",
-                color="red",
-            )
-        raise ValueError("Invalid response from LLM call - None or empty.")

-    return _setup_after_llm_call_hooks(
-        executor_context, answer, printer, verbose=verbose
+    return _validate_and_finalize_llm_response(
+        answer, executor_context, printer, verbose=verbose
    )


@@ -1157,6 +1200,386 @@ def extract_tool_call_info(
    return None


+def is_tool_call_list(response: list[Any]) -> bool:
+    """Check if a response from the LLM is a list of tool calls.
+
+    Supports OpenAI, Anthropic, Bedrock, and Gemini formats.
+
+    Args:
+        response: The response to check.
+
+    Returns:
+        True if the response appears to be a list of tool calls.
+    """
+    if not response:
+        return False
+    first_item = response[0]
+    # OpenAI-style
+    if hasattr(first_item, "function") or (
+        isinstance(first_item, dict) and "function" in first_item
+    ):
+        return True
+    # Anthropic-style (ToolUseBlock)
+    if hasattr(first_item, "type") and getattr(first_item, "type", None) == "tool_use":
+        return True
+    if hasattr(first_item, "name") and hasattr(first_item, "input"):
+        return True
+    # Bedrock-style
+    if isinstance(first_item, dict) and "name" in first_item and "input" in first_item:
+        return True
+    # Gemini-style
+    if hasattr(first_item, "function_call") and first_item.function_call:
+        return True
+    return False
+
+
+def check_native_tool_support(llm: Any, original_tools: list[BaseTool] | None) -> bool:
+    """Check if the LLM supports native function calling and tools are available.
+
+    Args:
+        llm: The LLM instance.
+        original_tools: Original BaseTool instances.
+
+    Returns:
+        True if native function calling is supported and tools exist.
+    """
+    return (
+        hasattr(llm, "supports_function_calling")
+        and callable(getattr(llm, "supports_function_calling", None))
+        and llm.supports_function_calling()
+        and bool(original_tools)
+    )
+
+
+def setup_native_tools(
+    original_tools: list[BaseTool],
+) -> tuple[
+    list[dict[str, Any]],
+    dict[str, Callable[..., Any]],
+    dict[str, BaseTool | CrewStructuredTool],
+]:
+    """Convert tools to OpenAI schema format for native function calling.
+
+    Args:
+        original_tools: Original BaseTool instances.
+
+    Returns:
+        Tuple of (openai_tools_schema, available_functions_dict, tool_name_mapping).
+    """
+    return convert_tools_to_openai_schema(original_tools)
+
+
+def build_tool_calls_assistant_message(
+    tool_calls: list[Any],
+) -> tuple[LLMMessage | None, list[dict[str, Any]]]:
+    """Build an assistant message containing tool call reports.
+
+    Extracts info from each tool call, builds the standard assistant message
+    format, and preserves raw Gemini parts when applicable.
+
+    Args:
+        tool_calls: Raw tool call objects from the LLM response.
+
+    Returns:
+        Tuple of (assistant_message, tool_calls_to_report).
+        assistant_message is None if no valid tool calls found.
+    """
+    tool_calls_to_report: list[dict[str, Any]] = []
+    for tool_call in tool_calls:
+        info = extract_tool_call_info(tool_call)
+        if not info:
+            continue
+        call_id, func_name, func_args = info
+        tool_calls_to_report.append(
+            {
+                "id": call_id,
+                "type": "function",
+                "function": {
+                    "name": func_name,
+                    "arguments": func_args
+                    if isinstance(func_args, str)
+                    else json.dumps(func_args),
+                },
+            }
+        )
+
+    if not tool_calls_to_report:
+        return None, []
+
+    assistant_message: LLMMessage = {
+        "role": "assistant",
+        "content": None,
+        "tool_calls": tool_calls_to_report,
+    }
+    # Preserve raw parts for Gemini compatibility
+    if all(type(tc).__qualname__ == "Part" for tc in tool_calls):
+        assistant_message["raw_tool_call_parts"] = list(tool_calls)
+
+    return assistant_message, tool_calls_to_report
+
+
+@dataclass
+class NativeToolCallResult:
+    """Result from executing a single native tool call."""
+
+    call_id: str
+    func_name: str
+    result: str
+    from_cache: bool = False
+    result_as_answer: bool = False
+    tool_message: LLMMessage = field(default_factory=dict)  # type: ignore[assignment]
+
+
+def execute_single_native_tool_call(
+    tool_call: Any,
+    *,
+    available_functions: dict[str, Callable[..., Any]],
+    original_tools: list[BaseTool],
+    structured_tools: list[CrewStructuredTool] | None,
+    tools_handler: ToolsHandler | None,
+    agent: Agent | None,
+    task: Task | None,
+    crew: Any | None,
+    event_source: Any,
+    printer: Printer | None = None,
+    verbose: bool = False,
+) -> NativeToolCallResult:
+    """Execute a single native tool call with full lifecycle management.
+
+    Handles: arg parsing, tool lookup, max-usage check, cache read/write,
+    before/after hooks, event emission, and result_as_answer detection.
+
+    Args:
+        tool_call: Raw tool call object from the LLM.
+        available_functions: Map of sanitized tool name -> callable.
+        original_tools: Original BaseTool list (for cache_function, result_as_answer).
+        structured_tools: Structured tools list (for hook context).
+        tools_handler: Optional handler with cache.
+        agent: The agent instance.
+        task: The current task.
+        crew: The crew instance.
+        event_source: The object to use as event emitter source.
+        printer: Optional printer for verbose logging.
+        verbose: Whether to print verbose output.
+
+    Returns:
+        NativeToolCallResult with all execution details.
+    """
+    from crewai.events.event_bus import crewai_event_bus
+    from crewai.events.types.tool_usage_events import (
+        ToolUsageErrorEvent,
+        ToolUsageFinishedEvent,
+        ToolUsageStartedEvent,
+    )
+    from crewai.hooks.tool_hooks import (
+        ToolCallHookContext,
+        get_after_tool_call_hooks,
+        get_before_tool_call_hooks,
+    )
+
+    info = extract_tool_call_info(tool_call)
+    if not info:
+        return NativeToolCallResult(
+            call_id="", func_name="", result="Unrecognized tool call format"
+        )
+
+    call_id, func_name, func_args = info
+
+    # Parse arguments
+    if isinstance(func_args, str):
+        try:
+            args_dict = json.loads(func_args)
+        except json.JSONDecodeError:
+            args_dict = {}
+    else:
+        args_dict = func_args
+
+    agent_key = getattr(agent, "key", "unknown") if agent else "unknown"
+
+    # Find original tool for cache_function and result_as_answer
+    original_tool: BaseTool | None = None
+    for tool in original_tools:
+        if sanitize_tool_name(tool.name) == func_name:
+            original_tool = tool
+            break
+
+    # Check max usage count
+    max_usage_reached = False
+    if (
+        original_tool
+        and original_tool.max_usage_count is not None
+        and original_tool.current_usage_count >= original_tool.max_usage_count
+    ):
+        max_usage_reached = True
+
+    # Check cache
+    from_cache = False
+    input_str = json.dumps(args_dict) if args_dict else ""
+    result = "Tool not found"
+
+    if tools_handler and tools_handler.cache:
+        cached_result = tools_handler.cache.read(tool=func_name, input=input_str)
+        if cached_result is not None:
+            result = (
+                str(cached_result)
+                if not isinstance(cached_result, str)
+                else cached_result
+            )
+            from_cache = True
+
+    # Emit tool started event
+    started_at = datetime.now()
+    crewai_event_bus.emit(
+        event_source,
+        event=ToolUsageStartedEvent(
+            tool_name=func_name,
+            tool_args=args_dict,
+            from_agent=agent,
+            from_task=task,
+            agent_key=agent_key,
+        ),
+    )
+
+    track_delegation_if_needed(func_name, args_dict, task)
+
+    # Find structured tool for hooks
+    structured_tool: CrewStructuredTool | None = None
+    for structured in structured_tools or []:
+        if sanitize_tool_name(structured.name) == func_name:
+            structured_tool = structured
+            break
+
+    # Before hooks
+    hook_blocked = False
+    before_hook_context = ToolCallHookContext(
+        tool_name=func_name,
+        tool_input=args_dict,
+        tool=structured_tool,  # type: ignore[arg-type]
+        agent=agent,
+        task=task,
+        crew=crew,
+    )
+    try:
+        for hook in get_before_tool_call_hooks():
+            if hook(before_hook_context) is False:
+                hook_blocked = True
+                break
+    except Exception:  # noqa: S110
+        pass
+
+    error_event_emitted = False
+    if hook_blocked:
+        result = f"Tool execution blocked by hook. Tool: {func_name}"
+    elif not from_cache and not max_usage_reached:
+        if func_name in available_functions:
+            try:
+                tool_func = available_functions[func_name]
+                raw_result = tool_func(**args_dict)
+
+                # Cache result
+                if tools_handler and tools_handler.cache:
+                    should_cache = True
+                    if original_tool:
+                        should_cache = original_tool.cache_function(
+                            args_dict, raw_result
+                        )
+                    if should_cache:
+                        tools_handler.cache.add(
+                            tool=func_name, input=input_str, output=raw_result
+                        )
+
+                result = (
+                    str(raw_result) if not isinstance(raw_result, str) else raw_result
+                )
+            except Exception as e:
+                result = f"Error executing tool: {e}"
+                if task:
+                    task.increment_tools_errors()
+                crewai_event_bus.emit(
+                    event_source,
+                    event=ToolUsageErrorEvent(
+                        tool_name=func_name,
+                        tool_args=args_dict,
+                        from_agent=agent,
+                        from_task=task,
+                        agent_key=agent_key,
+                        error=e,
+                    ),
+                )
+                error_event_emitted = True
+    elif max_usage_reached and original_tool:
+        result = (
+            f"Tool '{func_name}' has reached its usage limit of "
+            f"{original_tool.max_usage_count} times and cannot be used anymore."
+        )
+
+    # After hooks
+    after_hook_context = ToolCallHookContext(
+        tool_name=func_name,
+        tool_input=args_dict,
+        tool=structured_tool,  # type: ignore[arg-type]
+        agent=agent,
+        task=task,
+        crew=crew,
+        tool_result=result,
+    )
+    try:
+        for after_hook in get_after_tool_call_hooks():
+            hook_result = after_hook(after_hook_context)
+            if hook_result is not None:
+                result = hook_result
+                after_hook_context.tool_result = result
+    except Exception:  # noqa: S110
+        pass
+
+    # Emit tool finished event (only if error event wasn't already emitted)
+    if not error_event_emitted:
+        crewai_event_bus.emit(
+            event_source,
+            event=ToolUsageFinishedEvent(
+                output=result,
+                tool_name=func_name,
+                tool_args=args_dict,
+                from_agent=agent,
+                from_task=task,
+                agent_key=agent_key,
+                started_at=started_at,
+                finished_at=datetime.now(),
+            ),
+        )
+
+    # Build tool result message
+    tool_message: LLMMessage = {
+        "role": "tool",
+        "tool_call_id": call_id,
+        "name": func_name,
+        "content": result,
+    }
+
+    if verbose and printer:
+        cache_info = " (from cache)" if from_cache else ""
+        printer.print(
+            content=f"Tool {func_name} executed with result{cache_info}: {result[:200]}...",
+            color="green",
+        )
+
+    # Check result_as_answer
+    is_result_as_answer = bool(
+        original_tool
+        and hasattr(original_tool, "result_as_answer")
+        and original_tool.result_as_answer
+    )
+
+    return NativeToolCallResult(
+        call_id=call_id,
+        func_name=func_name,
+        result=result,
+        from_cache=from_cache,
+        result_as_answer=is_result_as_answer,
+        tool_message=tool_message,
+    )
+
+
 def parse_tool_call_args(
    func_args: dict[str, Any] | str,
    func_name: str,
--- a/lib/crewai/src/crewai/utilities/i18n.py
+++ b/lib/crewai/src/crewai/utilities/i18n.py
@@ -100,7 +100,13 @@ class I18N(BaseModel):
    def retrieve(
        self,
        kind: Literal[
-            "slices", "errors", "tools", "reasoning", "hierarchical_manager_agent", "memory"
+            "slices",
+            "errors",
+            "tools",
+            "reasoning",
+            "planning",
+            "hierarchical_manager_agent",
+            "memory",
        ],
        key: str,
    ) -> str:
--- a/lib/crewai/src/crewai/utilities/lock_store.py
+++ b/lib/crewai/src/crewai/utilities/lock_store.py
@@ -0,0 +1,61 @@
+"""Centralised lock factory.
+
+If ``REDIS_URL`` is set, locks are distributed via ``portalocker.RedisLock``. Otherwise, falls
+back to the standard ``portalocker.Lock``.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+from contextlib import contextmanager
+from functools import lru_cache
+from hashlib import md5
+import os
+import tempfile
+from typing import TYPE_CHECKING, Final
+
+import portalocker
+
+
+if TYPE_CHECKING:
+    import redis
+
+
+_REDIS_URL: str | None = os.environ.get("REDIS_URL")
+
+_DEFAULT_TIMEOUT: Final[int] = 120
+
+
+@lru_cache(maxsize=1)
+def _redis_connection() -> redis.Redis:
+    """Return a cached Redis connection, creating one on first call."""
+    from redis import Redis
+
+    if _REDIS_URL is None:
+        raise ValueError("REDIS_URL environment variable is not set")
+    return Redis.from_url(_REDIS_URL)
+
+
+@contextmanager
+def lock(name: str, *, timeout: float = _DEFAULT_TIMEOUT) -> Iterator[None]:
+    """Acquire a named lock, yielding while it is held.
+
+    Args:
+        name: A human-readable lock name (e.g. ``"chromadb_init"``).
+              Automatically namespaced to avoid collisions.
+        timeout: Maximum seconds to wait for the lock before raising.
+    """
+    channel = f"crewai:{md5(name.encode(), usedforsecurity=False).hexdigest()}"
+
+    if _REDIS_URL:
+        with portalocker.RedisLock(
+            channel=channel,
+            connection=_redis_connection(),
+            timeout=timeout,
+        ):
+            yield
+    else:
+        lock_dir = tempfile.gettempdir()
+        lock_path = os.path.join(lock_dir, f"{channel}.lock")
+        with portalocker.Lock(lock_path, timeout=timeout):
+            yield
--- a/lib/crewai/src/crewai/utilities/planning_types.py
+++ b/lib/crewai/src/crewai/utilities/planning_types.py
@@ -0,0 +1,279 @@
+"""Types for agent planning and todo tracking."""
+
+from __future__ import annotations
+
+from typing import Literal
+from uuid import uuid4
+
+from pydantic import BaseModel, Field, field_validator
+
+
+# Todo status type
+TodoStatus = Literal["pending", "running", "completed", "failed"]
+
+
+class PlanStep(BaseModel):
+    """A single step in the reasoning plan."""
+
+    step_number: int = Field(description="Step number (1-based)")
+    description: str = Field(description="What to do in this step")
+    tool_to_use: str | None = Field(
+        default=None, description="Tool to use for this step, if any"
+    )
+    depends_on: list[int] = Field(
+        default_factory=list, description="Step numbers this step depends on"
+    )
+
+
+class TodoItem(BaseModel):
+    """A single todo item representing a step in the execution plan."""
+
+    id: str = Field(default_factory=lambda: str(uuid4()))
+    step_number: int = Field(description="Order of this step in the plan (1-based)")
+    description: str = Field(description="What needs to be done")
+    tool_to_use: str | None = Field(
+        default=None, description="Tool to use for this step, if any"
+    )
+    status: TodoStatus = Field(default="pending", description="Current status")
+    depends_on: list[int] = Field(
+        default_factory=list, description="Step numbers this depends on"
+    )
+    result: str | None = Field(
+        default=None, description="Result after completion, if any"
+    )
+
+
+class TodoList(BaseModel):
+    """Collection of todos for tracking plan execution."""
+
+    items: list[TodoItem] = Field(default_factory=list)
+
+    @property
+    def current_todo(self) -> TodoItem | None:
+        """Get the currently running todo item."""
+        for item in self.items:
+            if item.status == "running":
+                return item
+        return None
+
+    @property
+    def next_pending(self) -> TodoItem | None:
+        """Get the next pending todo item."""
+        for item in self.items:
+            if item.status == "pending":
+                return item
+        return None
+
+    @property
+    def is_complete(self) -> bool:
+        """Check if all todos are in a terminal state (completed or failed)."""
+        return len(self.items) > 0 and all(
+            item.status in ("completed", "failed") for item in self.items
+        )
+
+    @property
+    def pending_count(self) -> int:
+        """Count of pending todos."""
+        return sum(1 for item in self.items if item.status == "pending")
+
+    @property
+    def completed_count(self) -> int:
+        """Count of completed todos."""
+        return sum(1 for item in self.items if item.status == "completed")
+
+    def get_by_step_number(self, step_number: int) -> TodoItem | None:
+        """Get a todo by its step number."""
+        for item in self.items:
+            if item.step_number == step_number:
+                return item
+        return None
+
+    def mark_running(self, step_number: int) -> None:
+        """Mark a todo as running by step number."""
+        item = self.get_by_step_number(step_number)
+        if item:
+            item.status = "running"
+
+    def mark_completed(self, step_number: int, result: str | None = None) -> None:
+        """Mark a todo as completed by step number."""
+        item = self.get_by_step_number(step_number)
+        if item:
+            item.status = "completed"
+            if result is not None:
+                item.result = result
+
+    def mark_failed(self, step_number: int, result: str | None = None) -> None:
+        """Mark a todo as failed by step number."""
+        item = self.get_by_step_number(step_number)
+        if item:
+            item.status = "failed"
+            if result is not None:
+                item.result = result
+
+    def _dependencies_satisfied(self, item: TodoItem) -> bool:
+        """Check if all dependencies for a todo item are in a terminal state.
+
+        A dependency is satisfied when it has finished executing — either
+        successfully (completed) or not (failed). This prevents downstream
+        todos from being permanently blocked when a dependency fails.
+        The executor/observer is responsible for deciding whether to skip,
+        replan, or continue when a dependency has failed.
+
+        Args:
+            item: The todo item to check dependencies for.
+
+        Returns:
+            True if all dependencies are in a terminal state, False otherwise.
+        """
+        for dep_num in item.depends_on:
+            dep = self.get_by_step_number(dep_num)
+            if dep is None or dep.status not in ("completed", "failed"):
+                return False
+        return True
+
+    def get_ready_todos(self) -> list[TodoItem]:
+        """Get all todos that are ready to execute (pending with satisfied dependencies).
+
+        Returns:
+            List of TodoItem objects that can be executed now.
+        """
+        ready: list[TodoItem] = []
+        for item in self.items:
+            if item.status != "pending":
+                continue
+            if self._dependencies_satisfied(item):
+                ready.append(item)
+        return ready
+
+    @property
+    def can_parallelize(self) -> bool:
+        """Check if multiple todos can run in parallel.
+
+        Returns:
+            True if more than one todo is ready to execute.
+        """
+        return len(self.get_ready_todos()) > 1
+
+    @property
+    def running_count(self) -> int:
+        """Count of currently running todos."""
+        return sum(1 for item in self.items if item.status == "running")
+
+    def get_completed_todos(self) -> list[TodoItem]:
+        """Get all completed todos.
+
+        Returns:
+            List of completed TodoItem objects.
+        """
+        return [item for item in self.items if item.status == "completed"]
+
+    def get_failed_todos(self) -> list[TodoItem]:
+        """Get all failed todos.
+
+        Returns:
+            List of failed TodoItem objects.
+        """
+        return [item for item in self.items if item.status == "failed"]
+
+    def get_pending_todos(self) -> list[TodoItem]:
+        """Get all pending todos.
+
+        Returns:
+            List of pending TodoItem objects.
+        """
+        return [item for item in self.items if item.status == "pending"]
+
+    def replace_pending_todos(self, new_items: list[TodoItem]) -> None:
+        """Replace all pending todos with new items.
+
+        Preserves completed, failed, and running todos, replaces only pending ones.
+        Used during replanning to swap in a new plan for remaining work.
+
+        Args:
+            new_items: The new todo items to replace pending ones.
+        """
+        non_pending = [item for item in self.items if item.status != "pending"]
+        self.items = non_pending + new_items
+
+
+class StepRefinement(BaseModel):
+    """A structured in-place update for a single pending step.
+
+    Returned as part of StepObservation when the Planner learns new
+    information that makes a pending step description more specific.
+    Applied directly — no second LLM call required.
+    """
+
+    step_number: int = Field(description="The step number to update (1-based)")
+    new_description: str = Field(
+        description="The updated, more specific description for this step"
+    )
+
+
+class StepObservation(BaseModel):
+    """Planner's observation after a step execution completes.
+
+    Returned by the PlannerObserver after EVERY step — not just failures.
+    The Planner uses this to decide whether to continue, refine, or replan.
+
+    Based on PLAN-AND-ACT (Section 3.3): the Planner observes what the Executor
+    did and incorporates new information into the remaining plan.
+
+    Attributes:
+        step_completed_successfully: Whether the step achieved its objective.
+        key_information_learned: New information revealed by this step
+            (e.g., "Found 3 products: A, B, C"). Used to refine upcoming steps.
+        remaining_plan_still_valid: Whether pending todos still make sense
+            given the new information. True does NOT mean no refinement needed.
+        suggested_refinements: Structured in-place updates to pending step
+            descriptions. Each entry targets a specific step by number. These
+            are applied directly without a second LLM call.
+            Example: [{"step_number": 3, "new_description": "Select product B (highest rated)"}]
+        needs_full_replan: The remaining plan is fundamentally wrong and must
+            be regenerated from scratch. Mutually exclusive with
+            remaining_plan_still_valid (if this is True, that should be False).
+        replan_reason: Explanation of why a full replan is needed (None if not).
+        goal_already_achieved: The overall task goal has been satisfied early.
+            No more steps needed — skip remaining todos and finalize.
+    """
+
+    step_completed_successfully: bool = Field(
+        description="Whether the step achieved what it was asked to do"
+    )
+    key_information_learned: str = Field(
+        default="",
+        description="What new information this step revealed",
+    )
+    remaining_plan_still_valid: bool = Field(
+        default=True,
+        description="Whether the remaining pending todos still make sense given new information",
+    )
+    suggested_refinements: list[StepRefinement] | None = Field(
+        default=None,
+        description=(
+            "Structured updates to pending step descriptions based on new information. "
+            "Each entry specifies a step_number and new_description. "
+            "Applied directly — no separate replan needed."
+        ),
+    )
+
+    @field_validator("suggested_refinements", mode="before")
+    @classmethod
+    def coerce_single_refinement_to_list(cls, v):
+        """Coerce a single dict refinement into a list to handle LLM returning a single object."""
+        if isinstance(v, dict):
+            return [v]
+        return v
+
+    needs_full_replan: bool = Field(
+        default=False,
+        description="The remaining plan is fundamentally wrong and must be regenerated",
+    )
+    replan_reason: str | None = Field(
+        default=None,
+        description="Explanation of why a full replan is needed",
+    )
+    goal_already_achieved: bool = Field(
+        default=False,
+        description="The overall task goal has been satisfied early; no more steps needed",
+    )
--- a/lib/crewai/src/crewai/utilities/reasoning_handler.py
+++ b/lib/crewai/src/crewai/utilities/reasoning_handler.py
@@ -1,10 +1,13 @@
+"""Handles planning/reasoning for agents before task execution."""
+
+from __future__ import annotations
+
 import json
 import logging
-from typing import Any, Final, Literal, cast
+from typing import TYPE_CHECKING, Any, Final, Literal, cast

 from pydantic import BaseModel, Field

-from crewai.agent import Agent
 from crewai.events.event_bus import crewai_event_bus
 from crewai.events.types.reasoning_events import (
    AgentReasoningCompletedEvent,
@@ -12,14 +15,24 @@ from crewai.events.types.reasoning_events import (
    AgentReasoningStartedEvent,
 )
 from crewai.llm import LLM
-from crewai.task import Task
+from crewai.utilities.llm_utils import create_llm
+from crewai.utilities.planning_types import PlanStep
 from crewai.utilities.string_utils import sanitize_tool_name


+if TYPE_CHECKING:
+    from crewai.agent import Agent
+    from crewai.agent.planning_config import PlanningConfig
+    from crewai.task import Task
+
+
 class ReasoningPlan(BaseModel):
    """Model representing a reasoning plan for a task."""

    plan: str = Field(description="The detailed reasoning plan for the task.")
+    steps: list[PlanStep] = Field(
+        default_factory=list, description="Structured steps to execute"
+    )
    ready: bool = Field(description="Whether the agent is ready to execute the task.")


@@ -29,24 +42,63 @@ class AgentReasoningOutput(BaseModel):
    plan: ReasoningPlan = Field(description="The reasoning plan for the task.")


+# Aliases for backward compatibility
+PlanningPlan = ReasoningPlan
+AgentPlanningOutput = AgentReasoningOutput
+
+
 FUNCTION_SCHEMA: Final[dict[str, Any]] = {
    "type": "function",
    "function": {
        "name": "create_reasoning_plan",
-        "description": "Create or refine a reasoning plan for a task",
+        "description": "Create or refine a reasoning plan for a task with structured steps",
        "parameters": {
            "type": "object",
            "properties": {
                "plan": {
                    "type": "string",
-                    "description": "The detailed reasoning plan for the task.",
+                    "description": "A brief summary of the overall plan.",
+                },
+                "steps": {
+                    "type": "array",
+                    "description": "List of discrete steps to execute the plan",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "step_number": {
+                                "type": "integer",
+                                "description": "Step number (1-based)",
+                            },
+                            "description": {
+                                "type": "string",
+                                "description": "What to do in this step",
+                            },
+                            "tool_to_use": {
+                                "type": ["string", "null"],
+                                "description": "Tool to use for this step, or null if no tool needed",
+                            },
+                            "depends_on": {
+                                "type": "array",
+                                "items": {"type": "integer"},
+                                "description": "Step numbers this step depends on (empty array if none)",
+                            },
+                        },
+                        "required": [
+                            "step_number",
+                            "description",
+                            "tool_to_use",
+                            "depends_on",
+                        ],
+                        "additionalProperties": False,
+                    },
                },
                "ready": {
                    "type": "boolean",
                    "description": "Whether the agent is ready to execute the task.",
                },
            },
-            "required": ["plan", "ready"],
+            "required": ["plan", "steps", "ready"],
+            "additionalProperties": False,
        },
    },
 }
@@ -54,41 +106,101 @@ FUNCTION_SCHEMA: Final[dict[str, Any]] = {

 class AgentReasoning:
    """
-    Handles the agent reasoning process, enabling an agent to reflect and create a plan
-    before executing a task.
+    Handles the agent planning/reasoning process, enabling an agent to reflect
+    and create a plan before executing a task.

    Attributes:
-        task: The task for which the agent is reasoning.
-        agent: The agent performing the reasoning.
-        llm: The language model used for reasoning.
+        task: The task for which the agent is planning (optional).
+        agent: The agent performing the planning.
+        config: The planning configuration.
+        llm: The language model used for planning.
        logger: Logger for logging events and errors.
+        description: Task description or input text for planning.
+        expected_output: Expected output description.
    """

-    def __init__(self, task: Task, agent: Agent) -> None:
-        """Initialize the AgentReasoning with a task and an agent.
+    def __init__(
+        self,
+        agent: Agent,
+        task: Task | None = None,
+        *,
+        description: str | None = None,
+        expected_output: str | None = None,
+    ) -> None:
+        """Initialize the AgentReasoning with an agent and optional task.

        Args:
-            task: The task for which the agent is reasoning.
-            agent: The agent performing the reasoning.
+            agent: The agent performing the planning.
+            task: The task for which the agent is planning (optional).
+            description: Task description or input text (used if task is None).
+            expected_output: Expected output (used if task is None).
        """
-        self.task = task
        self.agent = agent
-        self.llm = cast(LLM, agent.llm)
+        self.task = task
+        # Use task attributes if available, otherwise use provided values
+        self._description = description or (
+            task.description if task else "Complete the requested task"
+        )
+        self._expected_output = expected_output or (
+            task.expected_output if task else "Complete the task successfully"
+        )
+        self.config = self._get_planning_config()
+        self.llm = self._resolve_llm()
        self.logger = logging.getLogger(__name__)

-    def handle_agent_reasoning(self) -> AgentReasoningOutput:
-        """Public method for the reasoning process that creates and refines a plan for the task until the agent is ready to execute it.
+    @property
+    def description(self) -> str:
+        """Get the task/input description."""
+        return self._description
+
+    @property
+    def expected_output(self) -> str:
+        """Get the expected output."""
+        return self._expected_output
+
+    def _get_planning_config(self) -> PlanningConfig:
+        """Get the planning configuration from the agent.

        Returns:
-            AgentReasoningOutput: The output of the agent reasoning process.
+            The planning configuration, using defaults if not set.
        """
-        # Emit a reasoning started event (attempt 1)
+        from crewai.agent.planning_config import PlanningConfig
+
+        if self.agent.planning_config is not None:
+            return self.agent.planning_config
+        # Fallback for backward compatibility
+        return PlanningConfig(
+            max_attempts=getattr(self.agent, "max_reasoning_attempts", None),
+        )
+
+    def _resolve_llm(self) -> LLM:
+        """Resolve which LLM to use for planning.
+
+        Returns:
+            The LLM to use - either from config or the agent's LLM.
+        """
+        if self.config.llm is not None:
+            if isinstance(self.config.llm, LLM):
+                return self.config.llm
+            return create_llm(self.config.llm)
+        return cast(LLM, self.agent.llm)
+
+    def handle_agent_reasoning(self) -> AgentReasoningOutput:
+        """Public method for the planning process that creates and refines a plan
+        for the task until the agent is ready to execute it.
+
+        Returns:
+            AgentReasoningOutput: The output of the agent planning process.
+        """
+        task_id = str(self.task.id) if self.task else "kickoff"
+
+        # Emit a planning started event (attempt 1)
        try:
            crewai_event_bus.emit(
                self.agent,
                AgentReasoningStartedEvent(
                    agent_role=self.agent.role,
-                    task_id=str(self.task.id),
+                    task_id=task_id,
                    attempt=1,
                    from_task=self.task,
                ),
@@ -98,13 +210,13 @@ class AgentReasoning:
            pass

        try:
-            output = self.__handle_agent_reasoning()
+            output = self._execute_planning()

            crewai_event_bus.emit(
                self.agent,
                AgentReasoningCompletedEvent(
                    agent_role=self.agent.role,
-                    task_id=str(self.task.id),
+                    task_id=task_id,
                    plan=output.plan.plan,
                    ready=output.plan.ready,
                    attempt=1,
@@ -115,135 +227,158 @@ class AgentReasoning:

            return output
        except Exception as e:
-            # Emit reasoning failed event
+            # Emit planning failed event
            try:
                crewai_event_bus.emit(
                    self.agent,
                    AgentReasoningFailedEvent(
                        agent_role=self.agent.role,
-                        task_id=str(self.task.id),
+                        task_id=task_id,
                        error=str(e),
                        attempt=1,
                        from_task=self.task,
                        from_agent=self.agent,
                    ),
                )
-            except Exception as e:
-                logging.error(f"Error emitting reasoning failed event: {e}")
+            except Exception as event_error:
+                logging.error(f"Error emitting planning failed event: {event_error}")

            raise

-    def __handle_agent_reasoning(self) -> AgentReasoningOutput:
-        """Private method that handles the agent reasoning process.
+    def _execute_planning(self) -> AgentReasoningOutput:
+        """Execute the planning process.

        Returns:
-            The output of the agent reasoning process.
+            The output of the agent planning process.
        """
-        plan, ready = self.__create_initial_plan()
+        plan, steps, ready = self._create_initial_plan()
+        plan, steps, ready = self._refine_plan_if_needed(plan, steps, ready)

-        plan, ready = self.__refine_plan_if_needed(plan, ready)
-
-        reasoning_plan = ReasoningPlan(plan=plan, ready=ready)
+        reasoning_plan = ReasoningPlan(plan=plan, steps=steps, ready=ready)
        return AgentReasoningOutput(plan=reasoning_plan)

-    def __create_initial_plan(self) -> tuple[str, bool]:
-        """Creates the initial reasoning plan for the task.
+    def _create_initial_plan(self) -> tuple[str, list[PlanStep], bool]:
+        """Creates the initial plan for the task.

        Returns:
-            The initial plan and whether the agent is ready to execute the task.
+            A tuple of the plan summary, list of steps, and whether the agent is ready.
        """
-        reasoning_prompt = self.__create_reasoning_prompt()
+        planning_prompt = self._create_planning_prompt()

        if self.llm.supports_function_calling():
-            plan, ready = self.__call_with_function(reasoning_prompt, "initial_plan")
-            return plan, ready
-        response = _call_llm_with_reasoning_prompt(
-            llm=self.llm,
-            prompt=reasoning_prompt,
-            task=self.task,
-            reasoning_agent=self.agent,
-            backstory=self.__get_agent_backstory(),
-            plan_type="initial_plan",
+            plan, steps, ready = self._call_with_function(
+                planning_prompt, "create_plan"
+            )
+            return plan, steps, ready
+
+        response = self._call_llm_with_prompt(
+            prompt=planning_prompt,
+            plan_type="create_plan",
        )

-        return self.__parse_reasoning_response(str(response))
+        plan, ready = self._parse_planning_response(str(response))
+        return plan, [], ready  # No structured steps from text parsing

-    def __refine_plan_if_needed(self, plan: str, ready: bool) -> tuple[str, bool]:
-        """Refines the reasoning plan if the agent is not ready to execute the task.
+    def _refine_plan_if_needed(
+        self, plan: str, steps: list[PlanStep], ready: bool
+    ) -> tuple[str, list[PlanStep], bool]:
+        """Refines the plan if the agent is not ready to execute the task.

        Args:
-            plan: The current reasoning plan.
+            plan: The current plan.
+            steps: The current list of steps.
            ready: Whether the agent is ready to execute the task.

        Returns:
-            The refined plan and whether the agent is ready to execute the task.
+            The refined plan, steps, and whether the agent is ready to execute.
        """
+
        attempt = 1
-        max_attempts = self.agent.max_reasoning_attempts
+        max_attempts = self.config.max_attempts
+        task_id = str(self.task.id) if self.task else "kickoff"

        while not ready and (max_attempts is None or attempt < max_attempts):
+            attempt += 1
+
            # Emit event for each refinement attempt
            try:
                crewai_event_bus.emit(
                    self.agent,
                    AgentReasoningStartedEvent(
                        agent_role=self.agent.role,
-                        task_id=str(self.task.id),
-                        attempt=attempt + 1,
+                        task_id=task_id,
+                        attempt=attempt,
                        from_task=self.task,
                    ),
                )
            except Exception:  # noqa: S110
                pass

-            refine_prompt = self.__create_refine_prompt(plan)
+            refine_prompt = self._create_refine_prompt(plan)

            if self.llm.supports_function_calling():
-                plan, ready = self.__call_with_function(refine_prompt, "refine_plan")
+                plan, steps, ready = self._call_with_function(
+                    refine_prompt, "refine_plan"
+                )
            else:
-                response = _call_llm_with_reasoning_prompt(
-                    llm=self.llm,
+                response = self._call_llm_with_prompt(
                    prompt=refine_prompt,
-                    task=self.task,
-                    reasoning_agent=self.agent,
-                    backstory=self.__get_agent_backstory(),
                    plan_type="refine_plan",
                )
-                plan, ready = self.__parse_reasoning_response(str(response))
+                plan, ready = self._parse_planning_response(str(response))
+                steps = []  # No structured steps from text parsing

-            attempt += 1
+            # Emit completed event for this refinement attempt
+            try:
+                crewai_event_bus.emit(
+                    self.agent,
+                    AgentReasoningCompletedEvent(
+                        agent_role=self.agent.role,
+                        task_id=task_id,
+                        plan=plan,
+                        ready=ready,
+                        attempt=attempt,
+                        from_task=self.task,
+                        from_agent=self.agent,
+                    ),
+                )
+            except Exception:  # noqa: S110
+                pass

            if max_attempts is not None and attempt >= max_attempts:
                self.logger.warning(
-                    f"Agent reasoning reached maximum attempts ({max_attempts}) without being ready. Proceeding with current plan."
+                    f"Agent planning reached maximum attempts ({max_attempts}) "
+                    "without being ready. Proceeding with current plan."
                )
                break

-        return plan, ready
+        return plan, steps, ready

-    def __call_with_function(self, prompt: str, prompt_type: str) -> tuple[str, bool]:
-        """Calls the LLM with function calling to get a reasoning plan.
+    def _call_with_function(
+        self, prompt: str, plan_type: Literal["create_plan", "refine_plan"]
+    ) -> tuple[str, list[PlanStep], bool]:
+        """Calls the LLM with function calling to get a plan.

        Args:
            prompt: The prompt to send to the LLM.
-            prompt_type: The type of prompt (initial_plan or refine_plan).
+            plan_type: The type of plan being created.

        Returns:
-            A tuple containing the plan and whether the agent is ready.
+            A tuple containing the plan summary, list of steps, and whether the agent is ready.
        """
-        self.logger.debug(f"Using function calling for {prompt_type} reasoning")
+        self.logger.debug(f"Using function calling for {plan_type} planning")

        try:
-            system_prompt = self.agent.i18n.retrieve("reasoning", prompt_type).format(
-                role=self.agent.role,
-                goal=self.agent.goal,
-                backstory=self.__get_agent_backstory(),
-            )
+            system_prompt = self._get_system_prompt()

            # Prepare a simple callable that just returns the tool arguments as JSON
-            def _create_reasoning_plan(plan: str, ready: bool = True) -> str:
-                """Return the reasoning plan result in JSON string form."""
-                return json.dumps({"plan": plan, "ready": ready})
+            def _create_reasoning_plan(
+                plan: str,
+                steps: list[dict[str, Any]] | None = None,
+                ready: bool = True,
+            ) -> str:
+                """Return the planning result in JSON string form."""
+                return json.dumps({"plan": plan, "steps": steps or [], "ready": ready})

            response = self.llm.call(
                [
@@ -255,19 +390,33 @@ class AgentReasoning:
                from_task=self.task,
                from_agent=self.agent,
            )
-
-            self.logger.debug(f"Function calling response: {response[:100]}...")
-
            try:
                result = json.loads(response)
                if "plan" in result and "ready" in result:
-                    return result["plan"], result["ready"]
+                    # Parse steps from the response
+                    steps: list[PlanStep] = []
+                    raw_steps = result.get("steps", [])
+                    try:
+                        for step_data in raw_steps:
+                            step = PlanStep(
+                                step_number=step_data.get("step_number", 0),
+                                description=step_data.get("description", ""),
+                                tool_to_use=step_data.get("tool_to_use"),
+                                depends_on=step_data.get("depends_on", []),
+                            )
+                            steps.append(step)
+                    except Exception as step_error:
+                        self.logger.warning(
+                            f"Failed to parse step: {step_data}, error: {step_error}"
+                        )
+                    return result["plan"], steps, result["ready"]
            except (json.JSONDecodeError, KeyError):
                pass

            response_str = str(response)
            return (
                response_str,
+                [],
                "READY: I am ready to execute the task." in response_str,
            )

@@ -277,13 +426,7 @@ class AgentReasoning:
            )

            try:
-                system_prompt = self.agent.i18n.retrieve(
-                    "reasoning", prompt_type
-                ).format(
-                    role=self.agent.role,
-                    goal=self.agent.goal,
-                    backstory=self.__get_agent_backstory(),
-                )
+                system_prompt = self._get_system_prompt()

                fallback_response = self.llm.call(
                    [
@@ -297,78 +440,165 @@ class AgentReasoning:
                fallback_str = str(fallback_response)
                return (
                    fallback_str,
+                    [],
                    "READY: I am ready to execute the task." in fallback_str,
                )
            except Exception as inner_e:
                self.logger.error(f"Error during fallback text parsing: {inner_e!s}")
                return (
                    "Failed to generate a plan due to an error.",
+                    [],
                    True,
                )  # Default to ready to avoid getting stuck

-    def __get_agent_backstory(self) -> str:
-        """
-        Safely gets the agent's backstory, providing a default if not available.
+    def _call_llm_with_prompt(
+        self,
+        prompt: str,
+        plan_type: Literal["create_plan", "refine_plan"],
+    ) -> str:
+        """Calls the LLM with the planning prompt.
+
+        Args:
+            prompt: The prompt to send to the LLM.
+            plan_type: The type of plan being created.

        Returns:
-            str: The agent's backstory or a default value.
+            The LLM response.
+        """
+        system_prompt = self._get_system_prompt()
+
+        response = self.llm.call(
+            [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": prompt},
+            ],
+            from_task=self.task,
+            from_agent=self.agent,
+        )
+        return str(response)
+
+    def _get_system_prompt(self) -> str:
+        """Get the system prompt for planning.
+
+        Returns:
+            The system prompt, either custom or from i18n.
+        """
+        if self.config.system_prompt is not None:
+            return self.config.system_prompt
+
+        # Try new "planning" section first, fall back to "reasoning" for compatibility
+        try:
+            return self.agent.i18n.retrieve("planning", "system_prompt")
+        except (KeyError, AttributeError):
+            # Fallback to reasoning section for backward compatibility
+            return self.agent.i18n.retrieve("reasoning", "initial_plan").format(
+                role=self.agent.role,
+                goal=self.agent.goal,
+                backstory=self._get_agent_backstory(),
+            )
+
+    def _get_agent_backstory(self) -> str:
+        """Safely gets the agent's backstory, providing a default if not available.
+
+        Returns:
+            The agent's backstory or a default value.
        """
        return getattr(self.agent, "backstory", "No backstory provided")

-    def __create_reasoning_prompt(self) -> str:
-        """
-        Creates a prompt for the agent to reason about the task.
+    def _create_planning_prompt(self) -> str:
+        """Creates a prompt for the agent to plan the task.

        Returns:
-            str: The reasoning prompt.
+            The planning prompt.
        """
-        available_tools = self.__format_available_tools()
+        available_tools = self._format_available_tools()

-        return self.agent.i18n.retrieve("reasoning", "create_plan_prompt").format(
-            role=self.agent.role,
-            goal=self.agent.goal,
-            backstory=self.__get_agent_backstory(),
-            description=self.task.description,
-            expected_output=self.task.expected_output,
-            tools=available_tools,
-        )
+        # Use custom prompt if provided
+        if self.config.plan_prompt is not None:
+            return self.config.plan_prompt.format(
+                role=self.agent.role,
+                goal=self.agent.goal,
+                backstory=self._get_agent_backstory(),
+                description=self.description,
+                expected_output=self.expected_output,
+                tools=available_tools,
+                max_steps=self.config.max_steps,
+            )

-    def __format_available_tools(self) -> str:
-        """
-        Formats the available tools for inclusion in the prompt.
+        # Try new "planning" section first
+        try:
+            return self.agent.i18n.retrieve("planning", "create_plan_prompt").format(
+                description=self.description,
+                expected_output=self.expected_output,
+                tools=available_tools,
+                max_steps=self.config.max_steps,
+            )
+        except (KeyError, AttributeError):
+            # Fallback to reasoning section for backward compatibility
+            return self.agent.i18n.retrieve("reasoning", "create_plan_prompt").format(
+                role=self.agent.role,
+                goal=self.agent.goal,
+                backstory=self._get_agent_backstory(),
+                description=self.description,
+                expected_output=self.expected_output,
+                tools=available_tools,
+            )
+
+    def _format_available_tools(self) -> str:
+        """Formats the available tools for inclusion in the prompt.

        Returns:
-            str: Comma-separated list of tool names.
+            Comma-separated list of tool names.
        """
        try:
-            return ", ".join(
-                [sanitize_tool_name(tool.name) for tool in (self.task.tools or [])]
-            )
+            # Try task tools first, then agent tools
+            tools = []
+            if self.task:
+                tools = self.task.tools or []
+            if not tools:
+                tools = getattr(self.agent, "tools", []) or []
+            if not tools:
+                return "No tools available"
+            return ", ".join([sanitize_tool_name(tool.name) for tool in tools])
        except (AttributeError, TypeError):
            return "No tools available"

-    def __create_refine_prompt(self, current_plan: str) -> str:
-        """
-        Creates a prompt for the agent to refine its reasoning plan.
+    def _create_refine_prompt(self, current_plan: str) -> str:
+        """Creates a prompt for the agent to refine its plan.

        Args:
-            current_plan: The current reasoning plan.
+            current_plan: The current plan.

        Returns:
-            str: The refine prompt.
+            The refine prompt.
        """
-        return self.agent.i18n.retrieve("reasoning", "refine_plan_prompt").format(
-            role=self.agent.role,
-            goal=self.agent.goal,
-            backstory=self.__get_agent_backstory(),
-            current_plan=current_plan,
-        )
+        # Use custom prompt if provided
+        if self.config.refine_prompt is not None:
+            return self.config.refine_prompt.format(
+                role=self.agent.role,
+                goal=self.agent.goal,
+                backstory=self._get_agent_backstory(),
+                current_plan=current_plan,
+                max_steps=self.config.max_steps,
+            )
+
+        # Try new "planning" section first
+        try:
+            return self.agent.i18n.retrieve("planning", "refine_plan_prompt").format(
+                current_plan=current_plan,
+            )
+        except (KeyError, AttributeError):
+            # Fallback to reasoning section for backward compatibility
+            return self.agent.i18n.retrieve("reasoning", "refine_plan_prompt").format(
+                role=self.agent.role,
+                goal=self.agent.goal,
+                backstory=self._get_agent_backstory(),
+                current_plan=current_plan,
+            )

    @staticmethod
-    def __parse_reasoning_response(response: str) -> tuple[str, bool]:
-        """
-        Parses the reasoning response to extract the plan and whether
-        the agent is ready to execute the task.
+    def _parse_planning_response(response: str) -> tuple[str, bool]:
+        """Parses the planning response to extract the plan and readiness.

        Args:
            response: The LLM response.
@@ -380,25 +610,13 @@ class AgentReasoning:
            return "No plan was generated.", False

        plan = response
-        ready = False
-
-        if "READY: I am ready to execute the task." in response:
-            ready = True
+        ready = "READY: I am ready to execute the task." in response

        return plan, ready

-    def _handle_agent_reasoning(self) -> AgentReasoningOutput:
-        """
-        Deprecated method for backward compatibility.
-        Use handle_agent_reasoning() instead.

-        Returns:
-            AgentReasoningOutput: The output of the agent reasoning process.
-        """
-        self.logger.warning(
-            "The _handle_agent_reasoning method is deprecated. Use handle_agent_reasoning instead."
-        )
-        return self.handle_agent_reasoning()
+# Alias for backward compatibility
+AgentPlanning = AgentReasoning


 def _call_llm_with_reasoning_prompt(
@@ -409,7 +627,9 @@ def _call_llm_with_reasoning_prompt(
    backstory: str,
    plan_type: Literal["initial_plan", "refine_plan"],
 ) -> str:
-    """Calls the LLM with the reasoning prompt.
+    """Deprecated: Calls the LLM with the reasoning prompt.
+
+    This function is kept for backward compatibility.

    Args:
        llm: The language model to use.
@@ -417,7 +637,7 @@ def _call_llm_with_reasoning_prompt(
        task: The task for which the agent is reasoning.
        reasoning_agent: The agent performing the reasoning.
        backstory: The agent's backstory.
-        plan_type: The type of plan being created ("initial_plan" or "refine_plan").
+        plan_type: The type of plan being created.

    Returns:
        The LLM response.
--- a/lib/crewai/src/crewai/utilities/step_execution_context.py
+++ b/lib/crewai/src/crewai/utilities/step_execution_context.py
@@ -0,0 +1,64 @@
+"""Context and result types for isolated step execution in Plan-and-Execute architecture.
+
+These types mediate between the AgentExecutor (orchestrator) and StepExecutor (per-step worker).
+StepExecutionContext carries only final results from dependencies — never LLM message histories.
+StepResult carries only the outcome of a step — never internal execution traces.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+
+@dataclass(frozen=True)
+class StepExecutionContext:
+    """Immutable context passed to a StepExecutor for a single todo.
+
+    Contains only the information the Executor needs to complete one step:
+    the task description, goal, and final results from dependency steps.
+    No LLM message history, no execution traces, no shared mutable state.
+
+    Attributes:
+        task_description: The original task description (from Task or kickoff input).
+        task_goal: The expected output / goal of the overall task.
+        dependency_results: Mapping of step_number → final result string
+            for all completed dependencies of the current step.
+    """
+
+    task_description: str
+    task_goal: str
+    dependency_results: dict[int, str] = field(default_factory=dict)
+
+    def get_dependency_result(self, step_number: int) -> str | None:
+        """Get the final result of a dependency step.
+
+        Args:
+            step_number: The step number to look up.
+
+        Returns:
+            The result string if available, None otherwise.
+        """
+        return self.dependency_results.get(step_number)
+
+
+@dataclass
+class StepResult:
+    """Result returned by a StepExecutor after executing a single todo.
+
+    Contains the final outcome and metadata for debugging/metrics.
+    Tool call details are for audit logging only — they are NOT passed
+    to subsequent steps or the Planner.
+
+    Attributes:
+        success: Whether the step completed successfully.
+        result: The final output string from the step.
+        error: Error message if the step failed (None on success).
+        tool_calls_made: List of tool names invoked (for debugging/logging only).
+        execution_time: Wall-clock time in seconds for the step execution.
+    """
+
+    success: bool
+    result: str
+    error: str | None = None
+    tool_calls_made: list[str] = field(default_factory=list)
+    execution_time: float = 0.0
--- a/lib/crewai/tests/agents/test_agent.py
+++ b/lib/crewai/tests/agents/test_agent.py
@@ -1456,7 +1456,7 @@ def test_agent_execute_task_with_tool():
    )

    result = agent.execute_task(task)
-    assert "you should always think about what to do" in result
+    assert "test query" in result


@pytest.mark.vcr()
@@ -1475,9 +1475,9 @@ def test_agent_execute_task_with_custom_llm():
    )

    result = agent.execute_task(task)
-    assert "In circuits they thrive" in result
-    assert "Artificial minds awake" in result
-    assert "Future's coded drive" in result
+    assert "Artificial minds" in result
+    assert "Code and circuits" in result
+    assert "Future undefined" in result


@pytest.mark.vcr()
@@ -2353,3 +2353,68 @@ def test_agent_without_apps_no_platform_tools():

    tools = crew._prepare_tools(agent, task, [])
    assert tools == []
+
+
+def test_agent_mcps_accepts_slug_with_specific_tool():
+    """Agent(mcps=["notion#get_page"]) must pass validation (_SLUG_RE)."""
+    agent = Agent(
+        role="MCP Agent",
+        goal="Test MCP validation",
+        backstory="Test agent",
+        mcps=["notion#get_page"],
+    )
+    assert agent.mcps == ["notion#get_page"]
+
+
+def test_agent_mcps_accepts_slug_with_hyphenated_tool():
+    agent = Agent(
+        role="MCP Agent",
+        goal="Test MCP validation",
+        backstory="Test agent",
+        mcps=["notion#get-page"],
+    )
+    assert agent.mcps == ["notion#get-page"]
+
+
+def test_agent_mcps_accepts_multiple_hash_refs():
+    agent = Agent(
+        role="MCP Agent",
+        goal="Test MCP validation",
+        backstory="Test agent",
+        mcps=["notion#get_page", "notion#search", "github#list_repos"],
+    )
+    assert len(agent.mcps) == 3
+
+
+def test_agent_mcps_accepts_mixed_ref_types():
+    agent = Agent(
+        role="MCP Agent",
+        goal="Test MCP validation",
+        backstory="Test agent",
+        mcps=[
+            "notion#get_page",
+            "notion",
+            "https://mcp.example.com/api",
+        ],
+    )
+    assert len(agent.mcps) == 3
+
+
+def test_agent_mcps_rejects_hash_without_slug():
+    with pytest.raises(ValueError, match="Invalid MCP reference"):
+        Agent(
+            role="MCP Agent",
+            goal="Test MCP validation",
+            backstory="Test agent",
+            mcps=["#get_page"],
+        )
+
+
+def test_agent_mcps_accepts_legacy_prefix_with_tool():
+    agent = Agent(
+        role="MCP Agent",
+        goal="Test MCP validation",
+        backstory="Test agent",
+        mcps=["crewai-amp:notion#get_page"],
+    )
+    assert agent.mcps == ["crewai-amp:notion#get_page"]
--- a/lib/crewai/tests/agents/test_agent_executor.py
+++ b/lib/crewai/tests/agents/test_agent_executor.py
--- a/lib/crewai/tests/agents/test_agent_reasoning.py
+++ b/lib/crewai/tests/agents/test_agent_reasoning.py
@@ -1,240 +1,345 @@
-"""Tests for reasoning in agents."""
+"""Tests for planning/reasoning in agents."""

-import json
+import warnings

 import pytest

-from crewai import Agent, Task
+from crewai import Agent, PlanningConfig, Task
 from crewai.llm import LLM


-@pytest.fixture
-def mock_llm_responses():
-    """Fixture for mock LLM responses."""
-    return {
-        "ready": "I'll solve this simple math problem.\n\nREADY: I am ready to execute the task.\n\n",
-        "not_ready": "I need to think about derivatives.\n\nNOT READY: I need to refine my plan because I'm not sure about the derivative rules.",
-        "ready_after_refine": "I'll use the power rule for derivatives where d/dx(x^n) = n*x^(n-1).\n\nREADY: I am ready to execute the task.",
-        "execution": "4",
-    }
+# =============================================================================
+# Tests for PlanningConfig configuration (no LLM calls needed)
+# =============================================================================


-def test_agent_with_reasoning(mock_llm_responses):
-    """Test agent with reasoning."""
-    llm = LLM("gpt-3.5-turbo")
+def test_planning_config_default_values():
+    """Test PlanningConfig default values."""
+    config = PlanningConfig()
+
+    assert config.max_attempts is None
+    assert config.max_steps == 20
+    assert config.system_prompt is None
+    assert config.plan_prompt is None
+    assert config.refine_prompt is None
+    assert config.llm is None
+
+
+def test_planning_config_custom_values():
+    """Test PlanningConfig with custom values."""
+    config = PlanningConfig(
+        max_attempts=5,
+        max_steps=15,
+        system_prompt="Custom system",
+        plan_prompt="Custom plan: {description}",
+        refine_prompt="Custom refine: {current_plan}",
+        llm="gpt-4",
+    )
+
+    assert config.max_attempts == 5
+    assert config.max_steps == 15
+    assert config.system_prompt == "Custom system"
+    assert config.plan_prompt == "Custom plan: {description}"
+    assert config.refine_prompt == "Custom refine: {current_plan}"
+    assert config.llm == "gpt-4"
+
+
+def test_agent_with_planning_config_custom_prompts():
+    """Test agent with PlanningConfig using custom prompts."""
+    llm = LLM("gpt-4o-mini")
+
+    custom_system_prompt = "You are a specialized planner."
+    custom_plan_prompt = "Plan this task: {description}"
+
+    agent = Agent(
+        role="Test Agent",
+        goal="To test custom prompts",
+        backstory="I am a test agent.",
+        llm=llm,
+        planning_config=PlanningConfig(
+            system_prompt=custom_system_prompt,
+            plan_prompt=custom_plan_prompt,
+            max_steps=10,
+        ),
+        verbose=False,
+    )
+
+    # Just test that the agent is created properly
+    assert agent.planning_config is not None
+    assert agent.planning_config.system_prompt == custom_system_prompt
+    assert agent.planning_config.plan_prompt == custom_plan_prompt
+    assert agent.planning_config.max_steps == 10
+
+
+def test_agent_with_planning_config_disabled():
+    """Test agent with PlanningConfig disabled."""
+    llm = LLM("gpt-4o-mini")
+
+    agent = Agent(
+        role="Test Agent",
+        goal="To test disabled planning",
+        backstory="I am a test agent.",
+        llm=llm,
+        planning=False,
+        verbose=False,
+    )
+
+    # Planning should be disabled
+    assert agent.planning_enabled is False
+
+
+def test_planning_enabled_property():
+    """Test the planning_enabled property on Agent."""
+    llm = LLM("gpt-4o-mini")
+
+    # With planning_config enabled
+    agent_with_planning = Agent(
+        role="Test Agent",
+        goal="Test",
+        backstory="Test",
+        llm=llm,
+        planning=True,
+    )
+    assert agent_with_planning.planning_enabled is True
+
+    # With planning_config disabled
+    agent_disabled = Agent(
+        role="Test Agent",
+        goal="Test",
+        backstory="Test",
+        llm=llm,
+        planning=False,
+    )
+    assert agent_disabled.planning_enabled is False
+
+    # Without planning_config
+    agent_no_planning = Agent(
+        role="Test Agent",
+        goal="Test",
+        backstory="Test",
+        llm=llm,
+    )
+    assert agent_no_planning.planning_enabled is False
+
+
+# =============================================================================
+# Tests for backward compatibility with reasoning=True (no LLM calls)
+# =============================================================================
+
+
+def test_agent_with_reasoning_backward_compat():
+    """Test agent with reasoning=True (backward compatibility)."""
+    llm = LLM("gpt-4o-mini")
+
+    # This should emit a deprecation warning
+    with warnings.catch_warnings(record=True):
+        warnings.simplefilter("always")
+        agent = Agent(
+            role="Test Agent",
+            goal="To test the reasoning feature",
+            backstory="I am a test agent created to verify the reasoning feature works correctly.",
+            llm=llm,
+            reasoning=True,
+            verbose=False,
+        )
+
+    # Should have created a PlanningConfig internally
+    assert agent.planning_config is not None
+    assert agent.planning_enabled is True
+
+
+def test_agent_with_reasoning_and_max_attempts_backward_compat():
+    """Test agent with reasoning=True and max_reasoning_attempts (backward compatibility)."""
+    llm = LLM("gpt-4o-mini")

    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
-        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        backstory="I am a test agent.",
        llm=llm,
        reasoning=True,
-        verbose=True,
+        max_reasoning_attempts=5,
+        verbose=False,
    )

-    task = Task(
-        description="Simple math task: What's 2+2?",
-        expected_output="The answer should be a number.",
-        agent=agent,
-    )
-
-    agent.llm.call = lambda messages, *args, **kwargs: (
-        mock_llm_responses["ready"]
-        if any("create a detailed plan" in msg.get("content", "") for msg in messages)
-        else mock_llm_responses["execution"]
-    )
-
-    result = agent.execute_task(task)
-
-    assert result == mock_llm_responses["execution"]
-    assert "Reasoning Plan:" in task.description
+    # Should have created a PlanningConfig with max_attempts
+    assert agent.planning_config is not None
+    assert agent.planning_config.max_attempts == 5


-def test_agent_with_reasoning_not_ready_initially(mock_llm_responses):
-    """Test agent with reasoning that requires refinement."""
-    llm = LLM("gpt-3.5-turbo")
+# =============================================================================
+# Tests for Agent.kickoff() with planning (uses AgentExecutor)
+# =============================================================================
+
+
+@pytest.mark.vcr()
+def test_agent_kickoff_with_planning():
+    """Test Agent.kickoff() with planning enabled generates a plan."""
+    llm = LLM("gpt-4o-mini")

    agent = Agent(
-        role="Test Agent",
-        goal="To test the reasoning feature",
-        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        role="Math Assistant",
+        goal="Help solve math problems step by step",
+        backstory="A helpful math tutor",
        llm=llm,
-        reasoning=True,
-        max_reasoning_attempts=2,
-        verbose=True,
+        planning_config=PlanningConfig(max_attempts=1),
+        verbose=False,
    )

-    task = Task(
-        description="Complex math task: What's the derivative of x²?",
-        expected_output="The answer should be a mathematical expression.",
-        agent=agent,
-    )
+    result = agent.kickoff("What is 15 + 27?")

-    call_count = [0]
-
-    def mock_llm_call(messages, *args, **kwargs):
-        if any(
-            "create a detailed plan" in msg.get("content", "") for msg in messages
-        ) or any("refine your plan" in msg.get("content", "") for msg in messages):
-            call_count[0] += 1
-            if call_count[0] == 1:
-                return mock_llm_responses["not_ready"]
-            return mock_llm_responses["ready_after_refine"]
-        return "2x"
-
-    agent.llm.call = mock_llm_call
-
-    result = agent.execute_task(task)
-
-    assert result == "2x"
-    assert call_count[0] == 2  # Should have made 2 reasoning calls
-    assert "Reasoning Plan:" in task.description
+    assert result is not None
+    assert "42" in str(result)


-def test_agent_with_reasoning_max_attempts_reached():
-    """Test agent with reasoning that reaches max attempts without being ready."""
-    llm = LLM("gpt-3.5-turbo")
+@pytest.mark.vcr()
+def test_agent_kickoff_without_planning():
+    """Test Agent.kickoff() without planning skips plan generation."""
+    llm = LLM("gpt-4o-mini")

    agent = Agent(
-        role="Test Agent",
-        goal="To test the reasoning feature",
-        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        role="Math Assistant",
+        goal="Help solve math problems",
+        backstory="A helpful assistant",
        llm=llm,
-        reasoning=True,
-        max_reasoning_attempts=2,
-        verbose=True,
+        # No planning_config = no planning
+        verbose=False,
    )

-    task = Task(
-        description="Complex math task: Solve the Riemann hypothesis.",
-        expected_output="A proof or disproof of the hypothesis.",
-        agent=agent,
-    )
+    result = agent.kickoff("What is 8 * 7?")

-    call_count = [0]
-
-    def mock_llm_call(messages, *args, **kwargs):
-        if any(
-            "create a detailed plan" in msg.get("content", "") for msg in messages
-        ) or any("refine your plan" in msg.get("content", "") for msg in messages):
-            call_count[0] += 1
-            return f"Attempt {call_count[0]}: I need more time to think.\n\nNOT READY: I need to refine my plan further."
-        return "This is an unsolved problem in mathematics."
-
-    agent.llm.call = mock_llm_call
-
-    result = agent.execute_task(task)
-
-    assert result == "This is an unsolved problem in mathematics."
-    assert (
-        call_count[0] == 2
-    )  # Should have made exactly 2 reasoning calls (max_attempts)
-    assert "Reasoning Plan:" in task.description
+    assert result is not None
+    assert "56" in str(result)


-def test_agent_reasoning_error_handling():
-    """Test error handling during the reasoning process."""
-    llm = LLM("gpt-3.5-turbo")
+@pytest.mark.vcr()
+def test_agent_kickoff_with_planning_disabled():
+    """Test Agent.kickoff() with planning explicitly disabled via planning=False."""
+    llm = LLM("gpt-4o-mini")

    agent = Agent(
-        role="Test Agent",
-        goal="To test the reasoning feature",
-        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        role="Math Assistant",
+        goal="Help solve math problems",
+        backstory="A helpful assistant",
        llm=llm,
-        reasoning=True,
+        planning=False,  # Explicitly disable planning
+        verbose=False,
    )

-    task = Task(
-        description="Task that will cause an error",
-        expected_output="Output that will never be generated",
-        agent=agent,
-    )
+    result = agent.kickoff("What is 100 / 4?")

-    call_count = [0]
-
-    def mock_llm_call_error(*args, **kwargs):
-        call_count[0] += 1
-        if call_count[0] <= 2:  # First calls are for reasoning
-            raise Exception("LLM error during reasoning")
-        return "Fallback execution result"  # Return a value for task execution
-
-    agent.llm.call = mock_llm_call_error
-
-    result = agent.execute_task(task)
-
-    assert result == "Fallback execution result"
-    assert call_count[0] > 2  # Ensure we called the mock multiple times
+    assert result is not None
+    assert "25" in str(result)


-@pytest.mark.skip(reason="Test requires updates for native tool calling changes")
-def test_agent_with_function_calling():
-    """Test agent with reasoning using function calling."""
-    llm = LLM("gpt-3.5-turbo")
+@pytest.mark.vcr()
+def test_agent_kickoff_multi_step_task_with_planning():
+    """Test Agent.kickoff() with a multi-step task that benefits from planning."""
+    llm = LLM("gpt-4o-mini")

    agent = Agent(
-        role="Test Agent",
-        goal="To test the reasoning feature",
-        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        role="Math Tutor",
+        goal="Solve multi-step math problems",
+        backstory="An expert tutor who explains step by step",
        llm=llm,
-        reasoning=True,
-        verbose=True,
+        planning_config=PlanningConfig(max_attempts=1, max_steps=5),
+        verbose=False,
    )

-    task = Task(
-        description="Simple math task: What's 2+2?",
-        expected_output="The answer should be a number.",
-        agent=agent,
+    # Task requires: find primes, sum them, then double
+    result = agent.kickoff(
+        "Find the first 3 prime numbers, add them together, then multiply by 2."
    )

-    agent.llm.supports_function_calling = lambda: True
-
-    def mock_function_call(messages, *args, **kwargs):
-        if "tools" in kwargs:
-            return json.dumps(
-                {"plan": "I'll solve this simple math problem: 2+2=4.", "ready": True}
-            )
-        return "4"
-
-    agent.llm.call = mock_function_call
-
-    result = agent.execute_task(task)
-
-    assert result == "4"
-    assert "Reasoning Plan:" in task.description
-    assert "I'll solve this simple math problem: 2+2=4." in task.description
+    assert result is not None
+    # First 3 primes: 2, 3, 5 -> sum = 10 -> doubled = 20
+    assert "20" in str(result)


-@pytest.mark.skip(reason="Test requires updates for native tool calling changes")
-def test_agent_with_function_calling_fallback():
-    """Test agent with reasoning using function calling that falls back to text parsing."""
-    llm = LLM("gpt-3.5-turbo")
+# =============================================================================
+# Tests for Agent.execute_task() with planning (uses CrewAgentExecutor)
+# These test the legacy path via handle_reasoning()
+# =============================================================================
+
+
+@pytest.mark.vcr()
+def test_agent_execute_task_with_planning():
+    """Test Agent.execute_task() with planning via CrewAgentExecutor."""
+    llm = LLM("gpt-4o-mini")

    agent = Agent(
-        role="Test Agent",
-        goal="To test the reasoning feature",
-        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        role="Math Assistant",
+        goal="Help solve math problems",
+        backstory="A helpful math tutor",
        llm=llm,
-        reasoning=True,
-        verbose=True,
+        planning_config=PlanningConfig(max_attempts=1),
+        verbose=False,
    )

    task = Task(
-        description="Simple math task: What's 2+2?",
-        expected_output="The answer should be a number.",
+        description="What is 9 + 11?",
+        expected_output="A number",
        agent=agent,
    )

-    agent.llm.supports_function_calling = lambda: True
+    result = agent.execute_task(task)

-    def mock_function_call(messages, *args, **kwargs):
-        if "tools" in kwargs:
-            return "Invalid JSON that will trigger fallback. READY: I am ready to execute the task."
-        return "4"
+    assert result is not None
+    assert "20" in str(result)
+    # Planning should be appended to task description
+    assert "Planning:" in task.description

-    agent.llm.call = mock_function_call
+
+@pytest.mark.vcr()
+def test_agent_execute_task_without_planning():
+    """Test Agent.execute_task() without planning."""
+    llm = LLM("gpt-4o-mini")
+
+    agent = Agent(
+        role="Math Assistant",
+        goal="Help solve math problems",
+        backstory="A helpful assistant",
+        llm=llm,
+        verbose=False,
+    )
+
+    task = Task(
+        description="What is 12 * 3?",
+        expected_output="A number",
+        agent=agent,
+    )

    result = agent.execute_task(task)

-    assert result == "4"
-    assert "Reasoning Plan:" in task.description
-    assert "Invalid JSON that will trigger fallback" in task.description
+    assert result is not None
+    assert "36" in str(result)
+    # No planning should be added
+    assert "Planning:" not in task.description
+
+
+@pytest.mark.vcr()
+def test_agent_execute_task_with_planning_refine():
+    """Test Agent.execute_task() with planning that requires refinement."""
+    llm = LLM("gpt-4o-mini")
+
+    agent = Agent(
+        role="Math Tutor",
+        goal="Solve complex math problems step by step",
+        backstory="An expert tutor",
+        llm=llm,
+        planning_config=PlanningConfig(max_attempts=2),
+        verbose=False,
+    )
+
+    task = Task(
+        description="Calculate the area of a circle with radius 5 (use pi = 3.14)",
+        expected_output="The area as a number",
+        agent=agent,
+    )
+
+    result = agent.execute_task(task)
+
+    assert result is not None
+    # Area = pi * r^2 = 3.14 * 25 = 78.5
+    assert "78" in str(result) or "79" in str(result)
+    assert "Planning:" in task.description
--- a/lib/crewai/tests/agents/test_lite_agent.py
+++ b/lib/crewai/tests/agents/test_lite_agent.py
@@ -359,17 +359,34 @@ def test_sets_flow_context_when_inside_flow():

@pytest.mark.vcr()
 def test_guardrail_is_called_using_string():
+    """Test that a string guardrail triggers events and retries correctly.
+
+    Uses a callable guardrail that deterministically fails on the first
+    attempt and passes on the second. This tests the guardrail event
+    machinery (started/completed events, retry loop) without depending
+    on the LLM to comply with contradictory constraints.
+    """
    guardrail_events: dict[str, list] = defaultdict(list)
    from crewai.events.event_types import (
        LLMGuardrailCompletedEvent,
        LLMGuardrailStartedEvent,
    )

+    # Deterministic guardrail: fail first call, pass second
+    call_count = {"n": 0}
+
+    def fail_then_pass_guardrail(output):
+        call_count["n"] += 1
+        if call_count["n"] == 1:
+            return (False, "Missing required format — please use a numbered list")
+        return (True, output)
+
    agent = Agent(
        role="Sports Analyst",
-        goal="Gather information about the best soccer players",
-        backstory="""You are an expert at gathering and organizing information. You carefully collect details and present them in a structured way.""",
-        guardrail="""Only include Brazilian players, both women and men""",
+        goal="List the best soccer players",
+        backstory="You are an expert at gathering and organizing information.",
+        guardrail=fail_then_pass_guardrail,
+        guardrail_max_retries=3,
    )

    condition = threading.Condition()
@@ -388,7 +405,7 @@ def test_guardrail_is_called_using_string():
            guardrail_events["completed"].append(event)
            condition.notify()

-    result = agent.kickoff(messages="Top 10 best players in the world?")
+    result = agent.kickoff(messages="Top 5 best soccer players in the world?")

    with condition:
        success = condition.wait_for(
@@ -1136,7 +1153,7 @@ def test_lite_agent_memory_instance_recall_and_save_called():
        successful_requests=1,
    )
    mock_memory = Mock()
-    mock_memory._read_only = False
+    mock_memory.read_only = False
    mock_memory.recall.return_value = []
    mock_memory.extract_memories.return_value = ["Fact one.", "Fact two."]

--- a/lib/crewai/tests/cassettes/TestAgentMultimodalAnthropic.test_image_file[anthropic-claude-3-5-haiku-20241022].yaml
+++ b/lib/crewai/tests/cassettes/TestAgentMultimodalAnthropic.test_image_file[anthropic-claude-3-5-haiku-20241022].yaml
--- a/lib/crewai/tests/cassettes/TestAgentMultimodalAnthropic.test_mixed_files[anthropic-claude-3-5-haiku-20241022].yaml
+++ b/lib/crewai/tests/cassettes/TestAgentMultimodalAnthropic.test_mixed_files[anthropic-claude-3-5-haiku-20241022].yaml
--- a/lib/crewai/tests/cassettes/TestAgentMultimodalAnthropic.test_pdf_file[anthropic-claude-3-5-haiku-20241022].yaml
+++ b/lib/crewai/tests/cassettes/TestAgentMultimodalAnthropic.test_pdf_file[anthropic-claude-3-5-haiku-20241022].yaml
@@ -1,15 +1,9 @@
 interactions:
 - request:
    body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"\nCurrent
-      Task: What type of document is this?\n\nBegin! This is VERY important to you,
-      use the tools available and give your best Final Answer, your job depends on
-      it!\n\nThought:"},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stop_sequences":["\nObservation:"],"stream":false,"system":"You
+      Task: What type of document is this?\n\nProvide your complete response:"},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stop_sequences":["\nObservation:"],"stream":false,"system":"You
      are File Analyst. Expert at analyzing various file types.\nYour personal goal
-      is: Analyze and describe files accurately\nTo give my best complete final answer
-      to the task respond using the exact following format:\n\nThought: I now can
-      give a great answer\nFinal Answer: Your final answer must be the great and the
-      most complete as possible, it must be outcome described.\n\nI MUST use these
-      formats, my job depends on it!"}'
+      is: Analyze and describe files accurately"}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
@@ -22,7 +16,7 @@ interactions:
      connection:
      - keep-alive
      content-length:
-      - '1351'
+      - '950'
      content-type:
      - application/json
      host:
@@ -38,35 +32,35 @@ interactions:
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
-      - 0.71.1
+      - 0.73.0
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
-      - 3.12.10
+      - 3.13.3
      x-stainless-timeout:
      - NOT_GIVEN
    method: POST
    uri: https://api.anthropic.com/v1/messages
  response:
    body:
-      string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01AcygCF93tRhc7A3bfXMqe7","type":"message","role":"assistant","content":[{"type":"text","text":"Thought:
-        I can see this is a PDF document, but the image appears to be completely white
-        or blank. Without any visible content, I cannot definitively determine the
-        specific type of document.\n\nFinal Answer: The document is a PDF file, but
-        the provided image shows a blank white page with no discernible content or
-        text. More information or a clearer image would be needed to identify the
-        precise type of document."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":1750,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":89,"service_tier":"standard"}}'
+      string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01C8ZkZMunUVDUDd8mh1r1We","type":"message","role":"assistant","content":[{"type":"text","text":"I
+        apologize, but the image appears to be completely blank or white. Without
+        any visible text, graphics, or distinguishing features, I cannot determine
+        the type of document. The file is a PDF, but the content page seems to be
+        empty or failed to render properly."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":1658,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":58,"service_tier":"standard","inference_geo":"not_available"}}'
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
+      Content-Security-Policy:
+      - CSP-FILTERED
      Content-Type:
      - application/json
      Date:
-      - Fri, 23 Jan 2026 19:08:04 GMT
+      - Thu, 12 Feb 2026 19:30:55 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
@@ -92,7 +86,7 @@ interactions:
      anthropic-ratelimit-requests-remaining:
      - '3999'
      anthropic-ratelimit-requests-reset:
-      - '2026-01-23T19:08:01Z'
+      - '2026-02-12T19:30:53Z'
      anthropic-ratelimit-tokens-limit:
      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
      anthropic-ratelimit-tokens-remaining:
@@ -106,7 +100,112 @@ interactions:
      strict-transport-security:
      - STS-XXX
      x-envoy-upstream-service-time:
-      - '2837'
+      - '2129'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"\nCurrent
+      Task: What type of document is this?\n\nProvide your complete response:"},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stop_sequences":["\nObservation:"],"stream":false,"system":"You
+      are File Analyst. Expert at analyzing various file types.\nYour personal goal
+      is: Analyze and describe files accurately"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '950'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      x-api-key:
+      - X-API-KEY-XXX
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 0.73.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: '{"model":"claude-3-5-haiku-20241022","id":"msg_013jb7edagayZxqGs6ioACyU","type":"message","role":"assistant","content":[{"type":"text","text":"I
+        apologize, but the image appears to be completely blank or white. There are
+        no visible contents or text that I can analyze to determine the type of document.
+        Without any discernible information, I cannot definitively state what type
+        of document this is."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":1658,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":55,"service_tier":"standard","inference_geo":"not_available"}}'
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Security-Policy:
+      - CSP-FILTERED
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 12 Feb 2026 19:30:58 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - ANTHROPIC-ORGANIZATION-ID-XXX
+      anthropic-ratelimit-input-tokens-limit:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-input-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-input-tokens-reset:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-output-tokens-limit:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-output-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-output-tokens-reset:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-requests-limit:
+      - '4000'
+      anthropic-ratelimit-requests-remaining:
+      - '3999'
+      anthropic-ratelimit-requests-reset:
+      - '2026-02-12T19:30:56Z'
+      anthropic-ratelimit-tokens-limit:
+      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-tokens-reset:
+      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - REQUEST-ID-XXX
+      strict-transport-security:
+      - STS-XXX
+      x-envoy-upstream-service-time:
+      - '2005'
    status:
      code: 200
      message: OK
--- a/lib/crewai/tests/cassettes/TestAgentMultimodalAsync.test_async_agent_with_image.yaml
+++ b/lib/crewai/tests/cassettes/TestAgentMultimodalAsync.test_async_agent_with_image.yaml
--- a/lib/crewai/tests/cassettes/TestAgentMultimodalFileTypes.test_audio_gemini.yaml
+++ b/lib/crewai/tests/cassettes/TestAgentMultimodalFileTypes.test_audio_gemini.yaml
--- a/lib/crewai/tests/cassettes/TestAgentMultimodalFileTypes.test_image_openai.yaml
+++ b/lib/crewai/tests/cassettes/TestAgentMultimodalFileTypes.test_image_openai.yaml
--- a/lib/crewai/tests/cassettes/TestAgentMultimodalFileTypes.test_pdf_anthropic.yaml
+++ b/lib/crewai/tests/cassettes/TestAgentMultimodalFileTypes.test_pdf_anthropic.yaml
@@ -1,14 +1,9 @@
 interactions:
 - request:
    body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"\nCurrent
-      Task: What is this document?\n\nBegin! This is VERY important to you, use the
-      tools available and give your best Final Answer, your job depends on it!\n\nThought:"},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stop_sequences":["\nObservation:"],"stream":false,"system":"You
+      Task: What is this document?\n\nProvide your complete response:"},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stop_sequences":["\nObservation:"],"stream":false,"system":"You
      are File Analyst. Expert at analyzing various file types.\nYour personal goal
-      is: Analyze and describe files accurately\nTo give my best complete final answer
-      to the task respond using the exact following format:\n\nThought: I now can
-      give a great answer\nFinal Answer: Your final answer must be the great and the
-      most complete as possible, it must be outcome described.\n\nI MUST use these
-      formats, my job depends on it!"}'
+      is: Analyze and describe files accurately"}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
@@ -21,7 +16,7 @@ interactions:
      connection:
      - keep-alive
      content-length:
-      - '1343'
+      - '942'
      content-type:
      - application/json
      host:
@@ -37,34 +32,35 @@ interactions:
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
-      - 0.71.1
+      - 0.73.0
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
-      - 3.12.10
+      - 3.13.3
      x-stainless-timeout:
      - NOT_GIVEN
    method: POST
    uri: https://api.anthropic.com/v1/messages
  response:
    body:
-      string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01XwAhfdaMxwTNzTy7YhmA5e","type":"message","role":"assistant","content":[{"type":"text","text":"Thought:
-        I can see this is a PDF document, but the image appears to be blank or completely
-        white. Without any visible text or content, I cannot determine the specific
-        type or purpose of this document.\n\nFinal Answer: The document appears to
-        be a blank white PDF page with no discernible text, images, or content visible.
-        It could be an empty document, a scanning error, or a placeholder file."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":1748,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":88,"service_tier":"standard"}}'
+      string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01RnyTYpTE9Dd8BfwyMfuwum","type":"message","role":"assistant","content":[{"type":"text","text":"I
+        apologize, but the image appears to be blank or completely white. Without
+        any visible text or content, I cannot determine the type or nature of the
+        document. If you intended to share a specific document, you may want to check
+        the file and try uploading it again."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":1656,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":59,"service_tier":"standard","inference_geo":"not_available"}}'
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
+      Content-Security-Policy:
+      - CSP-FILTERED
      Content-Type:
      - application/json
      Date:
-      - Fri, 23 Jan 2026 19:08:19 GMT
+      - Thu, 12 Feb 2026 19:29:25 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
@@ -90,7 +86,7 @@ interactions:
      anthropic-ratelimit-requests-remaining:
      - '3999'
      anthropic-ratelimit-requests-reset:
-      - '2026-01-23T19:08:16Z'
+      - '2026-02-12T19:29:23Z'
      anthropic-ratelimit-tokens-limit:
      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
      anthropic-ratelimit-tokens-remaining:
@@ -104,7 +100,111 @@ interactions:
      strict-transport-security:
      - STS-XXX
      x-envoy-upstream-service-time:
-      - '3114'
+      - '2072'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"\nCurrent
+      Task: What is this document?\n\nProvide your complete response:"},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stop_sequences":["\nObservation:"],"stream":false,"system":"You
+      are File Analyst. Expert at analyzing various file types.\nYour personal goal
+      is: Analyze and describe files accurately"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '942'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      x-api-key:
+      - X-API-KEY-XXX
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 0.73.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: '{"model":"claude-3-5-haiku-20241022","id":"msg_011J2La8KpjxAK255NsSpePY","type":"message","role":"assistant","content":[{"type":"text","text":"I
+        apologize, but the document appears to be a blank white page. No text, images,
+        or discernible content is visible in this PDF file. Without any readable information,
+        I cannot determine the type or purpose of this document."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":1656,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":51,"service_tier":"standard","inference_geo":"not_available"}}'
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Security-Policy:
+      - CSP-FILTERED
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 12 Feb 2026 19:29:27 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - ANTHROPIC-ORGANIZATION-ID-XXX
+      anthropic-ratelimit-input-tokens-limit:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-input-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-input-tokens-reset:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-output-tokens-limit:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-output-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-output-tokens-reset:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-requests-limit:
+      - '4000'
+      anthropic-ratelimit-requests-remaining:
+      - '3999'
+      anthropic-ratelimit-requests-reset:
+      - '2026-02-12T19:29:26Z'
+      anthropic-ratelimit-tokens-limit:
+      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-tokens-reset:
+      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - REQUEST-ID-XXX
+      strict-transport-security:
+      - STS-XXX
+      x-envoy-upstream-service-time:
+      - '1802'
    status:
      code: 200
      message: OK
--- a/Show More
+++ b/Show More