chore: remove unused imports in test file

Co-Authored-By: João <joao@crewai.com>
fix: extract Bedrock-style dict tool call arguments correctly (#4495)
2026-04-15 15:32:40 +00:00 · 2026-02-16 11:46:01 +00:00 · 2026-02-16 11:44:54 +00:00
43 changed files with 1672 additions and 6665 deletions
--- a/docs/en/enterprise/features/flow-hitl-management.mdx
+++ b/docs/en/enterprise/features/flow-hitl-management.mdx
@@ -38,21 +38,22 @@ CrewAI Enterprise provides a comprehensive Human-in-the-Loop (HITL) management s
 Configure human review checkpoints within your Flows using the `@human_feedback` decorator. When execution reaches a review point, the system pauses, notifies the assignee via email, and waits for a response.

 ```python
-from crewai.flow.flow import Flow, start, listen, or_
+from crewai.flow.flow import Flow, start, listen
 from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult

 class ContentApprovalFlow(Flow):
    @start()
    def generate_content(self):
+        # AI generates content
        return "Generated marketing copy for Q1 campaign..."

+    @listen(generate_content)
    @human_feedback(
        message="Please review this content for brand compliance:",
        emit=["approved", "rejected", "needs_revision"],
    )
-    @listen(or_("generate_content", "needs_revision"))
-    def review_content(self):
-        return "Marketing copy for review..."
+    def review_content(self, content):
+        return content

    @listen("approved")
    def publish_content(self, result: HumanFeedbackResult):
@@ -61,6 +62,10 @@ class ContentApprovalFlow(Flow):
    @listen("rejected")
    def archive_content(self, result: HumanFeedbackResult):
        print(f"Content rejected. Reason: {result.feedback}")
+
+    @listen("needs_revision")
+    def revise_content(self, result: HumanFeedbackResult):
+        print(f"Revision requested: {result.feedback}")
 ```

 For complete implementation details, see the [Human Feedback in Flows](/en/learn/human-feedback-in-flows) guide.
--- a/docs/en/learn/human-feedback-in-flows.mdx
+++ b/docs/en/learn/human-feedback-in-flows.mdx
@@ -98,43 +98,33 @@ def handle_feedback(self, result):
 When you specify `emit`, the decorator becomes a router. The human's free-form feedback is interpreted by an LLM and collapsed into one of the specified outcomes:

 ```python Code
-from crewai.flow.flow import Flow, start, listen, or_
-from crewai.flow.human_feedback import human_feedback
+@start()
+@human_feedback(
+    message="Do you approve this content for publication?",
+    emit=["approved", "rejected", "needs_revision"],
+    llm="gpt-4o-mini",
+    default_outcome="needs_revision",
+)
+def review_content(self):
+    return "Draft blog post content here..."

-class ReviewFlow(Flow):
-    @start()
-    def generate_content(self):
-        return "Draft blog post content here..."
+@listen("approved")
+def publish(self, result):
+    print(f"Publishing! User said: {result.feedback}")

-    @human_feedback(
-        message="Do you approve this content for publication?",
-        emit=["approved", "rejected", "needs_revision"],
-        llm="gpt-4o-mini",
-        default_outcome="needs_revision",
-    )
-    @listen(or_("generate_content", "needs_revision"))
-    def review_content(self):
-        return "Draft blog post content here..."
+@listen("rejected")
+def discard(self, result):
+    print(f"Discarding. Reason: {result.feedback}")

-    @listen("approved")
-    def publish(self, result):
-        print(f"Publishing! User said: {result.feedback}")
-
-    @listen("rejected")
-    def discard(self, result):
-        print(f"Discarding. Reason: {result.feedback}")
+@listen("needs_revision")
+def revise(self, result):
+    print(f"Revising based on: {result.feedback}")
 ```

-When the human says something like "needs more detail", the LLM collapses that to `"needs_revision"`, which triggers `review_content` again via `or_()` — creating a revision loop. The loop continues until the outcome is `"approved"` or `"rejected"`.
-
 <Tip>
 The LLM uses structured outputs (function calling) when available to guarantee the response is one of your specified outcomes. This makes routing reliable and predictable.
 </Tip>

-<Warning>
-A `@start()` method only runs once at the beginning of the flow. If you need a revision loop, separate the start method from the review method and use `@listen(or_("trigger", "revision_outcome"))` on the review method to enable the self-loop.
-</Warning>
-
 ## HumanFeedbackResult

 The `HumanFeedbackResult` dataclass contains all information about a human feedback interaction:
@@ -198,183 +188,127 @@ Each `HumanFeedbackResult` is appended to `human_feedback_history`, so multiple

 ## Complete Example: Content Approval Workflow

-Here's a full example implementing a content review and approval workflow with a revision loop:
+Here's a full example implementing a content review and approval workflow:

 <CodeGroup>

 ```python Code
-from crewai.flow.flow import Flow, start, listen, or_
+from crewai.flow.flow import Flow, start, listen
 from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult
 from pydantic import BaseModel


 class ContentState(BaseModel):
+    topic: str = ""
    draft: str = ""
+    final_content: str = ""
    revision_count: int = 0
-    status: str = "pending"


 class ContentApprovalFlow(Flow[ContentState]):
-    """A flow that generates content and loops until the human approves."""
+    """A flow that generates content and gets human approval."""

    @start()
-    def generate_draft(self):
-        self.state.draft = "# AI Safety\n\nThis is a draft about AI Safety..."
+    def get_topic(self):
+        self.state.topic = input("What topic should I write about? ")
+        return self.state.topic
+
+    @listen(get_topic)
+    def generate_draft(self, topic):
+        # In real use, this would call an LLM
+        self.state.draft = f"# {topic}\n\nThis is a draft about {topic}..."
        return self.state.draft

+    @listen(generate_draft)
    @human_feedback(
-        message="Please review this draft. Approve, reject, or describe what needs changing:",
+        message="Please review this draft. Reply 'approved', 'rejected', or provide revision feedback:",
        emit=["approved", "rejected", "needs_revision"],
        llm="gpt-4o-mini",
        default_outcome="needs_revision",
    )
-    @listen(or_("generate_draft", "needs_revision"))
-    def review_draft(self):
-        self.state.revision_count += 1
-        return f"{self.state.draft} (v{self.state.revision_count})"
+    def review_draft(self, draft):
+        return draft

    @listen("approved")
    def publish_content(self, result: HumanFeedbackResult):
-        self.state.status = "published"
-        print(f"Content approved and published! Reviewer said: {result.feedback}")
+        self.state.final_content = result.output
+        print("\n✅ Content approved and published!")
+        print(f"Reviewer comment: {result.feedback}")
        return "published"

    @listen("rejected")
    def handle_rejection(self, result: HumanFeedbackResult):
-        self.state.status = "rejected"
-        print(f"Content rejected. Reason: {result.feedback}")
+        print("\n❌ Content rejected")
+        print(f"Reason: {result.feedback}")
        return "rejected"

+    @listen("needs_revision")
+    def revise_content(self, result: HumanFeedbackResult):
+        self.state.revision_count += 1
+        print(f"\n📝 Revision #{self.state.revision_count} requested")
+        print(f"Feedback: {result.feedback}")

+        # In a real flow, you might loop back to generate_draft
+        # For this example, we just acknowledge
+        return "revision_requested"
+
+
+# Run the flow
 flow = ContentApprovalFlow()
 result = flow.kickoff()
-print(f"\nFlow completed. Status: {flow.state.status}, Reviews: {flow.state.revision_count}")
+print(f"\nFlow completed. Revisions requested: {flow.state.revision_count}")
 ```

 ```text Output
-==================================================
-OUTPUT FOR REVIEW:
-==================================================
-# AI Safety
-
-This is a draft about AI Safety... (v1)
-==================================================
-
-Please review this draft. Approve, reject, or describe what needs changing:
-(Press Enter to skip, or type your feedback)
-
-Your feedback: Needs more detail on alignment research
+What topic should I write about? AI Safety

 ==================================================
 OUTPUT FOR REVIEW:
 ==================================================
 # AI Safety

-This is a draft about AI Safety... (v2)
+This is a draft about AI Safety...
 ==================================================

-Please review this draft. Approve, reject, or describe what needs changing:
+Please review this draft. Reply 'approved', 'rejected', or provide revision feedback:
 (Press Enter to skip, or type your feedback)

 Your feedback: Looks good, approved!

-Content approved and published! Reviewer said: Looks good, approved!
+✅ Content approved and published!
+Reviewer comment: Looks good, approved!

-Flow completed. Status: published, Reviews: 2
+Flow completed. Revisions requested: 0
 ```

 </CodeGroup>

-The key pattern is `@listen(or_("generate_draft", "needs_revision"))` — the review method listens to both the initial trigger and its own revision outcome, creating a self-loop that repeats until the human approves or rejects.
-
 ## Combining with Other Decorators

-The `@human_feedback` decorator works with `@start()`, `@listen()`, and `or_()`. Both decorator orderings work — the framework propagates attributes in both directions — but the recommended patterns are:
+The `@human_feedback` decorator works with other flow decorators. Place it as the innermost decorator (closest to the function):

 ```python Code
-# One-shot review at the start of a flow (no self-loop)
+# Correct: @human_feedback is innermost (closest to the function)
@start()
-@human_feedback(message="Review this:", emit=["approved", "rejected"], llm="gpt-4o-mini")
+@human_feedback(message="Review this:")
 def my_start_method(self):
    return "content"

-# Linear review on a listener (no self-loop)
@listen(other_method)
-@human_feedback(message="Review this too:", emit=["good", "bad"], llm="gpt-4o-mini")
+@human_feedback(message="Review this too:")
 def my_listener(self, data):
    return f"processed: {data}"
-
-# Self-loop: review that can loop back for revisions
-@human_feedback(message="Approve or revise?", emit=["approved", "revise"], llm="gpt-4o-mini")
-@listen(or_("upstream_method", "revise"))
-def review_with_loop(self):
-    return "content for review"
 ```

-### Self-loop pattern
-
-To create a revision loop, the review method must listen to **both** an upstream trigger and its own revision outcome using `or_()`:
-
-```python Code
-@start()
-def generate(self):
-    return "initial draft"
-
-@human_feedback(
-    message="Approve or request changes?",
-    emit=["revise", "approved"],
-    llm="gpt-4o-mini",
-    default_outcome="approved",
-)
-@listen(or_("generate", "revise"))
-def review(self):
-    return "content"
-
-@listen("approved")
-def publish(self):
-    return "published"
-```
-
-When the outcome is `"revise"`, the flow routes back to `review` (because it listens to `"revise"` via `or_()`). When the outcome is `"approved"`, the flow continues to `publish`. This works because the flow engine exempts routers from the "fire once" rule, allowing them to re-execute on each loop iteration.
-
-### Chained routers
-
-A listener triggered by one router's outcome can itself be a router:
-
-```python Code
-@start()
-def generate(self):
-    return "draft content"
-
-@human_feedback(message="First review:", emit=["approved", "rejected"], llm="gpt-4o-mini")
-@listen("generate")
-def first_review(self):
-    return "draft content"
-
-@human_feedback(message="Final review:", emit=["publish", "hold"], llm="gpt-4o-mini")
-@listen("approved")
-def final_review(self, prev):
-    return "final content"
-
-@listen("publish")
-def on_publish(self, prev):
-    return "published"
-
-@listen("hold")
-def on_hold(self, prev):
-    return "held for later"
-```
-
-### Limitations
-
- **`@start()` methods run once**: A `@start()` method cannot self-loop. If you need a revision cycle, use a separate `@start()` method as the entry point and put the `@human_feedback` on a `@listen()` method.
- **No `@start()` + `@listen()` on the same method**: This is a Flow framework constraint. A method is either a start point or a listener, not both.
+<Tip>
+Place `@human_feedback` as the innermost decorator (last/closest to the function) so that it wraps the method directly and can capture the return value before passing it to the flow system.
+</Tip>

 ## Best Practices

 ### 1. Write Clear Request Messages

-The `message` parameter is what the human sees. Make it actionable:
+The `message` parameter is what the human sees. Make it actionable:

 ```python Code
 # ✅ Good - clear and actionable
@@ -582,9 +516,9 @@ class ContentPipeline(Flow):
    @start()
    @human_feedback(
        message="Approve this content for publication?",
-        emit=["approved", "rejected"],
+        emit=["approved", "rejected", "needs_revision"],
        llm="gpt-4o-mini",
-        default_outcome="rejected",
+        default_outcome="needs_revision",
        provider=SlackNotificationProvider("#content-reviews"),
    )
    def generate_content(self):
@@ -600,6 +534,11 @@ class ContentPipeline(Flow):
        print(f"Archived. Reason: {result.feedback}")
        return {"status": "archived"}

+    @listen("needs_revision")
+    def queue_revision(self, result):
+        print(f"Queued for revision: {result.feedback}")
+        return {"status": "revision_needed"}
+

 # Starting the flow (will pause and wait for Slack response)
 def start_content_pipeline():
@@ -655,22 +594,22 @@ Over time, the human sees progressively better pre-reviewed output because each
 ```python Code
 class ArticleReviewFlow(Flow):
    @start()
-    def generate_article(self):
-        return self.crew.kickoff(inputs={"topic": "AI Safety"}).raw
-
    @human_feedback(
        message="Review this article draft:",
        emit=["approved", "needs_revision"],
        llm="gpt-4o-mini",
        learn=True,  # enable HITL learning
    )
-    @listen(or_("generate_article", "needs_revision"))
-    def review_article(self):
-        return self.last_human_feedback.output if self.last_human_feedback else "article draft"
+    def generate_article(self):
+        return self.crew.kickoff(inputs={"topic": "AI Safety"}).raw

    @listen("approved")
    def publish(self):
        print(f"Publishing: {self.last_human_feedback.output}")
+
+    @listen("needs_revision")
+    def revise(self):
+        print("Revising based on feedback...")
 ```

 **First run**: The human sees the raw output and says "Always include citations for factual claims." The lesson is distilled and stored in memory.
--- a/docs/ko/enterprise/features/flow-hitl-management.mdx
+++ b/docs/ko/enterprise/features/flow-hitl-management.mdx
@@ -38,21 +38,22 @@ CrewAI Enterprise는 AI 워크플로우를 협업적인 인간-AI 프로세스
 `@human_feedback` 데코레이터를 사용하여 Flow 내에 인간 검토 체크포인트를 구성합니다. 실행이 검토 포인트에 도달하면 시스템이 일시 중지되고, 담당자에게 이메일로 알리며, 응답을 기다립니다.

 ```python
-from crewai.flow.flow import Flow, start, listen, or_
+from crewai.flow.flow import Flow, start, listen
 from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult

 class ContentApprovalFlow(Flow):
    @start()
    def generate_content(self):
+        # AI가 콘텐츠 생성
        return "Q1 캠페인용 마케팅 카피 생성..."

+    @listen(generate_content)
    @human_feedback(
        message="브랜드 준수를 위해 이 콘텐츠를 검토해 주세요:",
        emit=["approved", "rejected", "needs_revision"],
    )
-    @listen(or_("generate_content", "needs_revision"))
-    def review_content(self):
-        return "검토용 마케팅 카피..."
+    def review_content(self, content):
+        return content

    @listen("approved")
    def publish_content(self, result: HumanFeedbackResult):
@@ -61,6 +62,10 @@ class ContentApprovalFlow(Flow):
    @listen("rejected")
    def archive_content(self, result: HumanFeedbackResult):
        print(f"콘텐츠 거부됨. 사유: {result.feedback}")
+
+    @listen("needs_revision")
+    def revise_content(self, result: HumanFeedbackResult):
+        print(f"수정 요청: {result.feedback}")
 ```

 완전한 구현 세부 사항은 [Flow에서 인간 피드백](/ko/learn/human-feedback-in-flows) 가이드를 참조하세요.
--- a/docs/ko/learn/human-feedback-in-flows.mdx
+++ b/docs/ko/learn/human-feedback-in-flows.mdx
@@ -98,43 +98,33 @@ def handle_feedback(self, result):
 `emit`을 지정하면, 데코레이터는 라우터가 됩니다. 인간의 자유 형식 피드백이 LLM에 의해 해석되어 지정된 outcome 중 하나로 매핑됩니다:

 ```python Code
-from crewai.flow.flow import Flow, start, listen, or_
-from crewai.flow.human_feedback import human_feedback
+@start()
+@human_feedback(
+    message="이 콘텐츠의 출판을 승인하시겠습니까?",
+    emit=["approved", "rejected", "needs_revision"],
+    llm="gpt-4o-mini",
+    default_outcome="needs_revision",
+)
+def review_content(self):
+    return "블로그 게시물 초안 내용..."

-class ReviewFlow(Flow):
-    @start()
-    def generate_content(self):
-        return "블로그 게시물 초안 내용..."
+@listen("approved")
+def publish(self, result):
+    print(f"출판 중! 사용자 의견: {result.feedback}")

-    @human_feedback(
-        message="이 콘텐츠의 출판을 승인하시겠습니까?",
-        emit=["approved", "rejected", "needs_revision"],
-        llm="gpt-4o-mini",
-        default_outcome="needs_revision",
-    )
-    @listen(or_("generate_content", "needs_revision"))
-    def review_content(self):
-        return "블로그 게시물 초안 내용..."
+@listen("rejected")
+def discard(self, result):
+    print(f"폐기됨. 이유: {result.feedback}")

-    @listen("approved")
-    def publish(self, result):
-        print(f"출판 중! 사용자 의견: {result.feedback}")
-
-    @listen("rejected")
-    def discard(self, result):
-        print(f"폐기됨. 이유: {result.feedback}")
+@listen("needs_revision")
+def revise(self, result):
+    print(f"다음을 기반으로 수정 중: {result.feedback}")
 ```

-사용자가 "더 자세한 내용이 필요합니다"와 같이 말하면, LLM이 이를 `"needs_revision"`으로 매핑하고, `or_()`를 통해 `review_content`가 다시 트리거됩니다 — 수정 루프가 생성됩니다. outcome이 `"approved"` 또는 `"rejected"`가 될 때까지 루프가 계속됩니다.
-
 <Tip>
 LLM은 가능한 경우 구조화된 출력(function calling)을 사용하여 응답이 지정된 outcome 중 하나임을 보장합니다. 이로 인해 라우팅이 신뢰할 수 있고 예측 가능해집니다.
 </Tip>

-<Warning>
-`@start()` 메서드는 flow 시작 시 한 번만 실행됩니다. 수정 루프가 필요한 경우, start 메서드를 review 메서드와 분리하고 review 메서드에 `@listen(or_("trigger", "revision_outcome"))`를 사용하여 self-loop을 활성화하세요.
-</Warning>
-
 ## HumanFeedbackResult

 `HumanFeedbackResult` 데이터클래스는 인간 피드백 상호작용에 대한 모든 정보를 포함합니다:
@@ -203,162 +193,116 @@ def summarize(self):
 <CodeGroup>

 ```python Code
-from crewai.flow.flow import Flow, start, listen, or_
+from crewai.flow.flow import Flow, start, listen
 from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult
 from pydantic import BaseModel


 class ContentState(BaseModel):
+    topic: str = ""
    draft: str = ""
+    final_content: str = ""
    revision_count: int = 0
-    status: str = "pending"


 class ContentApprovalFlow(Flow[ContentState]):
-    """콘텐츠를 생성하고 승인될 때까지 반복하는 Flow."""
+    """콘텐츠를 생성하고 인간의 승인을 받는 Flow입니다."""

    @start()
-    def generate_draft(self):
-        self.state.draft = "# AI 안전\n\nAI 안전에 대한 초안..."
+    def get_topic(self):
+        self.state.topic = input("어떤 주제에 대해 글을 쓸까요? ")
+        return self.state.topic
+
+    @listen(get_topic)
+    def generate_draft(self, topic):
+        # 실제 사용에서는 LLM을 호출합니다
+        self.state.draft = f"# {topic}\n\n{topic}에 대한 초안입니다..."
        return self.state.draft

+    @listen(generate_draft)
    @human_feedback(
-        message="이 초안을 검토해 주세요. 승인, 거부 또는 변경이 필요한 사항을 설명해 주세요:",
+        message="이 초안을 검토해 주세요. 'approved', 'rejected'로 답하거나 수정 피드백을 제공해 주세요:",
        emit=["approved", "rejected", "needs_revision"],
        llm="gpt-4o-mini",
        default_outcome="needs_revision",
    )
-    @listen(or_("generate_draft", "needs_revision"))
-    def review_draft(self):
-        self.state.revision_count += 1
-        return f"{self.state.draft} (v{self.state.revision_count})"
+    def review_draft(self, draft):
+        return draft

    @listen("approved")
    def publish_content(self, result: HumanFeedbackResult):
-        self.state.status = "published"
-        print(f"콘텐츠 승인 및 게시! 리뷰어 의견: {result.feedback}")
+        self.state.final_content = result.output
+        print("\n✅ 콘텐츠가 승인되어 출판되었습니다!")
+        print(f"검토자 코멘트: {result.feedback}")
        return "published"

    @listen("rejected")
    def handle_rejection(self, result: HumanFeedbackResult):
-        self.state.status = "rejected"
-        print(f"콘텐츠 거부됨. 이유: {result.feedback}")
+        print("\n❌ 콘텐츠가 거부되었습니다")
+        print(f"이유: {result.feedback}")
        return "rejected"

+    @listen("needs_revision")
+    def revise_content(self, result: HumanFeedbackResult):
+        self.state.revision_count += 1
+        print(f"\n📝 수정 #{self.state.revision_count} 요청됨")
+        print(f"피드백: {result.feedback}")

+        # 실제 Flow에서는 generate_draft로 돌아갈 수 있습니다
+        # 이 예제에서는 단순히 확인합니다
+        return "revision_requested"
+
+
+# Flow 실행
 flow = ContentApprovalFlow()
 result = flow.kickoff()
-print(f"\nFlow 완료. 상태: {flow.state.status}, 검토 횟수: {flow.state.revision_count}")
+print(f"\nFlow 완료. 요청된 수정: {flow.state.revision_count}")
 ```

 ```text Output
-==================================================
-OUTPUT FOR REVIEW:
-==================================================
-# AI 안전
-
-AI 안전에 대한 초안... (v1)
-==================================================
-
-이 초안을 검토해 주세요. 승인, 거부 또는 변경이 필요한 사항을 설명해 주세요:
-(Press Enter to skip, or type your feedback)
-
-Your feedback: 더 자세한 내용이 필요합니다
+어떤 주제에 대해 글을 쓸까요? AI 안전

 ==================================================
 OUTPUT FOR REVIEW:
 ==================================================
 # AI 안전

-AI 안전에 대한 초안... (v2)
+AI 안전에 대한 초안입니다...
 ==================================================

-이 초안을 검토해 주세요. 승인, 거부 또는 변경이 필요한 사항을 설명해 주세요:
+이 초안을 검토해 주세요. 'approved', 'rejected'로 답하거나 수정 피드백을 제공해 주세요:
 (Press Enter to skip, or type your feedback)

 Your feedback: 좋아 보입니다, 승인!

-콘텐츠 승인 및 게시! 리뷰어 의견: 좋아 보입니다, 승인!
+✅ 콘텐츠가 승인되어 출판되었습니다!
+검토자 코멘트: 좋아 보입니다, 승인!

-Flow 완료. 상태: published, 검토 횟수: 2
+Flow 완료. 요청된 수정: 0
 ```

 </CodeGroup>

 ## 다른 데코레이터와 결합하기

-`@human_feedback` 데코레이터는 `@start()`, `@listen()`, `or_()`와 함께 작동합니다. 데코레이터 순서는 두 가지 모두 동작합니다—프레임워크가 양방향으로 속성을 전파합니다—하지만 권장 패턴은 다음과 같습니다:
+`@human_feedback` 데코레이터는 다른 Flow 데코레이터와 함께 작동합니다. 가장 안쪽 데코레이터(함수에 가장 가까운)로 배치하세요:

 ```python Code
-# Flow 시작 시 일회성 검토 (self-loop 없음)
+# 올바름: @human_feedback이 가장 안쪽(함수에 가장 가까움)
@start()
-@human_feedback(message="이것을 검토해 주세요:", emit=["approved", "rejected"], llm="gpt-4o-mini")
+@human_feedback(message="이것을 검토해 주세요:")
 def my_start_method(self):
    return "content"

-# 리스너에서 선형 검토 (self-loop 없음)
@listen(other_method)
-@human_feedback(message="이것도 검토해 주세요:", emit=["good", "bad"], llm="gpt-4o-mini")
+@human_feedback(message="이것도 검토해 주세요:")
 def my_listener(self, data):
    return f"processed: {data}"
-
-# Self-loop: 수정을 위해 반복할 수 있는 검토
-@human_feedback(message="승인 또는 수정 요청?", emit=["approved", "revise"], llm="gpt-4o-mini")
-@listen(or_("upstream_method", "revise"))
-def review_with_loop(self):
-    return "content for review"
 ```

-### Self-loop 패턴
-
-수정 루프를 만들려면 `or_()`를 사용하여 검토 메서드가 **상위 트리거**와 **자체 수정 outcome**을 모두 리스닝해야 합니다:
-
-```python Code
-@start()
-def generate(self):
-    return "initial draft"
-
-@human_feedback(
-    message="승인하시겠습니까, 아니면 변경을 요청하시겠습니까?",
-    emit=["revise", "approved"],
-    llm="gpt-4o-mini",
-    default_outcome="approved",
-)
-@listen(or_("generate", "revise"))
-def review(self):
-    return "content"
-
-@listen("approved")
-def publish(self):
-    return "published"
-```
-
-outcome이 `"revise"`이면 flow가 `review`로 다시 라우팅됩니다 (`or_()`를 통해 `"revise"`를 리스닝하기 때문). outcome이 `"approved"`이면 flow가 `publish`로 계속됩니다. flow 엔진이 라우터를 "한 번만 실행" 규칙에서 제외하여 각 루프 반복마다 재실행할 수 있기 때문에 이 패턴이 동작합니다.
-
-### 체인된 라우터
-
-한 라우터의 outcome으로 트리거된 리스너가 그 자체로 라우터가 될 수 있습니다:
-
-```python Code
-@start()
-@human_feedback(message="첫 번째 검토:", emit=["approved", "rejected"], llm="gpt-4o-mini")
-def draft(self):
-    return "draft content"
-
-@listen("approved")
-@human_feedback(message="최종 검토:", emit=["publish", "revise"], llm="gpt-4o-mini")
-def final_review(self, prev):
-    return "final content"
-
-@listen("publish")
-def on_publish(self, prev):
-    return "published"
-```
-
-### 제한 사항
-
- **`@start()` 메서드는 한 번만 실행**: `@start()` 메서드는 self-loop할 수 없습니다. 수정 주기가 필요하면 별도의 `@start()` 메서드를 진입점으로 사용하고 `@listen()` 메서드에 `@human_feedback`를 배치하세요.
- **동일 메서드에 `@start()` + `@listen()` 불가**: 이는 Flow 프레임워크 제약입니다. 메서드는 시작점이거나 리스너여야 하며, 둘 다일 수 없습니다.
+<Tip>
+`@human_feedback`를 가장 안쪽 데코레이터(마지막/함수에 가장 가까움)로 배치하여 메서드를 직접 래핑하고 Flow 시스템에 전달하기 전에 반환 값을 캡처할 수 있도록 하세요.
+</Tip>

 ## 모범 사례

@@ -572,9 +516,9 @@ class ContentPipeline(Flow):
    @start()
    @human_feedback(
        message="이 콘텐츠의 출판을 승인하시겠습니까?",
-        emit=["approved", "rejected"],
+        emit=["approved", "rejected", "needs_revision"],
        llm="gpt-4o-mini",
-        default_outcome="rejected",
+        default_outcome="needs_revision",
        provider=SlackNotificationProvider("#content-reviews"),
    )
    def generate_content(self):
@@ -590,6 +534,11 @@ class ContentPipeline(Flow):
        print(f"보관됨. 이유: {result.feedback}")
        return {"status": "archived"}

+    @listen("needs_revision")
+    def queue_revision(self, result):
+        print(f"수정 대기열에 추가됨: {result.feedback}")
+        return {"status": "revision_needed"}
+

 # Flow 시작 (Slack 응답을 기다리며 일시 중지)
 def start_content_pipeline():
@@ -645,22 +594,22 @@ async def on_slack_feedback_async(flow_id: str, slack_message: str):
 ```python Code
 class ArticleReviewFlow(Flow):
    @start()
-    def generate_article(self):
-        return self.crew.kickoff(inputs={"topic": "AI Safety"}).raw
-
    @human_feedback(
-        message="이 글 초안을 검토해 주세요:",
+        message="Review this article draft:",
        emit=["approved", "needs_revision"],
        llm="gpt-4o-mini",
-        learn=True,
+        learn=True,  # HITL 학습 활성화
    )
-    @listen(or_("generate_article", "needs_revision"))
-    def review_article(self):
-        return self.last_human_feedback.output if self.last_human_feedback else "article draft"
+    def generate_article(self):
+        return self.crew.kickoff(inputs={"topic": "AI Safety"}).raw

    @listen("approved")
    def publish(self):
        print(f"Publishing: {self.last_human_feedback.output}")
+
+    @listen("needs_revision")
+    def revise(self):
+        print("Revising based on feedback...")
 ```

 **첫 번째 실행**: 인간이 원시 출력을 보고 "사실에 대한 주장에는 항상 인용을 포함하세요."라고 말합니다. 교훈이 추출되어 메모리에 저장됩니다.
--- a/docs/pt-BR/enterprise/features/flow-hitl-management.mdx
+++ b/docs/pt-BR/enterprise/features/flow-hitl-management.mdx
@@ -38,21 +38,22 @@ O CrewAI Enterprise oferece um sistema abrangente de gerenciamento Human-in-the-
 Configure checkpoints de revisão humana em seus Flows usando o decorador `@human_feedback`. Quando a execução atinge um ponto de revisão, o sistema pausa, notifica o responsável via email e aguarda uma resposta.

 ```python
-from crewai.flow.flow import Flow, start, listen, or_
+from crewai.flow.flow import Flow, start, listen
 from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult

 class ContentApprovalFlow(Flow):
    @start()
    def generate_content(self):
+        # IA gera conteúdo
        return "Texto de marketing gerado para campanha Q1..."

+    @listen(generate_content)
    @human_feedback(
        message="Por favor, revise este conteúdo para conformidade com a marca:",
        emit=["approved", "rejected", "needs_revision"],
    )
-    @listen(or_("generate_content", "needs_revision"))
-    def review_content(self):
-        return "Texto de marketing para revisão..."
+    def review_content(self, content):
+        return content

    @listen("approved")
    def publish_content(self, result: HumanFeedbackResult):
@@ -61,6 +62,10 @@ class ContentApprovalFlow(Flow):
    @listen("rejected")
    def archive_content(self, result: HumanFeedbackResult):
        print(f"Conteúdo rejeitado. Motivo: {result.feedback}")
+
+    @listen("needs_revision")
+    def revise_content(self, result: HumanFeedbackResult):
+        print(f"Revisão solicitada: {result.feedback}")
 ```

 Para detalhes completos de implementação, consulte o guia [Feedback Humano em Flows](/pt-BR/learn/human-feedback-in-flows).
--- a/docs/pt-BR/learn/human-feedback-in-flows.mdx
+++ b/docs/pt-BR/learn/human-feedback-in-flows.mdx
@@ -98,43 +98,33 @@ def handle_feedback(self, result):
 Quando você especifica `emit`, o decorador se torna um roteador. O feedback livre do humano é interpretado por um LLM e mapeado para um dos outcomes especificados:

 ```python Code
-from crewai.flow.flow import Flow, start, listen, or_
-from crewai.flow.human_feedback import human_feedback
+@start()
+@human_feedback(
+    message="Você aprova este conteúdo para publicação?",
+    emit=["approved", "rejected", "needs_revision"],
+    llm="gpt-4o-mini",
+    default_outcome="needs_revision",
+)
+def review_content(self):
+    return "Rascunho do post do blog aqui..."

-class ReviewFlow(Flow):
-    @start()
-    def generate_content(self):
-        return "Rascunho do post do blog aqui..."
+@listen("approved")
+def publish(self, result):
+    print(f"Publicando! Usuário disse: {result.feedback}")

-    @human_feedback(
-        message="Você aprova este conteúdo para publicação?",
-        emit=["approved", "rejected", "needs_revision"],
-        llm="gpt-4o-mini",
-        default_outcome="needs_revision",
-    )
-    @listen(or_("generate_content", "needs_revision"))
-    def review_content(self):
-        return "Rascunho do post do blog aqui..."
+@listen("rejected")
+def discard(self, result):
+    print(f"Descartando. Motivo: {result.feedback}")

-    @listen("approved")
-    def publish(self, result):
-        print(f"Publicando! Usuário disse: {result.feedback}")
-
-    @listen("rejected")
-    def discard(self, result):
-        print(f"Descartando. Motivo: {result.feedback}")
+@listen("needs_revision")
+def revise(self, result):
+    print(f"Revisando baseado em: {result.feedback}")
 ```

-Quando o humano diz algo como "precisa de mais detalhes", o LLM mapeia para `"needs_revision"`, que dispara `review_content` novamente via `or_()` — criando um loop de revisão. O loop continua até que o outcome seja `"approved"` ou `"rejected"`.
-
 <Tip>
 O LLM usa saídas estruturadas (function calling) quando disponível para garantir que a resposta seja um dos seus outcomes especificados. Isso torna o roteamento confiável e previsível.
 </Tip>

-<Warning>
-Um método `@start()` só executa uma vez no início do flow. Se você precisa de um loop de revisão, separe o método start do método de revisão e use `@listen(or_("trigger", "revision_outcome"))` no método de revisão para habilitar o self-loop.
-</Warning>
-
 ## HumanFeedbackResult

 O dataclass `HumanFeedbackResult` contém todas as informações sobre uma interação de feedback humano:
@@ -203,162 +193,116 @@ Aqui está um exemplo completo implementando um fluxo de revisão e aprovação
 <CodeGroup>

 ```python Code
-from crewai.flow.flow import Flow, start, listen, or_
+from crewai.flow.flow import Flow, start, listen
 from crewai.flow.human_feedback import human_feedback, HumanFeedbackResult
 from pydantic import BaseModel


 class ContentState(BaseModel):
+    topic: str = ""
    draft: str = ""
+    final_content: str = ""
    revision_count: int = 0
-    status: str = "pending"


 class ContentApprovalFlow(Flow[ContentState]):
-    """Um flow que gera conteúdo e faz loop até o humano aprovar."""
+    """Um flow que gera conteúdo e obtém aprovação humana."""

    @start()
-    def generate_draft(self):
-        self.state.draft = "# IA Segura\n\nEste é um rascunho sobre IA Segura..."
+    def get_topic(self):
+        self.state.topic = input("Sobre qual tópico devo escrever? ")
+        return self.state.topic
+
+    @listen(get_topic)
+    def generate_draft(self, topic):
+        # Em uso real, isso chamaria um LLM
+        self.state.draft = f"# {topic}\n\nEste é um rascunho sobre {topic}..."
        return self.state.draft

+    @listen(generate_draft)
    @human_feedback(
-        message="Por favor, revise este rascunho. Aprove, rejeite ou descreva o que precisa mudar:",
+        message="Por favor, revise este rascunho. Responda 'approved', 'rejected', ou forneça feedback de revisão:",
        emit=["approved", "rejected", "needs_revision"],
        llm="gpt-4o-mini",
        default_outcome="needs_revision",
    )
-    @listen(or_("generate_draft", "needs_revision"))
-    def review_draft(self):
-        self.state.revision_count += 1
-        return f"{self.state.draft} (v{self.state.revision_count})"
+    def review_draft(self, draft):
+        return draft

    @listen("approved")
    def publish_content(self, result: HumanFeedbackResult):
-        self.state.status = "published"
-        print(f"Conteúdo aprovado e publicado! Revisor disse: {result.feedback}")
+        self.state.final_content = result.output
+        print("\n✅ Conteúdo aprovado e publicado!")
+        print(f"Comentário do revisor: {result.feedback}")
        return "published"

    @listen("rejected")
    def handle_rejection(self, result: HumanFeedbackResult):
-        self.state.status = "rejected"
-        print(f"Conteúdo rejeitado. Motivo: {result.feedback}")
+        print("\n❌ Conteúdo rejeitado")
+        print(f"Motivo: {result.feedback}")
        return "rejected"

+    @listen("needs_revision")
+    def revise_content(self, result: HumanFeedbackResult):
+        self.state.revision_count += 1
+        print(f"\n📝 Revisão #{self.state.revision_count} solicitada")
+        print(f"Feedback: {result.feedback}")

+        # Em um flow real, você pode voltar para generate_draft
+        # Para este exemplo, apenas reconhecemos
+        return "revision_requested"
+
+
+# Executar o flow
 flow = ContentApprovalFlow()
 result = flow.kickoff()
-print(f"\nFlow finalizado. Status: {flow.state.status}, Revisões: {flow.state.revision_count}")
+print(f"\nFlow concluído. Revisões solicitadas: {flow.state.revision_count}")
 ```

 ```text Output
-==================================================
-OUTPUT FOR REVIEW:
-==================================================
-# IA Segura
-
-Este é um rascunho sobre IA Segura... (v1)
-==================================================
-
-Por favor, revise este rascunho. Aprove, rejeite ou descreva o que precisa mudar:
-(Press Enter to skip, or type your feedback)
-
-Your feedback: Preciso de mais detalhes sobre segurança em IA.
+Sobre qual tópico devo escrever? Segurança em IA

 ==================================================
 OUTPUT FOR REVIEW:
 ==================================================
-# IA Segura
+# Segurança em IA

-Este é um rascunho sobre IA Segura... (v2)
+Este é um rascunho sobre Segurança em IA...
 ==================================================

-Por favor, revise este rascunho. Aprove, rejeite ou descreva o que precisa mudar:
+Por favor, revise este rascunho. Responda 'approved', 'rejected', ou forneça feedback de revisão:
 (Press Enter to skip, or type your feedback)

 Your feedback: Parece bom, aprovado!

-Conteúdo aprovado e publicado! Revisor disse: Parece bom, aprovado!
+✅ Conteúdo aprovado e publicado!
+Comentário do revisor: Parece bom, aprovado!

-Flow finalizado. Status: published, Revisões: 2
+Flow concluído. Revisões solicitadas: 0
 ```

 </CodeGroup>

 ## Combinando com Outros Decoradores

-O decorador `@human_feedback` funciona com `@start()`, `@listen()` e `or_()`. Ambas as ordens de decoradores funcionam — o framework propaga atributos em ambas as direções — mas os padrões recomendados são:
+O decorador `@human_feedback` funciona com outros decoradores de flow. Coloque-o como o decorador mais interno (mais próximo da função):

 ```python Code
-# Revisão única no início do flow (sem self-loop)
+# Correto: @human_feedback é o mais interno (mais próximo da função)
@start()
-@human_feedback(message="Revise isto:", emit=["approved", "rejected"], llm="gpt-4o-mini")
+@human_feedback(message="Revise isto:")
 def my_start_method(self):
    return "content"

-# Revisão linear em um listener (sem self-loop)
@listen(other_method)
-@human_feedback(message="Revise isto também:", emit=["good", "bad"], llm="gpt-4o-mini")
+@human_feedback(message="Revise isto também:")
 def my_listener(self, data):
    return f"processed: {data}"
-
-# Self-loop: revisão que pode voltar para revisões
-@human_feedback(message="Aprovar ou revisar?", emit=["approved", "revise"], llm="gpt-4o-mini")
-@listen(or_("upstream_method", "revise"))
-def review_with_loop(self):
-    return "content for review"
 ```

-### Padrão de self-loop
-
-Para criar um loop de revisão, o método de revisão deve escutar **ambos** um gatilho upstream e seu próprio outcome de revisão usando `or_()`:
-
-```python Code
-@start()
-def generate(self):
-    return "initial draft"
-
-@human_feedback(
-    message="Aprovar ou solicitar alterações?",
-    emit=["revise", "approved"],
-    llm="gpt-4o-mini",
-    default_outcome="approved",
-)
-@listen(or_("generate", "revise"))
-def review(self):
-    return "content"
-
-@listen("approved")
-def publish(self):
-    return "published"
-```
-
-Quando o outcome é `"revise"`, o flow roteia de volta para `review` (porque ele escuta `"revise"` via `or_()`). Quando o outcome é `"approved"`, o flow continua para `publish`. Isso funciona porque o engine de flow isenta roteadores da regra "fire once", permitindo que eles re-executem em cada iteração do loop.
-
-### Roteadores encadeados
-
-Um listener disparado pelo outcome de um roteador pode ser ele mesmo um roteador:
-
-```python Code
-@start()
-@human_feedback(message="Primeira revisão:", emit=["approved", "rejected"], llm="gpt-4o-mini")
-def draft(self):
-    return "draft content"
-
-@listen("approved")
-@human_feedback(message="Revisão final:", emit=["publish", "revise"], llm="gpt-4o-mini")
-def final_review(self, prev):
-    return "final content"
-
-@listen("publish")
-def on_publish(self, prev):
-    return "published"
-```
-
-### Limitações
-
- **Métodos `@start()` executam uma vez**: Um método `@start()` não pode fazer self-loop. Se você precisa de um ciclo de revisão, use um método `@start()` separado como ponto de entrada e coloque o `@human_feedback` em um método `@listen()`.
- **Sem `@start()` + `@listen()` no mesmo método**: Esta é uma restrição do framework de Flow. Um método é ou um ponto de início ou um listener, não ambos.
+<Tip>
+Coloque `@human_feedback` como o decorador mais interno (último/mais próximo da função) para que ele envolva o método diretamente e possa capturar o valor de retorno antes de passar para o sistema de flow.
+</Tip>

 ## Melhores Práticas

@@ -572,9 +516,9 @@ class ContentPipeline(Flow):
    @start()
    @human_feedback(
        message="Aprova este conteúdo para publicação?",
-        emit=["approved", "rejected"],
+        emit=["approved", "rejected", "needs_revision"],
        llm="gpt-4o-mini",
-        default_outcome="rejected",
+        default_outcome="needs_revision",
        provider=SlackNotificationProvider("#content-reviews"),
    )
    def generate_content(self):
@@ -590,6 +534,11 @@ class ContentPipeline(Flow):
        print(f"Arquivado. Motivo: {result.feedback}")
        return {"status": "archived"}

+    @listen("needs_revision")
+    def queue_revision(self, result):
+        print(f"Na fila para revisão: {result.feedback}")
+        return {"status": "revision_needed"}
+

 # Iniciando o flow (vai pausar e aguardar resposta do Slack)
 def start_content_pipeline():
@@ -645,22 +594,22 @@ Com o tempo, o humano vê saídas pré-revisadas progressivamente melhores porqu
 ```python Code
 class ArticleReviewFlow(Flow):
    @start()
-    def generate_article(self):
-        return self.crew.kickoff(inputs={"topic": "AI Safety"}).raw
-
    @human_feedback(
-        message="Revise este rascunho do artigo:",
+        message="Revise este rascunho do artigo:",
        emit=["approved", "needs_revision"],
        llm="gpt-4o-mini",
        learn=True,  # enable HITL learning
    )
-    @listen(or_("generate_article", "needs_revision"))
-    def review_article(self):
-        return self.last_human_feedback.output if self.last_human_feedback else "article draft"
+    def generate_article(self):
+        return self.crew.kickoff(inputs={"topic": "AI Safety"}).raw

    @listen("approved")
    def publish(self):
        print(f"Publishing: {self.last_human_feedback.output}")
+
+    @listen("needs_revision")
+    def revise(self):
+        print("Revising based on feedback...")
 ```

 **Primeira execução**: O humano vê a saída bruta e diz "Sempre inclua citações para afirmações factuais." A lição é destilada e armazenada na memória.
--- a/lib/crewai-tools/src/crewai_tools/tools/crewai_platform_tools/misc.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/crewai_platform_tools/misc.py
@@ -1,7 +1,5 @@
 import os

-from crewai.context import get_platform_integration_token as _get_context_token
-

 def get_platform_api_base_url() -> str:
    """Get the platform API base URL from environment or use default."""
@@ -10,16 +8,10 @@ def get_platform_api_base_url() -> str:


 def get_platform_integration_token() -> str:
-    """Get the platform integration token from the context.
-    Fallback to the environment variable if no token has been set in the context.
-
-    Raises:
-        ValueError: If no token has been set in the context.
-    """
-    token = _get_context_token() or os.getenv("CREWAI_PLATFORM_INTEGRATION_TOKEN")
+    """Get the platform integration token from the environment; raise ValueError if it is missing or empty."""
+    token = os.getenv("CREWAI_PLATFORM_INTEGRATION_TOKEN") or ""
    if not token:
        raise ValueError(
-            "No platform integration token found. "
-            "Set it via platform_integration_context() or set_platform_integration_token()."
+            "No platform integration token found, please set the CREWAI_PLATFORM_INTEGRATION_TOKEN environment variable"
        )
-    return token
+    return token  # TODO: Use context manager to get token
--- a/lib/crewai-tools/tests/test_platform_tools_misc.py
+++ b/lib/crewai-tools/tests/test_platform_tools_misc.py
@@ -1,56 +0,0 @@
-"""Tests for platform tools misc functionality."""
-
-import os
-from unittest.mock import patch
-
-import pytest
-from crewai.context import platform_integration_context, set_platform_integration_token, reset_platform_integration_token
-from crewai_tools.tools.crewai_platform_tools.misc import (
-    get_platform_integration_token,
-)
-
-
-
-class TestTokenRetrievalWithFallback:
-    """Test token retrieval logic with environment fallback."""
-
-    @pytest.fixture
-    def clean_context(self):
-        token = set_platform_integration_token(None)
-        env_backup = os.environ.pop("CREWAI_PLATFORM_INTEGRATION_TOKEN", None)
-        yield
-        reset_platform_integration_token(token)
-        if env_backup is not None:
-            os.environ["CREWAI_PLATFORM_INTEGRATION_TOKEN"] = env_backup
-        else:
-            os.environ.pop("CREWAI_PLATFORM_INTEGRATION_TOKEN", None)
-
-    def test_context_token_takes_precedence(self, clean_context):
-        """Test that context token takes precedence over environment variable."""
-        context_token = "context-token"
-        env_token = "env-token"
-
-        with patch.dict(os.environ, {"CREWAI_PLATFORM_INTEGRATION_TOKEN": env_token}):
-            with platform_integration_context(context_token):
-                token = get_platform_integration_token()
-                assert token == context_token
-
-    def test_environment_fallback_when_no_context(self, clean_context):
-        """Test fallback to environment variable when no context token."""
-        env_token = "env-fallback-token"
-
-        with patch.dict(os.environ, {"CREWAI_PLATFORM_INTEGRATION_TOKEN": env_token}):
-            token = get_platform_integration_token()
-            assert token == env_token
-
-    @pytest.mark.parametrize("empty_value", ["", None])
-    def test_missing_token_raises_error(self, clean_context, empty_value):
-        """Test that missing tokens raise appropriate errors."""
-        env_dict = {"CREWAI_PLATFORM_INTEGRATION_TOKEN": empty_value} if empty_value is not None else {}
-
-        with patch.dict(os.environ, env_dict, clear=True):
-            with pytest.raises(ValueError) as exc_info:
-                get_platform_integration_token()
-
-            assert "No platform integration token found" in str(exc_info.value)
-            assert "platform_integration_context()" in str(exc_info.value)
--- a/lib/crewai-tools/tool.specs.json
+++ b/lib/crewai-tools/tool.specs.json
@@ -20117,6 +20117,18 @@
      "humanized_name": "Web Automation Tool",
      "init_params_schema": {
        "$defs": {
+          "AvailableModel": {
+            "enum": [
+              "gpt-4o",
+              "gpt-4o-mini",
+              "claude-3-5-sonnet-latest",
+              "claude-3-7-sonnet-latest",
+              "computer-use-preview",
+              "gemini-2.0-flash"
+            ],
+            "title": "AvailableModel",
+            "type": "string"
+          },
          "EnvVar": {
            "properties": {
              "default": {
@@ -20194,6 +20206,17 @@
            "default": null,
            "title": "Model Api Key"
          },
+          "model_name": {
+            "anyOf": [
+              {
+                "$ref": "#/$defs/AvailableModel"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "default": "claude-3-7-sonnet-latest"
+          },
          "project_id": {
            "anyOf": [
              {
--- a/lib/crewai/src/crewai/agents/crew_agent_executor.py
+++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py
@@ -7,7 +7,6 @@ and memory management.
 from __future__ import annotations

 from collections.abc import Callable
-from concurrent.futures import ThreadPoolExecutor, as_completed
 import logging
 from typing import TYPE_CHECKING, Any, Literal, cast

@@ -686,138 +685,30 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        Returns:
            AgentFinish if tool has result_as_answer=True, None otherwise.
        """
+        from datetime import datetime
+        import json
+
+        from crewai.events import crewai_event_bus
+        from crewai.events.types.tool_usage_events import (
+            ToolUsageErrorEvent,
+            ToolUsageFinishedEvent,
+            ToolUsageStartedEvent,
+        )
+
        if not tool_calls:
            return None

-        parsed_calls = [
-            parsed
-            for tool_call in tool_calls
-            if (parsed := self._parse_native_tool_call(tool_call)) is not None
-        ]
-        if not parsed_calls:
-            return None
+        # Only process the FIRST tool call for sequential execution with reflection
+        tool_call = tool_calls[0]

-        original_tools_by_name: dict[str, Any] = {}
-        for tool in self.original_tools or []:
-            original_tools_by_name[sanitize_tool_name(tool.name)] = tool
-
-        if len(parsed_calls) > 1:
-            has_result_as_answer_in_batch = any(
-                bool(
-                    original_tools_by_name.get(func_name)
-                    and getattr(
-                        original_tools_by_name.get(func_name), "result_as_answer", False
-                    )
-                )
-                for _, func_name, _ in parsed_calls
-            )
-            has_max_usage_count_in_batch = any(
-                bool(
-                    original_tools_by_name.get(func_name)
-                    and getattr(
-                        original_tools_by_name.get(func_name),
-                        "max_usage_count",
-                        None,
-                    )
-                    is not None
-                )
-                for _, func_name, _ in parsed_calls
-            )
-
-            # Preserve historical sequential behavior for result_as_answer batches.
-            # Also avoid threading around usage counters for max_usage_count tools.
-            if has_result_as_answer_in_batch or has_max_usage_count_in_batch:
-                logger.debug(
-                    "Skipping parallel native execution because batch includes result_as_answer or max_usage_count tool"
-                )
-            else:
-                execution_plan: list[
-                    tuple[str, str, str | dict[str, Any], Any | None]
-                ] = []
-                for call_id, func_name, func_args in parsed_calls:
-                    original_tool = original_tools_by_name.get(func_name)
-                    execution_plan.append((call_id, func_name, func_args, original_tool))
-
-                self._append_assistant_tool_calls_message(
-                    [
-                        (call_id, func_name, func_args)
-                        for call_id, func_name, func_args, _ in execution_plan
-                    ]
-                )
-
-                max_workers = min(8, len(execution_plan))
-                ordered_results: list[dict[str, Any] | None] = [None] * len(execution_plan)
-                with ThreadPoolExecutor(max_workers=max_workers) as pool:
-                    futures = {
-                        pool.submit(
-                            self._execute_single_native_tool_call,
-                            call_id=call_id,
-                            func_name=func_name,
-                            func_args=func_args,
-                            available_functions=available_functions,
-                            original_tool=original_tool,
-                            should_execute=True,
-                        ): idx
-                        for idx, (
-                            call_id,
-                            func_name,
-                            func_args,
-                            original_tool,
-                        ) in enumerate(execution_plan)
-                    }
-                    for future in as_completed(futures):
-                        idx = futures[future]
-                        ordered_results[idx] = future.result()
-
-                for execution_result in ordered_results:
-                    if not execution_result:
-                        continue
-                    tool_finish = self._append_tool_result_and_check_finality(
-                        execution_result
-                    )
-                    if tool_finish:
-                        return tool_finish
-
-                reasoning_prompt = self._i18n.slice("post_tool_reasoning")
-                reasoning_message: LLMMessage = {
-                    "role": "user",
-                    "content": reasoning_prompt,
-                }
-                self.messages.append(reasoning_message)
-                return None
-
-        # Sequential behavior: process only first tool call, then force reflection.
-        call_id, func_name, func_args = parsed_calls[0]
-        self._append_assistant_tool_calls_message([(call_id, func_name, func_args)])
-
-        execution_result = self._execute_single_native_tool_call(
-            call_id=call_id,
-            func_name=func_name,
-            func_args=func_args,
-            available_functions=available_functions,
-            original_tool=original_tools_by_name.get(func_name),
-            should_execute=True,
-        )
-        tool_finish = self._append_tool_result_and_check_finality(execution_result)
-        if tool_finish:
-            return tool_finish
-
-        reasoning_prompt = self._i18n.slice("post_tool_reasoning")
-        reasoning_message: LLMMessage = {
-            "role": "user",
-            "content": reasoning_prompt,
-        }
-        self.messages.append(reasoning_message)
-        return None
-
-    def _parse_native_tool_call(
-        self, tool_call: Any
-    ) -> tuple[str, str, str | dict[str, Any]] | None:
+        # Extract tool call info - handle OpenAI-style, Gemini-style, and Anthropic-style objects, plus plain dicts (OpenAI "id" / Bedrock "toolUseId")
        if hasattr(tool_call, "function"):
+            # OpenAI-style: has .function.name and .function.arguments
            call_id = getattr(tool_call, "id", f"call_{id(tool_call)}")
            func_name = sanitize_tool_name(tool_call.function.name)
-            return call_id, func_name, tool_call.function.arguments
-        if hasattr(tool_call, "function_call") and tool_call.function_call:
+            func_args = tool_call.function.arguments
+        elif hasattr(tool_call, "function_call") and tool_call.function_call:
+            # Gemini-style: has .function_call.name and .function_call.args
            call_id = f"call_{id(tool_call)}"
            func_name = sanitize_tool_name(tool_call.function_call.name)
            func_args = (
@@ -825,12 +716,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                if tool_call.function_call.args
                else {}
            )
-            return call_id, func_name, func_args
-        if hasattr(tool_call, "name") and hasattr(tool_call, "input"):
+        elif hasattr(tool_call, "name") and hasattr(tool_call, "input"):
+            # Anthropic format: has .name and .input (ToolUseBlock)
            call_id = getattr(tool_call, "id", f"call_{id(tool_call)}")
            func_name = sanitize_tool_name(tool_call.name)
-            return call_id, func_name, tool_call.input
-        if isinstance(tool_call, dict):
+            func_args = tool_call.input  # Already a dict in Anthropic
+        elif isinstance(tool_call, dict):
+            # Support OpenAI "id", Bedrock "toolUseId", or generate one
            call_id = (
                tool_call.get("id")
                or tool_call.get("toolUseId")
@@ -840,16 +732,11 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            func_name = sanitize_tool_name(
                func_info.get("name", "") or tool_call.get("name", "")
            )
-            func_args = func_info.get("arguments", "{}") or tool_call.get("input", {})
-            return call_id, func_name, func_args
-        return None
-
-    def _append_assistant_tool_calls_message(
-        self,
-        parsed_calls: list[tuple[str, str, str | dict[str, Any]]],
-    ) -> None:
-        import json
+            func_args = func_info.get("arguments") or tool_call.get("input") or {}
+        else:
+            return None

+        # Append assistant message with single tool call
        assistant_message: LLMMessage = {
            "role": "assistant",
            "content": None,
@@ -864,30 +751,12 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                        else json.dumps(func_args),
                    },
                }
-                for call_id, func_name, func_args in parsed_calls
            ],
        }
+
        self.messages.append(assistant_message)

-    def _execute_single_native_tool_call(
-        self,
-        *,
-        call_id: str,
-        func_name: str,
-        func_args: str | dict[str, Any],
-        available_functions: dict[str, Callable[..., Any]],
-        original_tool: Any | None = None,
-        should_execute: bool = True,
-    ) -> dict[str, Any]:
-        from datetime import datetime
-        import json
-
-        from crewai.events.types.tool_usage_events import (
-            ToolUsageErrorEvent,
-            ToolUsageFinishedEvent,
-            ToolUsageStartedEvent,
-        )
-
+        # Parse arguments for the single tool call
        if isinstance(func_args, str):
            try:
                args_dict = json.loads(func_args)
@@ -896,26 +765,28 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        else:
            args_dict = func_args

-        if original_tool is None:
-            for tool in self.original_tools or []:
-                if sanitize_tool_name(tool.name) == func_name:
-                    original_tool = tool
-                    break
+        agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"

+        # Find original tool by matching sanitized name (needed for cache_function and result_as_answer)
+
+        original_tool = None
+        for tool in self.original_tools or []:
+            if sanitize_tool_name(tool.name) == func_name:
+                original_tool = tool
+                break
+
+        # Check if tool has reached max usage count
        max_usage_reached = False
-        if not should_execute and original_tool:
-            max_usage_reached = True
-        elif (
-            should_execute
-            and original_tool
-            and getattr(original_tool, "max_usage_count", None) is not None
-            and getattr(original_tool, "current_usage_count", 0)
-            >= original_tool.max_usage_count
-        ):
-            max_usage_reached = True
+        if original_tool:
+            if (
+                hasattr(original_tool, "max_usage_count")
+                and original_tool.max_usage_count is not None
+                and original_tool.current_usage_count >= original_tool.max_usage_count
+            ):
+                max_usage_reached = True

+        # Check cache before executing
        from_cache = False
-        result: str = "Tool not found"
        input_str = json.dumps(args_dict) if args_dict else ""
        if self.tools_handler and self.tools_handler.cache:
            cached_result = self.tools_handler.cache.read(
@@ -929,7 +800,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                )
                from_cache = True

-        agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"
+        # Emit tool usage started event
        started_at = datetime.now()
        crewai_event_bus.emit(
            self,
@@ -945,12 +816,14 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):

        track_delegation_if_needed(func_name, args_dict, self.task)

+        # Find the structured tool for hook context
        structured_tool: CrewStructuredTool | None = None
        for structured in self.tools or []:
            if sanitize_tool_name(structured.name) == func_name:
                structured_tool = structured
                break

+        # Execute before_tool_call hooks
        hook_blocked = False
        before_hook_context = ToolCallHookContext(
            tool_name=func_name,
@@ -974,44 +847,58 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                    color="red",
                )

+        # If hook blocked execution, set result and skip tool execution
        if hook_blocked:
            result = f"Tool execution blocked by hook. Tool: {func_name}"
+        # Execute the tool (only if not cached, not at max usage, and not blocked by hook)
+        elif not from_cache and not max_usage_reached:
+            result = "Tool not found"
+            if func_name in available_functions:
+                try:
+                    tool_func = available_functions[func_name]
+                    raw_result = tool_func(**args_dict)
+
+                    # Add to cache after successful execution (before string conversion)
+                    if self.tools_handler and self.tools_handler.cache:
+                        should_cache = True
+                        if (
+                            original_tool
+                            and hasattr(original_tool, "cache_function")
+                            and callable(original_tool.cache_function)
+                        ):
+                            should_cache = original_tool.cache_function(
+                                args_dict, raw_result
+                            )
+                        if should_cache:
+                            self.tools_handler.cache.add(
+                                tool=func_name, input=input_str, output=raw_result
+                            )
+
+                    # Convert to string for message
+                    result = (
+                        str(raw_result)
+                        if not isinstance(raw_result, str)
+                        else raw_result
+                    )
+                except Exception as e:
+                    result = f"Error executing tool: {e}"
+                    if self.task:
+                        self.task.increment_tools_errors()
+                    crewai_event_bus.emit(
+                        self,
+                        event=ToolUsageErrorEvent(
+                            tool_name=func_name,
+                            tool_args=args_dict,
+                            from_agent=self.agent,
+                            from_task=self.task,
+                            agent_key=agent_key,
+                            error=e,
+                        ),
+                    )
+                    error_event_emitted = True
        elif max_usage_reached and original_tool:
+            # Return error message when max usage limit is reached
            result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
-        elif not from_cache and func_name in available_functions:
-            try:
-                raw_result = available_functions[func_name](**args_dict)
-
-                if self.tools_handler and self.tools_handler.cache:
-                    should_cache = True
-                    if (
-                        original_tool
-                        and hasattr(original_tool, "cache_function")
-                        and callable(original_tool.cache_function)
-                    ):
-                        should_cache = original_tool.cache_function(args_dict, raw_result)
-                    if should_cache:
-                        self.tools_handler.cache.add(
-                            tool=func_name, input=input_str, output=raw_result
-                        )
-
-                result = str(raw_result) if not isinstance(raw_result, str) else raw_result
-            except Exception as e:
-                result = f"Error executing tool: {e}"
-                if self.task:
-                    self.task.increment_tools_errors()
-                crewai_event_bus.emit(
-                    self,
-                    event=ToolUsageErrorEvent(
-                        tool_name=func_name,
-                        tool_args=args_dict,
-                        from_agent=self.agent,
-                        from_task=self.task,
-                        agent_key=agent_key,
-                        error=e,
-                    ),
-                )
-                error_event_emitted = True

        after_hook_context = ToolCallHookContext(
            tool_name=func_name,
@@ -1051,23 +938,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                ),
            )

-        return {
-            "call_id": call_id,
-            "func_name": func_name,
-            "result": result,
-            "from_cache": from_cache,
-            "original_tool": original_tool,
-        }
-
-    def _append_tool_result_and_check_finality(
-        self, execution_result: dict[str, Any]
-    ) -> AgentFinish | None:
-        call_id = cast(str, execution_result["call_id"])
-        func_name = cast(str, execution_result["func_name"])
-        result = cast(str, execution_result["result"])
-        from_cache = cast(bool, execution_result["from_cache"])
-        original_tool = execution_result["original_tool"]
-
+        # Append tool result message
        tool_message: LLMMessage = {
            "role": "tool",
            "tool_call_id": call_id,
@@ -1076,6 +947,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        }
        self.messages.append(tool_message)

+        # Log the tool execution
        if self.agent and self.agent.verbose:
            cache_info = " (from cache)" if from_cache else ""
            self._printer.print(
@@ -1088,11 +960,20 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            and hasattr(original_tool, "result_as_answer")
            and original_tool.result_as_answer
        ):
+            # Return immediately with tool result as final answer
            return AgentFinish(
                thought="Tool result is the final answer",
                output=result,
                text=result,
            )
+
+        # Inject post-tool reasoning prompt to enforce analysis
+        reasoning_prompt = self._i18n.slice("post_tool_reasoning")
+        reasoning_message: LLMMessage = {
+            "role": "user",
+            "content": reasoning_prompt,
+        }
+        self.messages.append(reasoning_message)
        return None

    async def ainvoke(self, inputs: dict[str, Any]) -> dict[str, Any]:
--- a/lib/crewai/src/crewai/cli/reset_memories_command.py
+++ b/lib/crewai/src/crewai/cli/reset_memories_command.py
@@ -2,30 +2,7 @@ import subprocess

 import click

-from crewai.cli.utils import get_crews, get_flows
-from crewai.flow import Flow
-
-
-def _reset_flow_memory(flow: Flow) -> None:
-    """Reset memory for a single flow instance.
-
-    Handles Memory, MemoryScope (both have .reset()), and MemorySlice
-    (delegates to the underlying ._memory).  Silently succeeds when the
-    storage directory does not exist yet (nothing to reset).
-
-    Args:
-        flow: The flow instance whose memory should be reset.
-    """
-    mem = flow.memory
-    if mem is None:
-        return
-    try:
-        if hasattr(mem, "reset"):
-            mem.reset()
-        elif hasattr(mem, "_memory") and hasattr(mem._memory, "reset"):
-            mem._memory.reset()
-    except (FileNotFoundError, OSError):
-        pass
+from crewai.cli.utils import get_crews


 def reset_memories_command(
@@ -35,7 +12,7 @@ def reset_memories_command(
    kickoff_outputs: bool,
    all: bool,
 ) -> None:
-    """Reset the crew and flow memories.
+    """Reset the crew memories.

    Args:
        memory: Whether to reset the unified memory.
@@ -52,11 +29,8 @@ def reset_memories_command(
            return

        crews = get_crews()
-        flows = get_flows()
-
-        if not crews and not flows:
-            raise ValueError("No crew or flow found.")
-
+        if not crews:
+            raise ValueError("No crew found.")
        for crew in crews:
            if all:
                crew.reset_memories(command_type="all")
@@ -85,20 +59,6 @@ def reset_memories_command(
                    f"[Crew ({crew.name if crew.name else crew.id})] Agents knowledge has been reset."
                )

-        for flow in flows:
-            flow_name = flow.name or flow.__class__.__name__
-            if all:
-                _reset_flow_memory(flow)
-                click.echo(
-                    f"[Flow ({flow_name})] Reset memories command has been completed."
-                )
-                continue
-            if memory:
-                _reset_flow_memory(flow)
-                click.echo(
-                    f"[Flow ({flow_name})] Memory has been reset."
-                )
-
    except subprocess.CalledProcessError as e:
        click.echo(f"An error occurred while resetting the memories: {e}", err=True)
        click.echo(e.output, err=True)
--- a/lib/crewai/src/crewai/cli/utils.py
+++ b/lib/crewai/src/crewai/cli/utils.py
@@ -386,109 +386,6 @@ def fetch_crews(module_attr: Any) -> list[Crew]:
    return crew_instances


-def get_flow_instance(module_attr: Any) -> Flow | None:
-    """Check if a module attribute is a user-defined Flow subclass and return an instance.
-
-    Args:
-        module_attr: An attribute from a loaded module.
-
-    Returns:
-        A Flow instance if the attribute is a valid user-defined Flow subclass,
-        None otherwise.
-    """
-    if (
-        isinstance(module_attr, type)
-        and issubclass(module_attr, Flow)
-        and module_attr is not Flow
-    ):
-        try:
-            return module_attr()
-        except Exception:
-            return None
-    return None
-
-
-_SKIP_DIRS = frozenset(
-    {".venv", "venv", ".git", "__pycache__", "node_modules", ".tox", ".nox"}
-)
-
-
-def get_flows(flow_path: str = "main.py") -> list[Flow]:
-    """Get the flow instances from project files.
-
-    Walks the project directory looking for files matching ``flow_path``
-    (default ``main.py``), loads each module, and extracts Flow subclass
-    instances.  Directories that are clearly not user source code (virtual
-    environments, ``.git``, etc.) are pruned to avoid noisy import errors.
-
-    Args:
-        flow_path: Filename to search for (default ``main.py``).
-
-    Returns:
-        A list of discovered Flow instances.
-    """
-    flow_instances: list[Flow] = []
-    try:
-        current_dir = os.getcwd()
-        if current_dir not in sys.path:
-            sys.path.insert(0, current_dir)
-
-        src_dir = os.path.join(current_dir, "src")
-        if os.path.isdir(src_dir) and src_dir not in sys.path:
-            sys.path.insert(0, src_dir)
-
-        search_paths = [".", "src"] if os.path.isdir("src") else ["."]
-
-        for search_path in search_paths:
-            for root, dirs, files in os.walk(search_path):
-                dirs[:] = [
-                    d
-                    for d in dirs
-                    if d not in _SKIP_DIRS and not d.startswith(".")
-                ]
-                if flow_path in files and "cli/templates" not in root:
-                    file_os_path = os.path.join(root, flow_path)
-                    try:
-                        spec = importlib.util.spec_from_file_location(
-                            "flow_module", file_os_path
-                        )
-                        if not spec or not spec.loader:
-                            continue
-
-                        module = importlib.util.module_from_spec(spec)
-                        sys.modules[spec.name] = module
-
-                        try:
-                            spec.loader.exec_module(module)
-
-                            for attr_name in dir(module):
-                                module_attr = getattr(module, attr_name)
-                                try:
-                                    if flow_instance := get_flow_instance(
-                                        module_attr
-                                    ):
-                                        flow_instances.append(flow_instance)
-                                except Exception:  # noqa: S112
-                                    continue
-
-                            if flow_instances:
-                                break
-
-                        except Exception:  # noqa: S112
-                            continue
-
-                    except (ImportError, AttributeError):
-                        continue
-
-            if flow_instances:
-                break
-
-    except Exception:  # noqa: S110
-        pass
-
-    return flow_instances
-
-
 def is_valid_tool(obj: Any) -> bool:
    from crewai.tools.base_tool import Tool

--- a/lib/crewai/src/crewai/context.py
+++ b/lib/crewai/src/crewai/context.py
@@ -1,6 +1,7 @@
 from collections.abc import Generator
-from contextlib import AbstractContextManager, contextmanager, nullcontext
+from contextlib import contextmanager
 import contextvars
+import os
 from typing import Any


@@ -9,50 +10,40 @@ _platform_integration_token: contextvars.ContextVar[str | None] = (
 )


-def set_platform_integration_token(integration_token: str) -> contextvars.Token[str | None]:
+def set_platform_integration_token(integration_token: str) -> None:
    """Set the platform integration token in the current context.

    Args:
        integration_token: The integration token to set.
    """
-    return _platform_integration_token.set(integration_token)
-
-
-def reset_platform_integration_token(token: contextvars.Token[str | None]) -> None:
-    """Reset the platform integration token to its previous value."""
-    _platform_integration_token.reset(token)
+    _platform_integration_token.set(integration_token)


 def get_platform_integration_token() -> str | None:
-    """Get the platform integration token from the current context.
+    """Get the platform integration token from the current context or environment.
+
    Returns:
        The integration token if set, otherwise None.
    """
-    return _platform_integration_token.get()
+    token = _platform_integration_token.get()
+    if token is None:
+        token = os.getenv("CREWAI_PLATFORM_INTEGRATION_TOKEN")
+    return token


-def platform_integration_context(integration_token: str | None) -> AbstractContextManager[None]:
+@contextmanager
+def platform_context(integration_token: str) -> Generator[None, Any, None]:
    """Context manager to temporarily set the platform integration token.

    Args:
      integration_token: The integration token to set within the context.
-                        If None or falsy, returns nullcontext (no-op).
-
-    Returns:
-        A context manager that either sets the token or does nothing.
    """
-    if not integration_token:
-        return nullcontext()
+    token = _platform_integration_token.set(integration_token)
+    try:
+        yield
+    finally:
+        _platform_integration_token.reset(token)

-    @contextmanager
-    def _token_context() -> Generator[None, Any, None]:
-        token = set_platform_integration_token(integration_token)
-        try:
-            yield
-        finally:
-            reset_platform_integration_token(token)
-
-    return _token_context()

 _current_task_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "current_task_id", default=None
--- a/lib/crewai/src/crewai/events/types/flow_events.py
+++ b/lib/crewai/src/crewai/events/types/flow_events.py
@@ -120,52 +120,6 @@ class FlowPlotEvent(FlowEvent):
    type: str = "flow_plot"


-class FlowInputRequestedEvent(FlowEvent):
-    """Event emitted when a flow requests user input via ``Flow.ask()``.
-
-    This event is emitted before the flow suspends waiting for user input,
-    allowing UI frameworks and observability tools to know when a flow
-    needs user interaction.
-
-    Attributes:
-        flow_name: Name of the flow requesting input.
-        method_name: Name of the flow method that called ``ask()``.
-        message: The question or prompt being shown to the user.
-        metadata: Optional metadata sent with the question (e.g., user ID,
-            channel, session context).
-    """
-
-    method_name: str
-    message: str
-    metadata: dict[str, Any] | None = None
-    type: str = "flow_input_requested"
-
-
-class FlowInputReceivedEvent(FlowEvent):
-    """Event emitted when user input is received after ``Flow.ask()``.
-
-    This event is emitted after the user provides input (or the request
-    times out), allowing UI frameworks and observability tools to track
-    input collection.
-
-    Attributes:
-        flow_name: Name of the flow that received input.
-        method_name: Name of the flow method that called ``ask()``.
-        message: The original question or prompt.
-        response: The user's response, or None if timed out / unavailable.
-        metadata: Optional metadata sent with the question.
-        response_metadata: Optional metadata from the provider about the
-            response (e.g., who responded, thread ID, timestamps).
-    """
-
-    method_name: str
-    message: str
-    response: str | None = None
-    metadata: dict[str, Any] | None = None
-    response_metadata: dict[str, Any] | None = None
-    type: str = "flow_input_received"
-
-
 class HumanFeedbackRequestedEvent(FlowEvent):
    """Event emitted when human feedback is requested.

--- a/lib/crewai/src/crewai/experimental/agent_executor.py
+++ b/lib/crewai/src/crewai/experimental/agent_executor.py
@@ -1,7 +1,6 @@
 from __future__ import annotations

 from collections.abc import Callable, Coroutine
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 import json
 import threading
@@ -669,12 +668,9 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
        if not self.state.pending_tool_calls:
            return "native_tool_completed"

-        pending_tool_calls = list(self.state.pending_tool_calls)
-        self.state.pending_tool_calls.clear()
-
        # Group all tool calls into a single assistant message
        tool_calls_to_report = []
-        for tool_call in pending_tool_calls:
+        for tool_call in self.state.pending_tool_calls:
            info = extract_tool_call_info(tool_call)
            if not info:
                continue
@@ -699,85 +695,201 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
                "content": None,
                "tool_calls": tool_calls_to_report,
            }
-            if all(type(tc).__qualname__ == "Part" for tc in pending_tool_calls):
-                assistant_message["raw_tool_call_parts"] = list(pending_tool_calls)
+            if all(
+                type(tc).__qualname__ == "Part" for tc in self.state.pending_tool_calls
+            ):
+                assistant_message["raw_tool_call_parts"] = list(
+                    self.state.pending_tool_calls
+                )
            self.state.messages.append(assistant_message)

-        runnable_tool_calls = [
-            tool_call
-            for tool_call in pending_tool_calls
-            if extract_tool_call_info(tool_call) is not None
-        ]
-        should_parallelize = self._should_parallelize_native_tool_calls(
-            runnable_tool_calls
-        )
+        # Now execute each tool
+        while self.state.pending_tool_calls:
+            tool_call = self.state.pending_tool_calls.pop(0)
+            info = extract_tool_call_info(tool_call)
+            if not info:
+                continue

-        execution_results: list[dict[str, Any]] = []
-        if should_parallelize:
-            max_workers = min(8, len(runnable_tool_calls))
-            with ThreadPoolExecutor(max_workers=max_workers) as pool:
-                future_to_idx = {
-                    pool.submit(self._execute_single_native_tool_call, tool_call): idx
-                    for idx, tool_call in enumerate(runnable_tool_calls)
-                }
-                ordered_results: list[dict[str, Any] | None] = [None] * len(
-                    runnable_tool_calls
+            call_id, func_name, func_args = info
+
+            # Parse arguments
+            if isinstance(func_args, str):
+                try:
+                    args_dict = json.loads(func_args)
+                except json.JSONDecodeError:
+                    args_dict = {}
+            else:
+                args_dict = func_args
+
+            # Get agent_key for event tracking
+            agent_key = (
+                getattr(self.agent, "key", "unknown") if self.agent else "unknown"
+            )
+
+            # Find original tool by matching sanitized name (needed for cache_function and result_as_answer)
+            original_tool = None
+            for tool in self.original_tools or []:
+                if sanitize_tool_name(tool.name) == func_name:
+                    original_tool = tool
+                    break
+
+            # Check if tool has reached max usage count
+            max_usage_reached = False
+            if (
+                original_tool
+                and original_tool.max_usage_count is not None
+                and original_tool.current_usage_count >= original_tool.max_usage_count
+            ):
+                max_usage_reached = True
+
+            # Check cache before executing
+            from_cache = False
+            input_str = json.dumps(args_dict) if args_dict else ""
+            if self.tools_handler and self.tools_handler.cache:
+                cached_result = self.tools_handler.cache.read(
+                    tool=func_name, input=input_str
                )
-                for future in as_completed(future_to_idx):
-                    idx = future_to_idx[future]
-                    ordered_results[idx] = future.result()
-                execution_results = [
-                    result for result in ordered_results if result is not None
-                ]
-        else:
-            # Execute sequentially so result_as_answer tools can short-circuit
-            # immediately without running remaining calls.
-            for tool_call in runnable_tool_calls:
-                execution_result = self._execute_single_native_tool_call(tool_call)
-                call_id = cast(str, execution_result["call_id"])
-                func_name = cast(str, execution_result["func_name"])
-                result = cast(str, execution_result["result"])
-                from_cache = cast(bool, execution_result["from_cache"])
-                original_tool = execution_result["original_tool"]
+                if cached_result is not None:
+                    result = (
+                        str(cached_result)
+                        if not isinstance(cached_result, str)
+                        else cached_result
+                    )
+                    from_cache = True

-                tool_message: LLMMessage = {
-                    "role": "tool",
-                    "tool_call_id": call_id,
-                    "name": func_name,
-                    "content": result,
-                }
-                self.state.messages.append(tool_message)
+            # Emit tool usage started event
+            started_at = datetime.now()
+            crewai_event_bus.emit(
+                self,
+                event=ToolUsageStartedEvent(
+                    tool_name=func_name,
+                    tool_args=args_dict,
+                    from_agent=self.agent,
+                    from_task=self.task,
+                    agent_key=agent_key,
+                ),
+            )
+            error_event_emitted = False

-                # Log the tool execution
-                if self.agent and self.agent.verbose:
-                    cache_info = " (from cache)" if from_cache else ""
+            track_delegation_if_needed(func_name, args_dict, self.task)
+
+            structured_tool: CrewStructuredTool | None = None
+            for structured in self.tools or []:
+                if sanitize_tool_name(structured.name) == func_name:
+                    structured_tool = structured
+                    break
+
+            hook_blocked = False
+            before_hook_context = ToolCallHookContext(
+                tool_name=func_name,
+                tool_input=args_dict,
+                tool=structured_tool,  # type: ignore[arg-type]
+                agent=self.agent,
+                task=self.task,
+                crew=self.crew,
+            )
+            before_hooks = get_before_tool_call_hooks()
+            try:
+                for hook in before_hooks:
+                    hook_result = hook(before_hook_context)
+                    if hook_result is False:
+                        hook_blocked = True
+                        break
+            except Exception as hook_error:
+                if self.agent.verbose:
                    self._printer.print(
-                        content=f"Tool {func_name} executed with result{cache_info}: {result[:200]}...",
-                        color="green",
+                        content=f"Error in before_tool_call hook: {hook_error}",
+                        color="red",
                    )

-                if (
-                    original_tool
-                    and hasattr(original_tool, "result_as_answer")
-                    and original_tool.result_as_answer
-                ):
-                    self.state.current_answer = AgentFinish(
-                        thought="Tool result is the final answer",
+            if hook_blocked:
+                result = f"Tool execution blocked by hook. Tool: {func_name}"
+            elif not from_cache and not max_usage_reached:
+                result = "Tool not found"
+                if func_name in self._available_functions:
+                    try:
+                        tool_func = self._available_functions[func_name]
+                        raw_result = tool_func(**args_dict)
+
+                        # Add to cache after successful execution (before string conversion)
+                        if self.tools_handler and self.tools_handler.cache:
+                            should_cache = True
+                            if original_tool:
+                                should_cache = original_tool.cache_function(
+                                    args_dict, raw_result
+                                )
+                            if should_cache:
+                                self.tools_handler.cache.add(
+                                    tool=func_name, input=input_str, output=raw_result
+                                )
+
+                        # Convert to string for message
+                        result = (
+                            str(raw_result)
+                            if not isinstance(raw_result, str)
+                            else raw_result
+                        )
+                    except Exception as e:
+                        result = f"Error executing tool: {e}"
+                        if self.task:
+                            self.task.increment_tools_errors()
+                        # Emit tool usage error event
+                        crewai_event_bus.emit(
+                            self,
+                            event=ToolUsageErrorEvent(
+                                tool_name=func_name,
+                                tool_args=args_dict,
+                                from_agent=self.agent,
+                                from_task=self.task,
+                                agent_key=agent_key,
+                                error=e,
+                            ),
+                        )
+                        error_event_emitted = True
+            elif max_usage_reached and original_tool:
+                # Return error message when max usage limit is reached
+                result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
+
+            # Execute after_tool_call hooks (even if blocked, to allow logging/monitoring)
+            after_hook_context = ToolCallHookContext(
+                tool_name=func_name,
+                tool_input=args_dict,
+                tool=structured_tool,  # type: ignore[arg-type]
+                agent=self.agent,
+                task=self.task,
+                crew=self.crew,
+                tool_result=result,
+            )
+            after_hooks = get_after_tool_call_hooks()
+            try:
+                for after_hook in after_hooks:
+                    after_hook_result = after_hook(after_hook_context)
+                    if after_hook_result is not None:
+                        result = after_hook_result
+                        after_hook_context.tool_result = result
+            except Exception as hook_error:
+                if self.agent.verbose:
+                    self._printer.print(
+                        content=f"Error in after_tool_call hook: {hook_error}",
+                        color="red",
+                    )
+
+            if not error_event_emitted:
+                crewai_event_bus.emit(
+                    self,
+                    event=ToolUsageFinishedEvent(
                        output=result,
-                        text=result,
-                    )
-                    self.state.is_finished = True
-                    return "tool_result_is_final"
-
-            return "native_tool_completed"
-
-        for execution_result in execution_results:
-            call_id = cast(str, execution_result["call_id"])
-            func_name = cast(str, execution_result["func_name"])
-            result = cast(str, execution_result["result"])
-            from_cache = cast(bool, execution_result["from_cache"])
-            original_tool = execution_result["original_tool"]
+                        tool_name=func_name,
+                        tool_args=args_dict,
+                        from_agent=self.agent,
+                        from_task=self.task,
+                        agent_key=agent_key,
+                        started_at=started_at,
+                        finished_at=datetime.now(),
+                    ),
+                )

+            # Append tool result message
            tool_message: LLMMessage = {
                "role": "tool",
                "tool_call_id": call_id,
@@ -810,224 +922,6 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):

        return "native_tool_completed"

-    def _should_parallelize_native_tool_calls(self, tool_calls: list[Any]) -> bool:
-        """Determine if native tool calls are safe to run in parallel."""
-        if len(tool_calls) <= 1:
-            return False
-
-        for tool_call in tool_calls:
-            info = extract_tool_call_info(tool_call)
-            if not info:
-                continue
-            _, func_name, _ = info
-
-            original_tool = None
-            for tool in self.original_tools or []:
-                if sanitize_tool_name(tool.name) == func_name:
-                    original_tool = tool
-                    break
-
-            if not original_tool:
-                continue
-
-            if getattr(original_tool, "result_as_answer", False):
-                return False
-            if getattr(original_tool, "max_usage_count", None) is not None:
-                return False
-
-        return True
-
-    def _execute_single_native_tool_call(self, tool_call: Any) -> dict[str, Any]:
-        """Execute a single native tool call and return metadata/result."""
-        info = extract_tool_call_info(tool_call)
-        if not info:
-            raise ValueError("Invalid native tool call format")
-
-        call_id, func_name, func_args = info
-
-        # Parse arguments
-        if isinstance(func_args, str):
-            try:
-                args_dict = json.loads(func_args)
-            except json.JSONDecodeError:
-                args_dict = {}
-        else:
-            args_dict = func_args
-
-        # Get agent_key for event tracking
-        agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"
-
-        # Find original tool by matching sanitized name (needed for cache_function and result_as_answer)
-        original_tool = None
-        for tool in self.original_tools or []:
-            if sanitize_tool_name(tool.name) == func_name:
-                original_tool = tool
-                break
-
-        # Check if tool has reached max usage count
-        max_usage_reached = False
-        if (
-            original_tool
-            and original_tool.max_usage_count is not None
-            and original_tool.current_usage_count >= original_tool.max_usage_count
-        ):
-            max_usage_reached = True
-
-        # Check cache before executing
-        from_cache = False
-        input_str = json.dumps(args_dict) if args_dict else ""
-        if self.tools_handler and self.tools_handler.cache:
-            cached_result = self.tools_handler.cache.read(
-                tool=func_name, input=input_str
-            )
-            if cached_result is not None:
-                result = (
-                    str(cached_result)
-                    if not isinstance(cached_result, str)
-                    else cached_result
-                )
-                from_cache = True
-
-        # Emit tool usage started event
-        started_at = datetime.now()
-        crewai_event_bus.emit(
-            self,
-            event=ToolUsageStartedEvent(
-                tool_name=func_name,
-                tool_args=args_dict,
-                from_agent=self.agent,
-                from_task=self.task,
-                agent_key=agent_key,
-            ),
-        )
-        error_event_emitted = False
-
-        track_delegation_if_needed(func_name, args_dict, self.task)
-
-        structured_tool: CrewStructuredTool | None = None
-        for structured in self.tools or []:
-            if sanitize_tool_name(structured.name) == func_name:
-                structured_tool = structured
-                break
-
-        hook_blocked = False
-        before_hook_context = ToolCallHookContext(
-            tool_name=func_name,
-            tool_input=args_dict,
-            tool=structured_tool,  # type: ignore[arg-type]
-            agent=self.agent,
-            task=self.task,
-            crew=self.crew,
-        )
-        before_hooks = get_before_tool_call_hooks()
-        try:
-            for hook in before_hooks:
-                hook_result = hook(before_hook_context)
-                if hook_result is False:
-                    hook_blocked = True
-                    break
-        except Exception as hook_error:
-            if self.agent.verbose:
-                self._printer.print(
-                    content=f"Error in before_tool_call hook: {hook_error}",
-                    color="red",
-                )
-
-        if hook_blocked:
-            result = f"Tool execution blocked by hook. Tool: {func_name}"
-        elif not from_cache and not max_usage_reached:
-            result = "Tool not found"
-            if func_name in self._available_functions:
-                try:
-                    tool_func = self._available_functions[func_name]
-                    raw_result = tool_func(**args_dict)
-
-                    # Add to cache after successful execution (before string conversion)
-                    if self.tools_handler and self.tools_handler.cache:
-                        should_cache = True
-                        if original_tool:
-                            should_cache = original_tool.cache_function(
-                                args_dict, raw_result
-                            )
-                        if should_cache:
-                            self.tools_handler.cache.add(
-                                tool=func_name, input=input_str, output=raw_result
-                            )
-
-                    # Convert to string for message
-                    result = (
-                        str(raw_result)
-                        if not isinstance(raw_result, str)
-                        else raw_result
-                    )
-                except Exception as e:
-                    result = f"Error executing tool: {e}"
-                    if self.task:
-                        self.task.increment_tools_errors()
-                    # Emit tool usage error event
-                    crewai_event_bus.emit(
-                        self,
-                        event=ToolUsageErrorEvent(
-                            tool_name=func_name,
-                            tool_args=args_dict,
-                            from_agent=self.agent,
-                            from_task=self.task,
-                            agent_key=agent_key,
-                            error=e,
-                        ),
-                    )
-                    error_event_emitted = True
-        elif max_usage_reached and original_tool:
-            # Return error message when max usage limit is reached
-            result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
-
-        # Execute after_tool_call hooks (even if blocked, to allow logging/monitoring)
-        after_hook_context = ToolCallHookContext(
-            tool_name=func_name,
-            tool_input=args_dict,
-            tool=structured_tool,  # type: ignore[arg-type]
-            agent=self.agent,
-            task=self.task,
-            crew=self.crew,
-            tool_result=result,
-        )
-        after_hooks = get_after_tool_call_hooks()
-        try:
-            for after_hook in after_hooks:
-                after_hook_result = after_hook(after_hook_context)
-                if after_hook_result is not None:
-                    result = after_hook_result
-                    after_hook_context.tool_result = result
-        except Exception as hook_error:
-            if self.agent.verbose:
-                self._printer.print(
-                    content=f"Error in after_tool_call hook: {hook_error}",
-                    color="red",
-                )
-
-        if not error_event_emitted:
-            crewai_event_bus.emit(
-                self,
-                event=ToolUsageFinishedEvent(
-                    output=result,
-                    tool_name=func_name,
-                    tool_args=args_dict,
-                    from_agent=self.agent,
-                    from_task=self.task,
-                    agent_key=agent_key,
-                    started_at=started_at,
-                    finished_at=datetime.now(),
-                ),
-            )
-
-        return {
-            "call_id": call_id,
-            "func_name": func_name,
-            "result": result,
-            "from_cache": from_cache,
-            "original_tool": original_tool,
-        }
-
    def _extract_tool_name(self, tool_call: Any) -> str:
        """Extract tool name from various tool call formats."""
        if hasattr(tool_call, "function"):
--- a/lib/crewai/src/crewai/flow/init.py
+++ b/lib/crewai/src/crewai/flow/init.py
@@ -7,7 +7,6 @@ from crewai.flow.async_feedback import (
 from crewai.flow.flow import Flow, and_, listen, or_, router, start
 from crewai.flow.flow_config import flow_config
 from crewai.flow.human_feedback import HumanFeedbackResult, human_feedback
-from crewai.flow.input_provider import InputProvider, InputResponse
 from crewai.flow.persistence import persist
 from crewai.flow.visualization import (
    FlowStructure,
@@ -23,8 +22,6 @@ __all__ = [
    "HumanFeedbackPending",
    "HumanFeedbackProvider",
    "HumanFeedbackResult",
-    "InputProvider",
-    "InputResponse",
    "PendingFeedbackContext",
    "and_",
    "build_flow_structure",
--- a/lib/crewai/src/crewai/flow/async_feedback/providers.py
+++ b/lib/crewai/src/crewai/flow/async_feedback/providers.py
@@ -1,8 +1,7 @@
-"""Default provider implementations for human feedback and user input.
+"""Default provider implementations for human feedback.

 This module provides the ConsoleProvider, which is the default synchronous
-provider that collects both feedback (for ``@human_feedback``) and user input
-(for ``Flow.ask()``) via console.
+provider that collects feedback via console input.
 """

 from __future__ import annotations
@@ -17,23 +16,20 @@ if TYPE_CHECKING:


 class ConsoleProvider:
-    """Default synchronous console-based provider for feedback and input.
+    """Default synchronous console-based feedback provider.

    This provider blocks execution and waits for console input from the user.
-    It serves two purposes:
-
-    - **Feedback** (``request_feedback``): Used by ``@human_feedback`` to
-      display method output and collect review feedback.
-    - **Input** (``request_input``): Used by ``Flow.ask()`` to prompt the
-      user with a question and collect a response.
+    It displays the method output with formatting and prompts for feedback.

    This is the default provider used when no custom provider is specified
-    in the ``@human_feedback`` decorator or on the Flow's ``input_provider``.
+    in the @human_feedback decorator.

-    Example (feedback):
+    Example:
        ```python
        from crewai.flow.async_feedback import ConsoleProvider

+
+        # Explicitly use console provider
        @human_feedback(
            message="Review this:",
            provider=ConsoleProvider(),
@@ -41,20 +37,9 @@ class ConsoleProvider:
        def my_method(self):
            return "Content to review"
        ```
-
-    Example (input):
-        ```python
-        from crewai.flow import Flow, start
-
-        class MyFlow(Flow):
-            @start()
-            def gather_info(self):
-                topic = self.ask("What topic should we research?")
-                return topic
-        ```
    """

-    def __init__(self, verbose: bool = True) -> None:
+    def __init__(self, verbose: bool = True):
        """Initialize the console provider.

        Args:
@@ -139,55 +124,3 @@ class ConsoleProvider:
        finally:
            # Resume live updates
            formatter.resume_live_updates()
-
-    def request_input(
-        self,
-        message: str,
-        flow: Flow[Any],
-        metadata: dict[str, Any] | None = None,
-    ) -> str | None:
-        """Request user input via console (blocking).
-
-        Displays the prompt message with formatting and waits for the user
-        to type their response. Used by ``Flow.ask()``.
-
-        Unlike ``request_feedback``, this method does not display an
-        "OUTPUT FOR REVIEW" panel or emit feedback-specific events (those
-        are handled by ``ask()`` itself).
-
-        Args:
-            message: The question or prompt to display to the user.
-            flow: The Flow instance requesting input.
-            metadata: Optional metadata from the caller. Ignored by the
-                console provider (console has no concept of user routing).
-
-        Returns:
-            The user's input as a stripped string. Returns empty string
-            if user presses Enter without input. Never returns None
-            (console input is always available).
-        """
-        from crewai.events.event_listener import event_listener
-
-        # Pause live updates during human input
-        formatter = event_listener.formatter
-        formatter.pause_live_updates()
-
-        try:
-            console = formatter.console
-
-            if self.verbose:
-                console.print()
-                console.print(message, style="yellow")
-                console.print()
-
-                response = input(">>> \n").strip()
-            else:
-                response = input(f"{message} ").strip()
-
-            # Add line break after input so formatter output starts clean
-            console.print()
-
-            return response
-        finally:
-            # Resume live updates
-            formatter.resume_live_updates()
--- a/lib/crewai/src/crewai/flow/flow.py
+++ b/lib/crewai/src/crewai/flow/flow.py
@@ -10,7 +10,6 @@ import asyncio
 from collections.abc import (
    Callable,
    ItemsView,
-    Iterable,
    Iterator,
    KeysView,
    Sequence,
@@ -18,7 +17,6 @@ from collections.abc import (
 )
 from concurrent.futures import Future
 import copy
-import enum
 import inspect
 import logging
 import threading
@@ -29,10 +27,8 @@ from typing import (
    Generic,
    Literal,
    ParamSpec,
-    SupportsIndex,
    TypeVar,
    cast,
-    overload,
 )
 from uuid import uuid4

@@ -81,12 +77,7 @@ from crewai.flow.flow_wrappers import (
    StartMethod,
 )
 from crewai.flow.persistence.base import FlowPersistence
-from crewai.flow.types import (
-    FlowExecutionData,
-    FlowMethodName,
-    InputHistoryEntry,
-    PendingListenerKey,
-)
+from crewai.flow.types import FlowExecutionData, FlowMethodName, PendingListenerKey
 from crewai.flow.utils import (
    _extract_all_methods,
    _extract_all_methods_recursive,
@@ -435,7 +426,8 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
    """

    def __init__(self, lst: list[T], lock: threading.Lock) -> None:
-        super().__init__()  # empty builtin list; all access goes through self._list
+        # Do NOT call super().__init__() -- we don't want to copy data into
+        # the builtin list storage. All access goes through self._list.
        self._list = lst
        self._lock = lock

@@ -443,11 +435,11 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            self._list.append(item)

-    def extend(self, items: Iterable[T]) -> None:
+    def extend(self, items: list[T]) -> None:
        with self._lock:
            self._list.extend(items)

-    def insert(self, index: SupportsIndex, item: T) -> None:
+    def insert(self, index: int, item: T) -> None:
        with self._lock:
            self._list.insert(index, item)

@@ -455,7 +447,7 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            self._list.remove(item)

-    def pop(self, index: SupportsIndex = -1) -> T:
+    def pop(self, index: int = -1) -> T:
        with self._lock:
            return self._list.pop(index)

@@ -463,23 +455,15 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            self._list.clear()

-    @overload
-    def __setitem__(self, index: SupportsIndex, value: T) -> None: ...
-    @overload
-    def __setitem__(self, index: slice, value: Iterable[T]) -> None: ...
-    def __setitem__(self, index: Any, value: Any) -> None:
+    def __setitem__(self, index: int, value: T) -> None:
        with self._lock:
            self._list[index] = value

-    def __delitem__(self, index: SupportsIndex | slice) -> None:
+    def __delitem__(self, index: int) -> None:
        with self._lock:
            del self._list[index]

-    @overload
-    def __getitem__(self, index: SupportsIndex) -> T: ...
-    @overload
-    def __getitem__(self, index: slice) -> list[T]: ...
-    def __getitem__(self, index: Any) -> Any:
+    def __getitem__(self, index: int) -> T:
        return self._list[index]

    def __len__(self) -> int:
@@ -497,7 +481,7 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
    def __bool__(self) -> bool:
        return bool(self._list)

-    def __eq__(self, other: object) -> bool:
+    def __eq__(self, other: object) -> bool:  # type: ignore[override]
        """Compare based on the underlying list contents."""
        if isinstance(other, LockedListProxy):
            # Avoid deadlocks by acquiring locks in a consistent order.
@@ -508,7 +492,7 @@ class LockedListProxy(list, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            return self._list == other

-    def __ne__(self, other: object) -> bool:
+    def __ne__(self, other: object) -> bool:  # type: ignore[override]
        return not self.__eq__(other)


@@ -521,7 +505,8 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
    """

    def __init__(self, d: dict[str, T], lock: threading.Lock) -> None:
-        super().__init__()  # empty builtin dict; all access goes through self._dict
+        # Do NOT call super().__init__() -- we don't want to copy data into
+        # the builtin dict storage. All access goes through self._dict.
        self._dict = d
        self._lock = lock

@@ -533,11 +518,11 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            del self._dict[key]

-    def pop(self, key: str, *default: T) -> T:  # type: ignore[override]
+    def pop(self, key: str, *default: T) -> T:
        with self._lock:
            return self._dict.pop(key, *default)

-    def update(self, other: dict[str, T]) -> None:  # type: ignore[override]
+    def update(self, other: dict[str, T]) -> None:
        with self._lock:
            self._dict.update(other)

@@ -545,7 +530,7 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            self._dict.clear()

-    def setdefault(self, key: str, default: T) -> T:  # type: ignore[override]
+    def setdefault(self, key: str, default: T) -> T:
        with self._lock:
            return self._dict.setdefault(key, default)

@@ -561,16 +546,16 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
    def __contains__(self, key: object) -> bool:
        return key in self._dict

-    def keys(self) -> KeysView[str]:  # type: ignore[override]
+    def keys(self) -> KeysView[str]:
        return self._dict.keys()

-    def values(self) -> ValuesView[T]:  # type: ignore[override]
+    def values(self) -> ValuesView[T]:
        return self._dict.values()

-    def items(self) -> ItemsView[str, T]:  # type: ignore[override]
+    def items(self) -> ItemsView[str, T]:
        return self._dict.items()

-    def get(self, key: str, default: T | None = None) -> T | None:  # type: ignore[override]
+    def get(self, key: str, default: T | None = None) -> T | None:
        return self._dict.get(key, default)

    def __repr__(self) -> str:
@@ -579,7 +564,7 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
    def __bool__(self) -> bool:
        return bool(self._dict)

-    def __eq__(self, other: object) -> bool:
+    def __eq__(self, other: object) -> bool:  # type: ignore[override]
        """Compare based on the underlying dict contents."""
        if isinstance(other, LockedDictProxy):
            # Avoid deadlocks by acquiring locks in a consistent order.
@@ -590,7 +575,7 @@ class LockedDictProxy(dict, Generic[T]):  # type: ignore[type-arg]
        with self._lock:
            return self._dict == other

-    def __ne__(self, other: object) -> bool:
+    def __ne__(self, other: object) -> bool:  # type: ignore[override]
        return not self.__eq__(other)


@@ -752,10 +737,7 @@ class Flow(Generic[T], metaclass=FlowMeta):
    name: str | None = None
    tracing: bool | None = None
    stream: bool = False
-    memory: Any = (
-        None  # Memory | MemoryScope | MemorySlice | None; auto-created if not set
-    )
-    input_provider: Any = None  # InputProvider | None; per-flow override for self.ask()
+    memory: Any = None  # Memory | MemoryScope | MemorySlice | None; auto-created if not set

    def __class_getitem__(cls: type[Flow[T]], item: type[T]) -> type[Flow[T]]:
        class _FlowGeneric(cls):  # type: ignore
@@ -802,9 +784,6 @@ class Flow(Generic[T], metaclass=FlowMeta):
        self._pending_feedback_context: PendingFeedbackContext | None = None
        self.suppress_flow_events: bool = suppress_flow_events

-        # User input history (for self.ask())
-        self._input_history: list[InputHistoryEntry] = []
-
        # Initialize state with initial values
        self._state = self._create_initial_state()
        self.tracing = tracing
@@ -898,8 +877,7 @@ class Flow(Generic[T], metaclass=FlowMeta):
        """
        if self.memory is None:
            raise ValueError("No memory configured for this flow")
-        result: list[str] = self.memory.extract_memories(content)
-        return result
+        return self.memory.extract_memories(content)

    def _mark_or_listener_fired(self, listener_name: FlowMethodName) -> bool:
        """Mark an OR listener as fired atomically.
@@ -1370,10 +1348,8 @@ class Flow(Generic[T], metaclass=FlowMeta):
            ValueError: If structured state model lacks 'id' field
            TypeError: If state is neither BaseModel nor dictionary
        """
-        init_state = self.initial_state
-
        # Handle case where initial_state is None but we have a type parameter
-        if init_state is None and hasattr(self, "_initial_state_t"):
+        if self.initial_state is None and hasattr(self, "_initial_state_t"):
            state_type = self._initial_state_t
            if isinstance(state_type, type):
                if issubclass(state_type, FlowState):
@@ -1397,12 +1373,12 @@ class Flow(Generic[T], metaclass=FlowMeta):
                    return cast(T, {"id": str(uuid4())})

        # Handle case where no initial state is provided
-        if init_state is None:
+        if self.initial_state is None:
            return cast(T, {"id": str(uuid4())})

        # Handle case where initial_state is a type (class)
-        if isinstance(init_state, type):
-            state_class = init_state
+        if isinstance(self.initial_state, type):
+            state_class: type[T] = self.initial_state
            if issubclass(state_class, FlowState):
                return state_class()
            if issubclass(state_class, BaseModel):
@@ -1413,19 +1389,19 @@ class Flow(Generic[T], metaclass=FlowMeta):
                if not getattr(model_instance, "id", None):
                    object.__setattr__(model_instance, "id", str(uuid4()))
                return model_instance
-            if init_state is dict:
+            if self.initial_state is dict:
                return cast(T, {"id": str(uuid4())})

        # Handle dictionary instance case
-        if isinstance(init_state, dict):
-            new_state = dict(init_state)  # Copy to avoid mutations
+        if isinstance(self.initial_state, dict):
+            new_state = dict(self.initial_state)  # Copy to avoid mutations
            if "id" not in new_state:
                new_state["id"] = str(uuid4())
            return cast(T, new_state)

        # Handle BaseModel instance case
-        if isinstance(init_state, BaseModel):
-            model = cast(BaseModel, init_state)
+        if isinstance(self.initial_state, BaseModel):
+            model = cast(BaseModel, self.initial_state)
            if not hasattr(model, "id"):
                raise ValueError("Flow state model must have an 'id' field")

@@ -1824,13 +1800,8 @@ class Flow(Generic[T], metaclass=FlowMeta):
                self._pending_and_listeners.clear()
                self._clear_or_listeners()
            else:
-                # Only enter resumption mode if there are completed methods to
-                # replay.  When _completed_methods is empty (e.g. a pure
-                # state-reload via kickoff(inputs={"id": ...})), the flow
-                # executes from scratch and the flag would incorrectly
-                # suppress cyclic re-execution on the second iteration.
-                if self._completed_methods:
-                    self._is_execution_resuming = True
+                # We're restoring from persistence, set the flag
+                self._is_execution_resuming = True

            if inputs:
                # Override the id in the state if it exists in inputs
@@ -2148,24 +2119,15 @@ class Flow(Generic[T], metaclass=FlowMeta):
                if future:
                    self._event_futures.append(future)

-            # Set method name in context so ask() can read it without
-            # stack inspection.  Must happen before copy_context() so the
-            # value propagates into the thread pool for sync methods.
-            from crewai.flow.flow_context import current_flow_method_name
+            if asyncio.iscoroutinefunction(method):
+                result = await method(*args, **kwargs)
+            else:
+                # Run sync methods in thread pool for isolation
+                # This allows Agent.kickoff() to work synchronously inside Flow methods
+                import contextvars

-            method_name_token = current_flow_method_name.set(method_name)
-            try:
-                if asyncio.iscoroutinefunction(method):
-                    result = await method(*args, **kwargs)
-                else:
-                    # Run sync methods in thread pool for isolation
-                    # This allows Agent.kickoff() to work synchronously inside Flow methods
-                    import contextvars
-
-                    ctx = contextvars.copy_context()
-                    result = await asyncio.to_thread(ctx.run, method, *args, **kwargs)
-            finally:
-                current_flow_method_name.reset(method_name_token)
+                ctx = contextvars.copy_context()
+                result = await asyncio.to_thread(ctx.run, method, *args, **kwargs)

            # Auto-await coroutines returned from sync methods (enables AgentExecutor pattern)
            if asyncio.iscoroutine(result):
@@ -2198,8 +2160,6 @@ class Flow(Generic[T], metaclass=FlowMeta):
            from crewai.flow.async_feedback.types import HumanFeedbackPending

            if isinstance(e, HumanFeedbackPending):
-                e.context.method_name = method_name
-
                # Auto-save pending feedback (create default persistence if needed)
                if self._persistence is None:
                    from crewai.flow.persistence import SQLiteFlowPersistence
@@ -2299,23 +2259,14 @@ class Flow(Generic[T], metaclass=FlowMeta):
                    router_name, router_input, current_triggering_event_id
                )
                if router_result:  # Only add non-None results
-                    router_result_str = (
-                        router_result.value
-                        if isinstance(router_result, enum.Enum)
-                        else str(router_result)
-                    )
-                    router_results.append(FlowMethodName(router_result_str))
+                    router_results.append(FlowMethodName(str(router_result)))
                    # If this was a human_feedback router, map the outcome to the feedback
                    if self.last_human_feedback is not None:
-                        router_result_to_feedback[router_result_str] = (
+                        router_result_to_feedback[str(router_result)] = (
                            self.last_human_feedback
                        )
                current_trigger = (
-                    FlowMethodName(
-                        router_result.value
-                        if isinstance(router_result, enum.Enum)
-                        else str(router_result)
-                    )
+                    FlowMethodName(str(router_result))
                    if router_result is not None
                    else FlowMethodName("")  # Update for next iteration of router chain
                )
@@ -2631,206 +2582,6 @@ class Flow(Generic[T], metaclass=FlowMeta):
                logger.error(f"Error executing listener {listener_name}: {e}")
            raise

-    # ── User Input (self.ask) ────────────────────────────────────────
-
-    def _resolve_input_provider(self) -> Any:
-        """Resolve the input provider using the priority chain.
-
-        Resolution order:
-        1. ``self.input_provider`` (per-flow override)
-        2. ``flow_config.input_provider`` (global default)
-        3. ``ConsoleInputProvider()`` (built-in fallback)
-
-        Returns:
-            An object implementing the ``InputProvider`` protocol.
-        """
-        from crewai.flow.async_feedback.providers import ConsoleProvider
-        from crewai.flow.flow_config import flow_config
-
-        if self.input_provider is not None:
-            return self.input_provider
-        if flow_config.input_provider is not None:
-            return flow_config.input_provider
-        return ConsoleProvider()
-
-    def _checkpoint_state_for_ask(self) -> None:
-        """Auto-checkpoint flow state before waiting for user input.
-
-        If persistence is configured, saves the current state so that
-        ``self.state`` is recoverable even if the process crashes while
-        waiting for input.
-
-        This is best-effort: if persistence is not configured, this is a no-op.
-        """
-        if self._persistence is None:
-            return
-        try:
-            state_data = (
-                self._state
-                if isinstance(self._state, dict)
-                else self._state.model_dump()
-            )
-            self._persistence.save_state(
-                flow_uuid=self.flow_id,
-                method_name="_ask_checkpoint",
-                state_data=state_data,
-            )
-        except Exception:
-            logger.debug("Failed to checkpoint state before ask()", exc_info=True)
-
-    def ask(
-        self,
-        message: str,
-        timeout: float | None = None,
-        metadata: dict[str, Any] | None = None,
-    ) -> str | None:
-        """Request input from the user during flow execution.
-
-        Blocks the current thread until the user provides input or the
-        timeout expires. Works in both sync and async flow methods (the
-        flow framework runs sync methods in a thread pool via
-        ``asyncio.to_thread``, so the event loop stays free).
-
-        Timeout ensures flows always terminate. When timeout expires,
-        ``None`` is returned, enabling the pattern::
-
-            while (msg := self.ask("You: ", timeout=300)) is not None:
-                process(msg)
-
-        Before waiting for input, the current ``self.state`` is automatically
-        checkpointed to persistence (if configured) for durability.
-
-        Args:
-            message: The question or prompt to display to the user.
-            timeout: Maximum seconds to wait for input. ``None`` means
-                wait indefinitely. When timeout expires, returns ``None``.
-                Note: timeout is best-effort for the provider call --
-                ``ask()`` returns ``None`` promptly, but the underlying
-                ``request_input()`` may continue running in a background
-                thread until it completes naturally. Network providers
-                should implement their own internal timeouts.
-            metadata: Optional metadata to send to the input provider,
-                such as user ID, channel, session context. The provider
-                can use this to route the question to the right recipient.
-
-        Returns:
-            The user's input as a string, or ``None`` on timeout, disconnect,
-            or provider error. Empty string ``""`` means the user pressed
-            Enter without typing (intentional empty input).
-
-        Example:
-            ```python
-            class MyFlow(Flow):
-                @start()
-                def gather_info(self):
-                    topic = self.ask(
-                        "What topic should we research?",
-                        metadata={"user_id": "u123", "channel": "#research"},
-                    )
-                    if topic is None:
-                        return "No input received"
-                    return topic
-            ```
-        """
-        from concurrent.futures import (
-            ThreadPoolExecutor,
-            TimeoutError as FuturesTimeoutError,
-        )
-        from datetime import datetime
-
-        from crewai.events.types.flow_events import (
-            FlowInputReceivedEvent,
-            FlowInputRequestedEvent,
-        )
-        from crewai.flow.flow_context import current_flow_method_name
-        from crewai.flow.input_provider import InputResponse
-
-        method_name = current_flow_method_name.get("unknown")
-
-        # Emit input requested event
-        crewai_event_bus.emit(
-            self,
-            FlowInputRequestedEvent(
-                type="flow_input_requested",
-                flow_name=self.name or self.__class__.__name__,
-                method_name=method_name,
-                message=message,
-                metadata=metadata,
-            ),
-        )
-
-        # Auto-checkpoint state before waiting
-        self._checkpoint_state_for_ask()
-
-        provider = self._resolve_input_provider()
-        raw: str | InputResponse | None = None
-
-        try:
-            if timeout is not None:
-                # Manual executor management to avoid shutdown(wait=True)
-                # deadlock when the provider call outlives the timeout.
-                executor = ThreadPoolExecutor(max_workers=1)
-                future = executor.submit(
-                    provider.request_input, message, self, metadata
-                )
-                try:
-                    raw = future.result(timeout=timeout)
-                except FuturesTimeoutError:
-                    future.cancel()
-                    raw = None
-                finally:
-                    # wait=False so we don't block if the provider is still
-                    # running (e.g. input() stuck waiting for user).
-                    # cancel_futures=True cleans up any queued-but-not-started tasks.
-                    executor.shutdown(wait=False, cancel_futures=True)
-            else:
-                raw = provider.request_input(message, self, metadata=metadata)
-        except KeyboardInterrupt:
-            raise
-        except Exception:
-            logger.debug("Input provider error in ask()", exc_info=True)
-            raw = None
-
-        # Normalize provider response: str, InputResponse, or None
-        response: str | None = None
-        response_metadata: dict[str, Any] | None = None
-
-        if isinstance(raw, InputResponse):
-            response = raw.text
-            response_metadata = raw.metadata
-        elif isinstance(raw, str):
-            response = raw
-        else:
-            response = None
-
-        # Record in history
-        self._input_history.append(
-            {
-                "message": message,
-                "response": response,
-                "method_name": method_name,
-                "timestamp": datetime.now(),
-                "metadata": metadata,
-                "response_metadata": response_metadata,
-            }
-        )
-
-        # Emit input received event
-        crewai_event_bus.emit(
-            self,
-            FlowInputReceivedEvent(
-                type="flow_input_received",
-                flow_name=self.name or self.__class__.__name__,
-                method_name=method_name,
-                message=message,
-                response=response,
-                metadata=metadata,
-                response_metadata=response_metadata,
-            ),
-        )
-
-        return response
-
    def _request_human_feedback(
        self,
        message: str,
--- a/lib/crewai/src/crewai/flow/flow_config.py
+++ b/lib/crewai/src/crewai/flow/flow_config.py
@@ -11,7 +11,6 @@ from typing import TYPE_CHECKING, Any

 if TYPE_CHECKING:
    from crewai.flow.async_feedback.types import HumanFeedbackProvider
-    from crewai.flow.input_provider import InputProvider


 class FlowConfig:
@@ -21,15 +20,10 @@ class FlowConfig:
        hitl_provider: The human-in-the-loop feedback provider.
                       Defaults to None (uses console input).
                       Can be overridden by deployments at startup.
-        input_provider: The input provider used by ``Flow.ask()``.
-                        Defaults to None (uses ``ConsoleProvider``).
-                        Can be overridden by
-                        deployments at startup.
    """

    def __init__(self) -> None:
        self._hitl_provider: HumanFeedbackProvider | None = None
-        self._input_provider: InputProvider | None = None

    @property
    def hitl_provider(self) -> Any:
@@ -41,32 +35,6 @@ class FlowConfig:
        """Set the HITL provider."""
        self._hitl_provider = provider

-    @property
-    def input_provider(self) -> Any:
-        """Get the configured input provider for ``Flow.ask()``.
-
-        Returns:
-            The configured InputProvider instance, or None if not set
-            (in which case ``ConsoleInputProvider`` is used as default).
-        """
-        return self._input_provider
-
-    @input_provider.setter
-    def input_provider(self, provider: Any) -> None:
-        """Set the input provider for ``Flow.ask()``.
-
-        Args:
-            provider: An object implementing the ``InputProvider`` protocol.
-
-        Example:
-            ```python
-            from crewai.flow import flow_config
-
-            flow_config.input_provider = WebSocketInputProvider(...)
-            ```
-        """
-        self._input_provider = provider
-

 # Singleton instance
 flow_config = FlowConfig()
--- a/lib/crewai/src/crewai/flow/flow_context.py
+++ b/lib/crewai/src/crewai/flow/flow_context.py
@@ -14,7 +14,3 @@ current_flow_request_id: contextvars.ContextVar[str | None] = contextvars.Contex
 current_flow_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "flow_id", default=None
 )
-
-current_flow_method_name: contextvars.ContextVar[str] = contextvars.ContextVar(
-    "flow_method_name", default="unknown"
-)
--- a/lib/crewai/src/crewai/flow/input_provider.py
+++ b/lib/crewai/src/crewai/flow/input_provider.py
@@ -1,151 +0,0 @@
-"""Input provider protocol for Flow.ask().
-
-This module provides the InputProvider protocol and InputResponse dataclass
-used by Flow.ask() to request input from users during flow execution.
-
-The default implementation is ``ConsoleProvider`` (from
-``crewai.flow.async_feedback.providers``), which serves both feedback
-and input collection via console.
-
-Example (default console input):
-    ```python
-    from crewai.flow import Flow, start
-
-
-    class MyFlow(Flow):
-        @start()
-        def gather_info(self):
-            topic = self.ask("What topic should we research?")
-            return topic
-    ```
-
-Example (custom provider with metadata):
-    ```python
-    from crewai.flow import Flow, start
-    from crewai.flow.input_provider import InputProvider, InputResponse
-
-
-    class SlackProvider:
-        def request_input(self, message, flow, metadata=None):
-            channel = metadata.get("channel", "#general") if metadata else "#general"
-            thread = self.post_question(channel, message)
-            reply = self.wait_for_reply(thread)
-            return InputResponse(
-                text=reply.text,
-                metadata={"responded_by": reply.user_id, "thread_id": thread.id},
-            )
-
-
-    class MyFlow(Flow):
-        input_provider = SlackProvider()
-
-        @start()
-        def gather_info(self):
-            topic = self.ask("What topic?", metadata={"channel": "#research"})
-            return topic
-    ```
-"""
-
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
-
-
-if TYPE_CHECKING:
-    from crewai.flow.flow import Flow
-
-
-@dataclass
-class InputResponse:
-    """Response from an InputProvider, optionally carrying metadata.
-
-    Simple providers can just return a string from ``request_input()``.
-    Providers that need to send metadata back (e.g., who responded,
-    thread ID, external timestamps) return an ``InputResponse`` instead.
-
-    ``ask()`` normalizes both cases -- callers always get ``str | None``.
-    The response metadata is stored in ``_input_history`` and emitted
-    in ``FlowInputReceivedEvent``.
-
-    Attributes:
-        text: The user's input text, or None if unavailable.
-        metadata: Optional metadata from the provider about the response
-            (e.g., who responded, thread ID, timestamps).
-
-    Example:
-        ```python
-        class MyProvider:
-            def request_input(self, message, flow, metadata=None):
-                response = get_response_from_external_system(message)
-                return InputResponse(
-                    text=response.text,
-                    metadata={"responded_by": response.user_id},
-                )
-        ```
-    """
-
-    text: str | None
-    metadata: dict[str, Any] | None = field(default=None)
-
-
-@runtime_checkable
-class InputProvider(Protocol):
-    """Protocol for user input collection strategies.
-
-    Implement this protocol to create custom input providers that integrate
-    with external systems like websockets, web UIs, Slack, or custom APIs.
-
-    The default provider is ``ConsoleProvider``, which blocks waiting for
-    console input via Python's built-in ``input()`` function.
-
-    Providers are always synchronous. The flow framework runs sync methods
-    in a thread pool (via ``asyncio.to_thread``), so ``ask()`` never blocks
-    the event loop even inside async flow methods.
-
-    Providers can return either:
-    - ``str | None`` for simple cases (no response metadata)
-    - ``InputResponse`` when they need to send metadata back with the answer
-
-    Example (simple):
-        ```python
-        class SimpleProvider:
-            def request_input(self, message: str, flow: Flow) -> str | None:
-                return input(message)
-        ```
-
-    Example (with metadata):
-        ```python
-        class SlackProvider:
-            def request_input(self, message, flow, metadata=None):
-                channel = metadata.get("channel") if metadata else "#general"
-                reply = self.post_and_wait(channel, message)
-                return InputResponse(
-                    text=reply.text,
-                    metadata={"responded_by": reply.user_id},
-                )
-        ```
-    """
-
-    def request_input(
-        self,
-        message: str,
-        flow: Flow[Any],
-        metadata: dict[str, Any] | None = None,
-    ) -> str | InputResponse | None:
-        """Request input from the user.
-
-        Args:
-            message: The question or prompt to display to the user.
-            flow: The Flow instance requesting input. Can be used to
-                access flow state, name, or other context.
-            metadata: Optional metadata from the caller, such as user ID,
-                channel, session context, etc. Providers can use this to
-                route the question to the right recipient.
-
-        Returns:
-            The user's input as a string, an ``InputResponse`` with text
-            and optional response metadata, or None if input is unavailable
-            (e.g., user cancelled, connection dropped).
-        """
-        ...
--- a/lib/crewai/src/crewai/flow/types.py
+++ b/lib/crewai/src/crewai/flow/types.py
@@ -4,7 +4,6 @@ This module contains TypedDict definitions and type aliases used throughout
 the Flow system.
 """

-from datetime import datetime
 from typing import (
    Annotated,
    Any,
@@ -102,30 +101,6 @@ class FlowData(TypedDict):
    flow_methods_attributes: list[FlowMethodData]


-class InputHistoryEntry(TypedDict):
-    """A single entry in the flow's input history from ``self.ask()``.
-
-    Each call to ``Flow.ask()`` appends one entry recording the question,
-    the user's response, which method asked, and any metadata exchanged
-    between the caller and the input provider.
-
-    Attributes:
-        message: The question or prompt that was displayed to the user.
-        response: The user's response, or None on timeout/error.
-        method_name: The flow method that called ``ask()``.
-        timestamp: When the input was received.
-        metadata: Metadata sent with the question (caller to provider).
-        response_metadata: Metadata received with the answer (provider to caller).
-    """
-
-    message: str
-    response: str | None
-    method_name: str
-    timestamp: datetime
-    metadata: dict[str, Any] | None
-    response_metadata: dict[str, Any] | None
-
-
 class FlowExecutionData(TypedDict):
    """Flow execution data.

--- a/lib/crewai/tests/agents/test_agent_executor.py
+++ b/lib/crewai/tests/agents/test_agent_executor.py
@@ -4,7 +4,6 @@ Tests the Flow-based agent executor implementation including state management,
 flow methods, routing logic, and error handling.
 """

-import time
 from unittest.mock import Mock, patch

 import pytest
@@ -463,176 +462,3 @@ class TestFlowInvoke:

        assert result == {"output": "Done"}
        assert len(executor.state.messages) >= 2
-
-
-class TestNativeToolExecution:
-    """Test native tool execution behavior."""
-
-    @pytest.fixture
-    def mock_dependencies(self):
-        llm = Mock()
-        llm.supports_stop_words.return_value = True
-
-        task = Mock()
-        task.name = "Test Task"
-        task.description = "Test"
-        task.human_input = False
-        task.response_model = None
-
-        crew = Mock()
-        crew._memory = None
-        crew.verbose = False
-        crew._train = False
-
-        agent = Mock()
-        agent.id = "test-agent-id"
-        agent.role = "Test Agent"
-        agent.verbose = False
-        agent.key = "test-key"
-
-        prompt = {"prompt": "Test {input} {tool_names} {tools}"}
-
-        tools_handler = Mock()
-        tools_handler.cache = None
-
-        return {
-            "llm": llm,
-            "task": task,
-            "crew": crew,
-            "agent": agent,
-            "prompt": prompt,
-            "max_iter": 10,
-            "tools": [],
-            "tools_names": "",
-            "stop_words": [],
-            "tools_description": "",
-            "tools_handler": tools_handler,
-        }
-
-    def test_execute_native_tool_runs_parallel_for_multiple_calls(
-        self, mock_dependencies
-    ):
-        executor = AgentExecutor(**mock_dependencies)
-
-        def slow_one() -> str:
-            time.sleep(0.2)
-            return "one"
-
-        def slow_two() -> str:
-            time.sleep(0.2)
-            return "two"
-
-        executor._available_functions = {"slow_one": slow_one, "slow_two": slow_two}
-        executor.state.pending_tool_calls = [
-            {
-                "id": "call_1",
-                "function": {"name": "slow_one", "arguments": "{}"},
-            },
-            {
-                "id": "call_2",
-                "function": {"name": "slow_two", "arguments": "{}"},
-            },
-        ]
-
-        started = time.perf_counter()
-        result = executor.execute_native_tool()
-        elapsed = time.perf_counter() - started
-
-        assert result == "native_tool_completed"
-        assert elapsed < 0.5
-        tool_messages = [m for m in executor.state.messages if m.get("role") == "tool"]
-        assert len(tool_messages) == 2
-        assert tool_messages[0]["tool_call_id"] == "call_1"
-        assert tool_messages[1]["tool_call_id"] == "call_2"
-
-    def test_execute_native_tool_falls_back_to_sequential_for_result_as_answer(
-        self, mock_dependencies
-    ):
-        executor = AgentExecutor(**mock_dependencies)
-
-        def slow_one() -> str:
-            time.sleep(0.2)
-            return "one"
-
-        def slow_two() -> str:
-            time.sleep(0.2)
-            return "two"
-
-        result_tool = Mock()
-        result_tool.name = "slow_one"
-        result_tool.result_as_answer = True
-        result_tool.max_usage_count = None
-        result_tool.current_usage_count = 0
-
-        executor.original_tools = [result_tool]
-        executor._available_functions = {"slow_one": slow_one, "slow_two": slow_two}
-        executor.state.pending_tool_calls = [
-            {
-                "id": "call_1",
-                "function": {"name": "slow_one", "arguments": "{}"},
-            },
-            {
-                "id": "call_2",
-                "function": {"name": "slow_two", "arguments": "{}"},
-            },
-        ]
-
-        started = time.perf_counter()
-        result = executor.execute_native_tool()
-        elapsed = time.perf_counter() - started
-
-        assert result == "tool_result_is_final"
-        assert elapsed >= 0.2
-        assert elapsed < 0.8
-        assert isinstance(executor.state.current_answer, AgentFinish)
-        assert executor.state.current_answer.output == "one"
-
-    def test_execute_native_tool_result_as_answer_short_circuits_remaining_calls(
-        self, mock_dependencies
-    ):
-        executor = AgentExecutor(**mock_dependencies)
-        call_counts = {"slow_one": 0, "slow_two": 0}
-
-        def slow_one() -> str:
-            call_counts["slow_one"] += 1
-            time.sleep(0.2)
-            return "one"
-
-        def slow_two() -> str:
-            call_counts["slow_two"] += 1
-            time.sleep(0.2)
-            return "two"
-
-        result_tool = Mock()
-        result_tool.name = "slow_one"
-        result_tool.result_as_answer = True
-        result_tool.max_usage_count = None
-        result_tool.current_usage_count = 0
-
-        executor.original_tools = [result_tool]
-        executor._available_functions = {"slow_one": slow_one, "slow_two": slow_two}
-        executor.state.pending_tool_calls = [
-            {
-                "id": "call_1",
-                "function": {"name": "slow_one", "arguments": "{}"},
-            },
-            {
-                "id": "call_2",
-                "function": {"name": "slow_two", "arguments": "{}"},
-            },
-        ]
-
-        started = time.perf_counter()
-        result = executor.execute_native_tool()
-        elapsed = time.perf_counter() - started
-
-        assert result == "tool_result_is_final"
-        assert isinstance(executor.state.current_answer, AgentFinish)
-        assert executor.state.current_answer.output == "one"
-        assert call_counts["slow_one"] == 1
-        assert call_counts["slow_two"] == 0
-        assert elapsed < 0.5
-
-        tool_messages = [m for m in executor.state.messages if m.get("role") == "tool"]
-        assert len(tool_messages) == 1
-        assert tool_messages[0]["tool_call_id"] == "call_1"
--- a/lib/crewai/tests/agents/test_native_tool_calling.py
+++ b/lib/crewai/tests/agents/test_native_tool_calling.py
@@ -6,20 +6,14 @@ when the LLM supports it, across multiple providers.

 from __future__ import annotations

-from collections.abc import Generator
 import os
-import threading
-import time
-from collections import Counter
+from typing import Type
 from unittest.mock import patch

 import pytest
 from pydantic import BaseModel, Field

 from crewai import Agent, Crew, Task
-from crewai.events import crewai_event_bus
-from crewai.hooks import register_after_tool_call_hook, register_before_tool_call_hook
-from crewai.hooks.tool_hooks import ToolCallHookContext
 from crewai.llm import LLM
 from crewai.tools.base_tool import BaseTool

@@ -71,73 +65,6 @@ class FailingTool(BaseTool):
    def _run(self) -> str:
        raise Exception("This tool always fails")

-
-class LocalSearchInput(BaseModel):
-    query: str = Field(description="Search query")
-
-
-class ParallelProbe:
-    """Thread-safe in-memory recorder for tool execution windows."""
-
-    _lock = threading.Lock()
-    _windows: list[tuple[str, float, float]] = []
-
-    @classmethod
-    def reset(cls) -> None:
-        with cls._lock:
-            cls._windows = []
-
-    @classmethod
-    def record(cls, tool_name: str, start: float, end: float) -> None:
-        with cls._lock:
-            cls._windows.append((tool_name, start, end))
-
-    @classmethod
-    def windows(cls) -> list[tuple[str, float, float]]:
-        with cls._lock:
-            return list(cls._windows)
-
-
-def _parallel_prompt() -> str:
-    return (
-        "This is a tool-calling compliance test. "
-        "In your next assistant turn, emit exactly 3 tool calls in the same response (parallel tool calls), in this order: "
-        "1) parallel_local_search_one(query='latest OpenAI model release notes'), "
-        "2) parallel_local_search_two(query='latest Anthropic model release notes'), "
-        "3) parallel_local_search_three(query='latest Gemini model release notes'). "
-        "Do not call any other tools and do not answer before those 3 tool calls are emitted. "
-        "After the tool results return, provide a one paragraph summary."
-    )
-
-
-def _max_concurrency(windows: list[tuple[str, float, float]]) -> int:
-    points: list[tuple[float, int]] = []
-    for _, start, end in windows:
-        points.append((start, 1))
-        points.append((end, -1))
-    points.sort(key=lambda p: (p[0], p[1]))
-
-    current = 0
-    maximum = 0
-    for _, delta in points:
-        current += delta
-        if current > maximum:
-            maximum = current
-    return maximum
-
-
-def _assert_tools_overlapped() -> None:
-    windows = ParallelProbe.windows()
-    local_windows = [
-        w
-        for w in windows
-        if w[0].startswith("parallel_local_search_")
-    ]
-
-    assert len(local_windows) >= 3, f"Expected at least 3 local tool calls, got {len(local_windows)}"
-    assert _max_concurrency(local_windows) >= 2, "Expected overlapping local tool executions"
-
-
@pytest.fixture
 def calculator_tool() -> CalculatorTool:
    """Create a calculator tool for testing."""
@@ -156,65 +83,6 @@ def failing_tool() -> BaseTool:

    )

-
-@pytest.fixture
-def parallel_tools() -> list[BaseTool]:
-    """Create local tools used to verify native parallel execution deterministically."""
-
-    class ParallelLocalSearchOne(BaseTool):
-        name: str = "parallel_local_search_one"
-        description: str = "Local search tool #1 for concurrency testing."
-        args_schema: type[BaseModel] = LocalSearchInput
-
-        def _run(self, query: str) -> str:
-            start = time.perf_counter()
-            time.sleep(1.0)
-            end = time.perf_counter()
-            ParallelProbe.record(self.name, start, end)
-            return f"[one] {query}"
-
-    class ParallelLocalSearchTwo(BaseTool):
-        name: str = "parallel_local_search_two"
-        description: str = "Local search tool #2 for concurrency testing."
-        args_schema: type[BaseModel] = LocalSearchInput
-
-        def _run(self, query: str) -> str:
-            start = time.perf_counter()
-            time.sleep(1.0)
-            end = time.perf_counter()
-            ParallelProbe.record(self.name, start, end)
-            return f"[two] {query}"
-
-    class ParallelLocalSearchThree(BaseTool):
-        name: str = "parallel_local_search_three"
-        description: str = "Local search tool #3 for concurrency testing."
-        args_schema: type[BaseModel] = LocalSearchInput
-
-        def _run(self, query: str) -> str:
-            start = time.perf_counter()
-            time.sleep(1.0)
-            end = time.perf_counter()
-            ParallelProbe.record(self.name, start, end)
-            return f"[three] {query}"
-
-    return [
-        ParallelLocalSearchOne(),
-        ParallelLocalSearchTwo(),
-        ParallelLocalSearchThree(),
-    ]
-
-
-def _attach_parallel_probe_handler() -> None:
-    @crewai_event_bus.on(ToolUsageFinishedEvent)
-    def _capture_tool_window(_source, event: ToolUsageFinishedEvent):
-        if not event.tool_name.startswith("parallel_local_search_"):
-            return
-        ParallelProbe.record(
-            event.tool_name,
-            event.started_at.timestamp(),
-            event.finished_at.timestamp(),
-        )
-
 # =============================================================================
 # OpenAI Provider Tests
 # =============================================================================
@@ -255,7 +123,7 @@ class TestOpenAINativeToolCalling:
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test OpenAI agent kickoff with mocked LLM call."""
-        llm = LLM(model="gpt-5-nano")
+        llm = LLM(model="gpt-4o-mini")

        with patch.object(llm, "call", return_value="The answer is 120.") as mock_call:
            agent = Agent(
@@ -279,174 +147,6 @@ class TestOpenAINativeToolCalling:
            assert mock_call.called
            assert result is not None

-    @pytest.mark.vcr()
-    @pytest.mark.timeout(180)
-    def test_openai_parallel_native_tool_calling_test_crew(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        agent = Agent(
-            role="Parallel Tool Agent",
-            goal="Use both tools exactly as instructed",
-            backstory="You follow tool instructions precisely.",
-            tools=parallel_tools,
-            llm=LLM(model="gpt-5-nano", temperature=1),
-            verbose=False,
-            max_iter=3,
-        )
-        task = Task(
-            description=_parallel_prompt(),
-            expected_output="A one sentence summary of both tool outputs",
-            agent=agent,
-        )
-        crew = Crew(agents=[agent], tasks=[task])
-        result = crew.kickoff()
-        assert result is not None
-        _assert_tools_overlapped()
-
-    @pytest.mark.vcr()
-    @pytest.mark.timeout(180)
-    def test_openai_parallel_native_tool_calling_test_agent_kickoff(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        agent = Agent(
-            role="Parallel Tool Agent",
-            goal="Use both tools exactly as instructed",
-            backstory="You follow tool instructions precisely.",
-            tools=parallel_tools,
-            llm=LLM(model="gpt-4o-mini"),
-            verbose=False,
-            max_iter=3,
-        )
-        result = agent.kickoff(_parallel_prompt())
-        assert result is not None
-        _assert_tools_overlapped()
-
-    @pytest.mark.vcr()
-    @pytest.mark.timeout(180)
-    def test_openai_parallel_native_tool_calling_tool_hook_parity_crew(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        hook_calls: dict[str, list[dict[str, str]]] = {"before": [], "after": []}
-
-        def before_hook(context: ToolCallHookContext) -> bool | None:
-            if context.tool_name.startswith("parallel_local_search_"):
-                hook_calls["before"].append(
-                    {
-                        "tool_name": context.tool_name,
-                        "query": str(context.tool_input.get("query", "")),
-                    }
-                )
-            return None
-
-        def after_hook(context: ToolCallHookContext) -> str | None:
-            if context.tool_name.startswith("parallel_local_search_"):
-                hook_calls["after"].append(
-                    {
-                        "tool_name": context.tool_name,
-                        "query": str(context.tool_input.get("query", "")),
-                    }
-                )
-            return None
-
-        register_before_tool_call_hook(before_hook)
-        register_after_tool_call_hook(after_hook)
-
-        try:
-            agent = Agent(
-                role="Parallel Tool Agent",
-                goal="Use both tools exactly as instructed",
-                backstory="You follow tool instructions precisely.",
-                tools=parallel_tools,
-                llm=LLM(model="gpt-5-nano", temperature=1),
-                verbose=False,
-                max_iter=3,
-            )
-            task = Task(
-                description=_parallel_prompt(),
-                expected_output="A one sentence summary of both tool outputs",
-                agent=agent,
-            )
-            crew = Crew(agents=[agent], tasks=[task])
-            result = crew.kickoff()
-
-            assert result is not None
-            _assert_tools_overlapped()
-
-            before_names = [call["tool_name"] for call in hook_calls["before"]]
-            after_names = [call["tool_name"] for call in hook_calls["after"]]
-            assert len(before_names) >= 3, "Expected before hooks for all parallel calls"
-            assert Counter(before_names) == Counter(after_names)
-            assert all(call["query"] for call in hook_calls["before"])
-            assert all(call["query"] for call in hook_calls["after"])
-        finally:
-            from crewai.hooks import (
-                unregister_after_tool_call_hook,
-                unregister_before_tool_call_hook,
-            )
-
-            unregister_before_tool_call_hook(before_hook)
-            unregister_after_tool_call_hook(after_hook)
-
-    @pytest.mark.vcr()
-    @pytest.mark.timeout(180)
-    def test_openai_parallel_native_tool_calling_tool_hook_parity_agent_kickoff(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        hook_calls: dict[str, list[dict[str, str]]] = {"before": [], "after": []}
-
-        def before_hook(context: ToolCallHookContext) -> bool | None:
-            if context.tool_name.startswith("parallel_local_search_"):
-                hook_calls["before"].append(
-                    {
-                        "tool_name": context.tool_name,
-                        "query": str(context.tool_input.get("query", "")),
-                    }
-                )
-            return None
-
-        def after_hook(context: ToolCallHookContext) -> str | None:
-            if context.tool_name.startswith("parallel_local_search_"):
-                hook_calls["after"].append(
-                    {
-                        "tool_name": context.tool_name,
-                        "query": str(context.tool_input.get("query", "")),
-                    }
-                )
-            return None
-
-        register_before_tool_call_hook(before_hook)
-        register_after_tool_call_hook(after_hook)
-
-        try:
-            agent = Agent(
-                role="Parallel Tool Agent",
-                goal="Use both tools exactly as instructed",
-                backstory="You follow tool instructions precisely.",
-                tools=parallel_tools,
-                llm=LLM(model="gpt-5-nano", temperature=1),
-                verbose=False,
-                max_iter=3,
-            )
-            result = agent.kickoff(_parallel_prompt())
-
-            assert result is not None
-            _assert_tools_overlapped()
-
-            before_names = [call["tool_name"] for call in hook_calls["before"]]
-            after_names = [call["tool_name"] for call in hook_calls["after"]]
-            assert len(before_names) >= 3, "Expected before hooks for all parallel calls"
-            assert Counter(before_names) == Counter(after_names)
-            assert all(call["query"] for call in hook_calls["before"])
-            assert all(call["query"] for call in hook_calls["after"])
-        finally:
-            from crewai.hooks import (
-                unregister_after_tool_call_hook,
-                unregister_before_tool_call_hook,
-            )
-
-            unregister_before_tool_call_hook(before_hook)
-            unregister_after_tool_call_hook(after_hook)
-

 # =============================================================================
 # Anthropic Provider Tests
@@ -518,46 +218,6 @@ class TestAnthropicNativeToolCalling:
            assert mock_call.called
            assert result is not None

-    @pytest.mark.vcr()
-    def test_anthropic_parallel_native_tool_calling_test_crew(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        agent = Agent(
-            role="Parallel Tool Agent",
-            goal="Use both tools exactly as instructed",
-            backstory="You follow tool instructions precisely.",
-            tools=parallel_tools,
-            llm=LLM(model="anthropic/claude-sonnet-4-6"),
-            verbose=False,
-            max_iter=3,
-        )
-        task = Task(
-            description=_parallel_prompt(),
-            expected_output="A one sentence summary of both tool outputs",
-            agent=agent,
-        )
-        crew = Crew(agents=[agent], tasks=[task])
-        result = crew.kickoff()
-        assert result is not None
-        _assert_tools_overlapped()
-
-    @pytest.mark.vcr()
-    def test_anthropic_parallel_native_tool_calling_test_agent_kickoff(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        agent = Agent(
-            role="Parallel Tool Agent",
-            goal="Use both tools exactly as instructed",
-            backstory="You follow tool instructions precisely.",
-            tools=parallel_tools,
-            llm=LLM(model="anthropic/claude-sonnet-4-6"),
-            verbose=False,
-            max_iter=3,
-        )
-        result = agent.kickoff(_parallel_prompt())
-        assert result is not None
-        _assert_tools_overlapped()
-

 # =============================================================================
 # Google/Gemini Provider Tests
@@ -588,7 +248,7 @@ class TestGeminiNativeToolCalling:
            goal="Help users with mathematical calculations",
            backstory="You are a helpful math assistant.",
            tools=[calculator_tool],
-            llm=LLM(model="gemini/gemini-2.5-flash"),
+            llm=LLM(model="gemini/gemini-2.0-flash-exp"),
        )

        task = Task(
@@ -607,7 +267,7 @@ class TestGeminiNativeToolCalling:
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test Gemini agent kickoff with mocked LLM call."""
-        llm = LLM(model="gemini/gemini-2.5-flash")
+        llm = LLM(model="gemini/gemini-2.0-flash-001")

        with patch.object(llm, "call", return_value="The answer is 120.") as mock_call:
            agent = Agent(
@@ -631,46 +291,6 @@ class TestGeminiNativeToolCalling:
            assert mock_call.called
            assert result is not None

-    @pytest.mark.vcr()
-    def test_gemini_parallel_native_tool_calling_test_crew(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        agent = Agent(
-            role="Parallel Tool Agent",
-            goal="Use both tools exactly as instructed",
-            backstory="You follow tool instructions precisely.",
-            tools=parallel_tools,
-            llm=LLM(model="gemini/gemini-2.5-flash"),
-            verbose=False,
-            max_iter=3,
-        )
-        task = Task(
-            description=_parallel_prompt(),
-            expected_output="A one sentence summary of both tool outputs",
-            agent=agent,
-        )
-        crew = Crew(agents=[agent], tasks=[task])
-        result = crew.kickoff()
-        assert result is not None
-        _assert_tools_overlapped()
-
-    @pytest.mark.vcr()
-    def test_gemini_parallel_native_tool_calling_test_agent_kickoff(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        agent = Agent(
-            role="Parallel Tool Agent",
-            goal="Use both tools exactly as instructed",
-            backstory="You follow tool instructions precisely.",
-            tools=parallel_tools,
-            llm=LLM(model="gemini/gemini-2.5-flash"),
-            verbose=False,
-            max_iter=3,
-        )
-        result = agent.kickoff(_parallel_prompt())
-        assert result is not None
-        _assert_tools_overlapped()
-

 # =============================================================================
 # Azure Provider Tests
@@ -705,7 +325,7 @@ class TestAzureNativeToolCalling:
            goal="Help users with mathematical calculations",
            backstory="You are a helpful math assistant.",
            tools=[calculator_tool],
-            llm=LLM(model="azure/gpt-5-nano"),
+            llm=LLM(model="azure/gpt-4o-mini"),
            verbose=False,
            max_iter=3,
        )
@@ -728,7 +348,7 @@ class TestAzureNativeToolCalling:
    ) -> None:
        """Test Azure agent kickoff with mocked LLM call."""
        llm = LLM(
-            model="azure/gpt-5-nano",
+            model="azure/gpt-4o-mini",
            api_key="test-key",
            base_url="https://test.openai.azure.com",
        )
@@ -755,46 +375,6 @@ class TestAzureNativeToolCalling:
            assert mock_call.called
            assert result is not None

-    @pytest.mark.vcr()
-    def test_azure_parallel_native_tool_calling_test_crew(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        agent = Agent(
-            role="Parallel Tool Agent",
-            goal="Use both tools exactly as instructed",
-            backstory="You follow tool instructions precisely.",
-            tools=parallel_tools,
-            llm=LLM(model="azure/gpt-5-nano"),
-            verbose=False,
-            max_iter=3,
-        )
-        task = Task(
-            description=_parallel_prompt(),
-            expected_output="A one sentence summary of both tool outputs",
-            agent=agent,
-        )
-        crew = Crew(agents=[agent], tasks=[task])
-        result = crew.kickoff()
-        assert result is not None
-        _assert_tools_overlapped()
-
-    @pytest.mark.vcr()
-    def test_azure_parallel_native_tool_calling_test_agent_kickoff(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        agent = Agent(
-            role="Parallel Tool Agent",
-            goal="Use both tools exactly as instructed",
-            backstory="You follow tool instructions precisely.",
-            tools=parallel_tools,
-            llm=LLM(model="azure/gpt-5-nano"),
-            verbose=False,
-            max_iter=3,
-        )
-        result = agent.kickoff(_parallel_prompt())
-        assert result is not None
-        _assert_tools_overlapped()
-

 # =============================================================================
 # Bedrock Provider Tests
@@ -805,30 +385,18 @@ class TestBedrockNativeToolCalling:
    """Tests for native tool calling with AWS Bedrock models."""

    @pytest.fixture(autouse=True)
-    def validate_bedrock_credentials_for_live_recording(self):
-        """Run Bedrock tests only when explicitly enabled."""
-        run_live_bedrock = os.getenv("RUN_BEDROCK_LIVE_TESTS", "false").lower() == "true"
-
-        if not run_live_bedrock:
-            pytest.skip(
-                "Skipping Bedrock tests by default. "
-                "Set RUN_BEDROCK_LIVE_TESTS=true with valid AWS credentials to enable."
-            )
-
-        access_key = os.getenv("AWS_ACCESS_KEY_ID", "")
-        secret_key = os.getenv("AWS_SECRET_ACCESS_KEY", "")
-        if (
-            not access_key
-            or not secret_key
-            or access_key.startswith(("fake-", "test-"))
-            or secret_key.startswith(("fake-", "test-"))
-        ):
-            pytest.skip(
-                "Skipping Bedrock tests: valid AWS credentials are required when "
-                "RUN_BEDROCK_LIVE_TESTS=true."
-            )
-
-        yield
+    def mock_aws_env(self):
+        """Supply placeholder AWS credentials and region when AWS_ACCESS_KEY_ID is unset."""
+        env_vars = {
+        "AWS_ACCESS_KEY_ID": "test-key",
+        "AWS_SECRET_ACCESS_KEY": "test-secret",
+        "AWS_REGION": "us-east-1",
+        }
+        if "AWS_ACCESS_KEY_ID" not in os.environ:
+            with patch.dict(os.environ, env_vars):
+                yield
+        else:
+            yield

    @pytest.mark.vcr()
    def test_bedrock_agent_kickoff_with_tools_mocked(
@@ -860,46 +428,6 @@ class TestBedrockNativeToolCalling:
        assert result.raw is not None
        assert "120" in str(result.raw)

-    @pytest.mark.vcr()
-    def test_bedrock_parallel_native_tool_calling_test_crew(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        agent = Agent(
-            role="Parallel Tool Agent",
-            goal="Use both tools exactly as instructed",
-            backstory="You follow tool instructions precisely.",
-            tools=parallel_tools,
-            llm=LLM(model="bedrock/anthropic.claude-3-haiku-20240307-v1:0"),
-            verbose=False,
-            max_iter=3,
-        )
-        task = Task(
-            description=_parallel_prompt(),
-            expected_output="A one sentence summary of both tool outputs",
-            agent=agent,
-        )
-        crew = Crew(agents=[agent], tasks=[task])
-        result = crew.kickoff()
-        assert result is not None
-        _assert_tools_overlapped()
-
-    @pytest.mark.vcr()
-    def test_bedrock_parallel_native_tool_calling_test_agent_kickoff(
-        self, parallel_tools: list[BaseTool]
-    ) -> None:
-        agent = Agent(
-            role="Parallel Tool Agent",
-            goal="Use both tools exactly as instructed",
-            backstory="You follow tool instructions precisely.",
-            tools=parallel_tools,
-            llm=LLM(model="bedrock/anthropic.claude-3-haiku-20240307-v1:0"),
-            verbose=False,
-            max_iter=3,
-        )
-        result = agent.kickoff(_parallel_prompt())
-        assert result is not None
-        _assert_tools_overlapped()
-

 # =============================================================================
 # Cross-Provider Native Tool Calling Behavior Tests
@@ -912,7 +440,7 @@ class TestNativeToolCallingBehavior:
    def test_supports_function_calling_check(self) -> None:
        """Test that supports_function_calling() is properly checked."""
        # OpenAI should support function calling
-        openai_llm = LLM(model="gpt-5-nano")
+        openai_llm = LLM(model="gpt-4o-mini")
        assert hasattr(openai_llm, "supports_function_calling")
        assert openai_llm.supports_function_calling() is True

@@ -948,7 +476,7 @@ class TestNativeToolCallingTokenUsage:
            goal="Perform calculations efficiently",
            backstory="You calculate things.",
            tools=[calculator_tool],
-            llm=LLM(model="gpt-5-nano"),
+            llm=LLM(model="gpt-4o-mini"),
            verbose=False,
            max_iter=3,
        )
@@ -992,7 +520,7 @@ def test_native_tool_calling_error_handling(failing_tool: FailingTool):
        goal="Perform calculations efficiently",
        backstory="You calculate things.",
        tools=[failing_tool],
-        llm=LLM(model="gpt-5-nano"),
+        llm=LLM(model="gpt-4o-mini"),
        verbose=False,
        max_iter=3,
    )
@@ -1051,7 +579,7 @@ class TestMaxUsageCountWithNativeToolCalling:
            goal="Call the counting tool multiple times",
            backstory="You are an agent that counts things.",
            tools=[tool],
-            llm=LLM(model="gpt-5-nano"),
+            llm=LLM(model="gpt-4o-mini"),
            verbose=False,
            max_iter=5,
        )
@@ -1079,7 +607,7 @@ class TestMaxUsageCountWithNativeToolCalling:
            goal="Use the counting tool as many times as requested",
            backstory="You are an agent that counts things. You must try to use the tool for each value requested.",
            tools=[tool],
-            llm=LLM(model="gpt-5-nano"),
+            llm=LLM(model="gpt-4o-mini"),
            verbose=False,
            max_iter=5,
        )
@@ -1111,7 +639,7 @@ class TestMaxUsageCountWithNativeToolCalling:
            goal="Use the counting tool exactly as requested",
            backstory="You are an agent that counts things precisely.",
            tools=[tool],
-            llm=LLM(model="gpt-5-nano"),
+            llm=LLM(model="gpt-4o-mini"),
            verbose=False,
            max_iter=5,
        )
@@ -1126,6 +654,251 @@ class TestMaxUsageCountWithNativeToolCalling:
        result = crew.kickoff()

        assert result is not None
-        # Verify the requested calls occurred while keeping usage bounded.
-        assert tool.current_usage_count >= 2
-        assert tool.current_usage_count <= tool.max_usage_count
+        # Verify usage count was incremented for each successful call
+        assert tool.current_usage_count == 2
+
+
+# =============================================================================
+# Dict Tool Call Argument Extraction Tests (Issue #4495)
+# =============================================================================
+
+
+class KBQueryInput(BaseModel):
+    """Input schema for knowledge base query tool."""
+
+    query: str = Field(..., description="Natural language query for the knowledge base.")
+
+
+class KBRetrieverTool(BaseTool):
+    """A mock knowledge base retriever tool for testing."""
+
+    name: str = "kb_retrieve"
+    description: str = "Retrieve information from a knowledge base"
+    args_schema: Type[BaseModel] = KBQueryInput
+
+    def _run(self, query: str) -> str:
+        return f"KB result for: {query}"
+
+
+class TestDictToolCallArgExtraction:
+    """Tests for tool call argument extraction from dict-style tool calls.
+
+    Regression tests for issue #4495 where Bedrock-style dict tool calls
+    had their arguments silently dropped because the default value '{}' for
+    func_info.get('arguments', '{}') was truthy, preventing fallback to
+    tool_call.get('input').
+    """
+
+    def _create_executor_with_tool(self, tool: BaseTool) -> "CrewAgentExecutor":
+        """Helper to create a minimal executor for testing _handle_native_tool_calls."""
+        from crewai.agents.crew_agent_executor import CrewAgentExecutor
+        from crewai.agents.tools_handler import ToolsHandler
+        agent= Agent(
+            role="Test Agent",
+            goal="Test tool calling",
+            backstory="Testing agent",
+            tools=[tool],
+            llm=LLM(model="gpt-4o-mini"),
+            verbose=False,
+        )
+        task = Task(
+            description="Test task",
+            expected_output="Test output",
+            agent=agent,
+        )
+
+        executor = CrewAgentExecutor(
+            agent=agent,
+            task=task,
+            llm=agent.llm,
+            crew=None,
+            prompt={"system": "You are a test agent", "user": "Execute: {input}"},
+            max_iter=5,
+            tools=[],
+            tools_names="",
+            stop_words=[],
+            tools_description="",
+            tools_handler=ToolsHandler(),
+            original_tools=[tool],
+        )
+        executor.messages = []
+        return executor
+
+    def test_bedrock_dict_tool_call_passes_arguments(self) -> None:
+        """Test that Bedrock-style dict tool calls correctly extract arguments.
+
+        This is the core regression test for issue #4495. Previously, arguments
+        were lost because func_info.get('arguments', '{}') returned the truthy
+        default string '{}', preventing the fallback to tool_call.get('input').
+        """
+        tool = KBRetrieverTool()
+        executor = self._create_executor_with_tool(tool)
+
+        available_functions = {"kb_retrieve": tool.run}
+
+        bedrock_tool_calls = [
+            {
+                "toolUseId": "tooluse_abc123",
+                "name": "kb_retrieve",
+                "input": {"query": "What is the capital of France?"},
+            }
+        ]
+
+        result = executor._handle_native_tool_calls(
+            bedrock_tool_calls, available_functions
+        )
+
+        tool_message = executor.messages[-2]
+        assert tool_message["role"] == "tool"
+        assert "KB result for: What is the capital of France?" in tool_message["content"]
+        assert "Error" not in tool_message["content"]
+
+    def test_openai_dict_tool_call_passes_arguments(self) -> None:
+        """Test that OpenAI-style dict tool calls still work correctly."""
+        tool = KBRetrieverTool()
+        executor = self._create_executor_with_tool(tool)
+
+        available_functions = {"kb_retrieve": tool.run}
+
+        openai_tool_calls = [
+            {
+                "id": "call_abc123",
+                "function": {
+                    "name": "kb_retrieve",
+                    "arguments": '{"query": "What is AI?"}',
+                },
+            }
+        ]
+
+        result = executor._handle_native_tool_calls(
+            openai_tool_calls, available_functions
+        )
+
+        tool_message = executor.messages[-2]
+        assert tool_message["role"] == "tool"
+        assert "KB result for: What is AI?" in tool_message["content"]
+        assert "Error" not in tool_message["content"]
+
+    def test_bedrock_dict_with_empty_input(self) -> None:
+        """Test Bedrock-style dict tool call with empty input dict."""
+        tool = CalculatorTool()
+        executor = self._create_executor_with_tool(tool)
+
+        available_functions = {"calculator": tool.run}
+
+        bedrock_tool_calls = [
+            {
+                "toolUseId": "tooluse_abc123",
+                "name": "calculator",
+                "input": {},
+            }
+        ]
+
+        result = executor._handle_native_tool_calls(
+            bedrock_tool_calls, available_functions
+        )
+
+        tool_message = executor.messages[-2]
+        assert tool_message["role"] == "tool"
+
+    def test_bedrock_dict_tool_call_with_custom_base_tool(self) -> None:
+        """Test that a custom BaseTool wrapper receives arguments correctly via Bedrock format.
+
+        This reproduces the exact scenario from issue #4495 where a custom wrapper
+        around BedrockKBRetrieverTool fails with '_run() missing 1 required positional argument'.
+        """
+        class InnerResult:
+            def __init__(self, content: str):
+                self.content = content
+
+        class ParsedKBTool(BaseTool):
+            name: str = "kb.retrieve"
+            description: str = "Retrieve and parse from knowledge base"
+            args_schema: Type[BaseModel] = KBQueryInput
+
+            def _run(self, query: str) -> str:
+                return f"Parsed result for query: {query}"
+
+        tool = ParsedKBTool()
+        executor = self._create_executor_with_tool(tool)
+
+        available_functions = {"kb_retrieve": tool.run}
+
+        bedrock_tool_calls = [
+            {
+                "toolUseId": "tooluse_xyz789",
+                "name": "kb_retrieve",
+                "input": {"query": "Tell me about CrewAI"},
+            }
+        ]
+
+        result = executor._handle_native_tool_calls(
+            bedrock_tool_calls, available_functions
+        )
+
+        tool_message = executor.messages[-2]
+        assert tool_message["role"] == "tool"
+        assert "Parsed result for query: Tell me about CrewAI" in tool_message["content"]
+        assert "missing 1 required positional argument" not in tool_message["content"]
+
+    def test_dict_tool_call_without_function_or_input_keys(self) -> None:
+        """Test that a dict tool call with a 'function' key and no top-level 'input' key works."""
+        tool = KBRetrieverTool()
+        executor = self._create_executor_with_tool(tool)
+
+        available_functions = {"kb_retrieve": tool.run}
+
+        dict_tool_calls = [
+            {
+                "id": "call_999",
+                "function": {
+                    "name": "kb_retrieve",
+                    "arguments": '{"query": "test query"}',
+                },
+            }
+        ]
+
+        result = executor._handle_native_tool_calls(
+            dict_tool_calls, available_functions
+        )
+
+        tool_message = executor.messages[-2]
+        assert tool_message["role"] == "tool"
+        assert "KB result for: test query" in tool_message["content"]
+
+    def test_bedrock_dict_tool_call_multiple_args(self) -> None:
+        """Test Bedrock-style dict tool call with multiple arguments."""
+
+        class MultiArgInput(BaseModel):
+            location: str = Field(description="Location to search")
+            radius: int = Field(description="Search radius in km")
+
+        class MultiArgTool(BaseTool):
+            name: str = "location_search"
+            description: str = "Search within a radius of a location"
+            args_schema: Type[BaseModel] = MultiArgInput
+
+            def _run(self, location: str, radius: int) -> str:
+                return f"Found results within {radius}km of {location}"
+
+        tool = MultiArgTool()
+        executor = self._create_executor_with_tool(tool)
+
+        available_functions = {"location_search": tool.run}
+
+        bedrock_tool_calls = [
+            {
+                "toolUseId": "tooluse_multi",
+                "name": "location_search",
+                "input": {"location": "Paris", "radius": 50},
+            }
+        ]
+
+        result = executor._handle_native_tool_calls(
+            bedrock_tool_calls, available_functions
+        )
+
+        tool_message = executor.messages[-2]
+        assert tool_message["role"] == "tool"
+        assert "Found results within 50km of Paris" in tool_message["content"]
+        assert "Error" not in tool_message["content"]
--- a/lib/crewai/tests/cassettes/agents/TestAnthropicNativeToolCalling.test_anthropic_parallel_native_tool_calling_test_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAnthropicNativeToolCalling.test_anthropic_parallel_native_tool_calling_test_agent_kickoff.yaml
@@ -1,247 +0,0 @@
-interactions:
- request:
-    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
-      This is a tool-calling compliance test. In your next assistant turn, emit exactly
-      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
-      are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
-      goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      anthropic-version:
-      - '2023-06-01'
-      connection:
-      - keep-alive
-      content-length:
-      - '1639'
-      content-type:
-      - application/json
-      host:
-      - api.anthropic.com
-      x-api-key:
-      - X-API-KEY-XXX
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 0.73.0
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-      x-stainless-timeout:
-      - NOT_GIVEN
-    method: POST
-    uri: https://api.anthropic.com/v1/messages
-  response:
-    body:
-      string: '{"model":"claude-sonnet-4-6","id":"msg_01XeN1XTXZgmPyLMMGjivabb","type":"message","role":"assistant","content":[{"type":"text","text":"I''ll
-        execute all 3 parallel searches simultaneously right now!"},{"type":"tool_use","id":"toolu_01NwzvrxEz6tvT3A8ydvMtHu","name":"parallel_local_search_one","input":{"query":"latest
-        OpenAI model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01YCxzSB1suk9uPVC1uwfHz9","name":"parallel_local_search_two","input":{"query":"latest
-        Anthropic model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01Mauvxzv58eDY7pUt9HMKGy","name":"parallel_local_search_three","input":{"query":"latest
-        Gemini model release notes"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":914,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":169,"service_tier":"standard","inference_geo":"global"}}'
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Security-Policy:
-      - CSP-FILTERED
-      Content-Type:
-      - application/json
-      Date:
-      - Wed, 18 Feb 2026 23:54:43 GMT
-      Server:
-      - cloudflare
-      Transfer-Encoding:
-      - chunked
-      X-Robots-Tag:
-      - none
-      anthropic-organization-id:
-      - ANTHROPIC-ORGANIZATION-ID-XXX
-      anthropic-ratelimit-input-tokens-limit:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-input-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-input-tokens-reset:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
-      anthropic-ratelimit-output-tokens-limit:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-output-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-output-tokens-reset:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
-      anthropic-ratelimit-requests-limit:
-      - '20000'
-      anthropic-ratelimit-requests-remaining:
-      - '19999'
-      anthropic-ratelimit-requests-reset:
-      - '2026-02-18T23:54:41Z'
-      anthropic-ratelimit-tokens-limit:
-      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-tokens-reset:
-      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
-      cf-cache-status:
-      - DYNAMIC
-      request-id:
-      - REQUEST-ID-XXX
-      strict-transport-security:
-      - STS-XXX
-      x-envoy-upstream-service-time:
-      - '2099'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
-      This is a tool-calling compliance test. In your next assistant turn, emit exactly
-      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."},{"role":"assistant","content":[{"type":"tool_use","id":"toolu_01NwzvrxEz6tvT3A8ydvMtHu","name":"parallel_local_search_one","input":{"query":"latest
-      OpenAI model release notes"}},{"type":"tool_use","id":"toolu_01YCxzSB1suk9uPVC1uwfHz9","name":"parallel_local_search_two","input":{"query":"latest
-      Anthropic model release notes"}},{"type":"tool_use","id":"toolu_01Mauvxzv58eDY7pUt9HMKGy","name":"parallel_local_search_three","input":{"query":"latest
-      Gemini model release notes"}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01NwzvrxEz6tvT3A8ydvMtHu","content":"[one]
-      latest OpenAI model release notes"},{"type":"tool_result","tool_use_id":"toolu_01YCxzSB1suk9uPVC1uwfHz9","content":"[two]
-      latest Anthropic model release notes"},{"type":"tool_result","tool_use_id":"toolu_01Mauvxzv58eDY7pUt9HMKGy","content":"[three]
-      latest Gemini model release notes"}]}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
-      are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
-      goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      anthropic-version:
-      - '2023-06-01'
-      connection:
-      - keep-alive
-      content-length:
-      - '2517'
-      content-type:
-      - application/json
-      host:
-      - api.anthropic.com
-      x-api-key:
-      - X-API-KEY-XXX
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 0.73.0
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-      x-stainless-timeout:
-      - NOT_GIVEN
-    method: POST
-    uri: https://api.anthropic.com/v1/messages
-  response:
-    body:
-      string: "{\"model\":\"claude-sonnet-4-6\",\"id\":\"msg_01PFXqwwdwwHWadPdtNU5tUZ\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"The
-        three parallel searches were executed successfully, each targeting the latest
-        release notes for the leading AI model families. The search results confirm
-        that queries were dispatched simultaneously to retrieve the most recent developments
-        from **OpenAI** (via tool one), **Anthropic** (via tool two), and **Google's
-        Gemini** (via tool three). While the local search tools returned placeholder
-        outputs in this test environment rather than detailed release notes, the structure
-        of the test validates that all three parallel tool calls were emitted correctly
-        and in the specified order \u2014 demonstrating proper concurrent tool-call
-        behavior with no dependencies between the three independent searches.\"}],\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"usage\":{\"input_tokens\":1197,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":131,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}}"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Security-Policy:
-      - CSP-FILTERED
-      Content-Type:
-      - application/json
-      Date:
-      - Wed, 18 Feb 2026 23:54:49 GMT
-      Server:
-      - cloudflare
-      Transfer-Encoding:
-      - chunked
-      X-Robots-Tag:
-      - none
-      anthropic-organization-id:
-      - ANTHROPIC-ORGANIZATION-ID-XXX
-      anthropic-ratelimit-input-tokens-limit:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-input-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-input-tokens-reset:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
-      anthropic-ratelimit-output-tokens-limit:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-output-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-output-tokens-reset:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
-      anthropic-ratelimit-requests-limit:
-      - '20000'
-      anthropic-ratelimit-requests-remaining:
-      - '19999'
-      anthropic-ratelimit-requests-reset:
-      - '2026-02-18T23:54:44Z'
-      anthropic-ratelimit-tokens-limit:
-      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-tokens-reset:
-      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
-      cf-cache-status:
-      - DYNAMIC
-      request-id:
-      - REQUEST-ID-XXX
-      strict-transport-security:
-      - STS-XXX
-      x-envoy-upstream-service-time:
-      - '4092'
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestAnthropicNativeToolCalling.test_anthropic_parallel_native_tool_calling_test_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAnthropicNativeToolCalling.test_anthropic_parallel_native_tool_calling_test_crew.yaml
@@ -1,254 +0,0 @@
-interactions:
- request:
-    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
-      This is a tool-calling compliance test. In your next assistant turn, emit exactly
-      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
-      are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
-      goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      anthropic-version:
-      - '2023-06-01'
-      connection:
-      - keep-alive
-      content-length:
-      - '1820'
-      content-type:
-      - application/json
-      host:
-      - api.anthropic.com
-      x-api-key:
-      - X-API-KEY-XXX
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 0.73.0
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-      x-stainless-timeout:
-      - NOT_GIVEN
-    method: POST
-    uri: https://api.anthropic.com/v1/messages
-  response:
-    body:
-      string: '{"model":"claude-sonnet-4-6","id":"msg_01RJ4CphwpmkmsJFJjeCNvXz","type":"message","role":"assistant","content":[{"type":"text","text":"I''ll
-        execute all 3 parallel tool calls simultaneously right away!"},{"type":"tool_use","id":"toolu_01YWY3cSomRuv4USmq55Prk3","name":"parallel_local_search_one","input":{"query":"latest
-        OpenAI model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01Aaqj3LMXksE1nB3pscRhV5","name":"parallel_local_search_two","input":{"query":"latest
-        Anthropic model release notes"},"caller":{"type":"direct"}},{"type":"tool_use","id":"toolu_01AcYxQvy8aYmAoUg9zx9qfq","name":"parallel_local_search_three","input":{"query":"latest
-        Gemini model release notes"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":951,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":170,"service_tier":"standard","inference_geo":"global"}}'
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Security-Policy:
-      - CSP-FILTERED
-      Content-Type:
-      - application/json
-      Date:
-      - Wed, 18 Feb 2026 23:54:51 GMT
-      Server:
-      - cloudflare
-      Transfer-Encoding:
-      - chunked
-      X-Robots-Tag:
-      - none
-      anthropic-organization-id:
-      - ANTHROPIC-ORGANIZATION-ID-XXX
-      anthropic-ratelimit-input-tokens-limit:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-input-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-input-tokens-reset:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
-      anthropic-ratelimit-output-tokens-limit:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-output-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-output-tokens-reset:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
-      anthropic-ratelimit-requests-limit:
-      - '20000'
-      anthropic-ratelimit-requests-remaining:
-      - '19999'
-      anthropic-ratelimit-requests-reset:
-      - '2026-02-18T23:54:49Z'
-      anthropic-ratelimit-tokens-limit:
-      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-tokens-reset:
-      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
-      cf-cache-status:
-      - DYNAMIC
-      request-id:
-      - REQUEST-ID-XXX
-      strict-transport-security:
-      - STS-XXX
-      x-envoy-upstream-service-time:
-      - '1967'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"\nCurrent Task:
-      This is a tool-calling compliance test. In your next assistant turn, emit exactly
-      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."},{"role":"assistant","content":[{"type":"tool_use","id":"toolu_01YWY3cSomRuv4USmq55Prk3","name":"parallel_local_search_one","input":{"query":"latest
-      OpenAI model release notes"}},{"type":"tool_use","id":"toolu_01Aaqj3LMXksE1nB3pscRhV5","name":"parallel_local_search_two","input":{"query":"latest
-      Anthropic model release notes"}},{"type":"tool_use","id":"toolu_01AcYxQvy8aYmAoUg9zx9qfq","name":"parallel_local_search_three","input":{"query":"latest
-      Gemini model release notes"}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_01YWY3cSomRuv4USmq55Prk3","content":"[one]
-      latest OpenAI model release notes"},{"type":"tool_result","tool_use_id":"toolu_01Aaqj3LMXksE1nB3pscRhV5","content":"[two]
-      latest Anthropic model release notes"},{"type":"tool_result","tool_use_id":"toolu_01AcYxQvy8aYmAoUg9zx9qfq","content":"[three]
-      latest Gemini model release notes"}]},{"role":"user","content":"Analyze the
-      tool result. If requirements are met, provide the Final Answer. Otherwise, call
-      the next tool. Deliver only the answer without meta-commentary."}],"model":"claude-sonnet-4-6","stop_sequences":["\nObservation:"],"stream":false,"system":"You
-      are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
-      goal is: Use both tools exactly as instructed","tools":[{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}},{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","input_schema":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      anthropic-version:
-      - '2023-06-01'
-      connection:
-      - keep-alive
-      content-length:
-      - '2882'
-      content-type:
-      - application/json
-      host:
-      - api.anthropic.com
-      x-api-key:
-      - X-API-KEY-XXX
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 0.73.0
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-      x-stainless-timeout:
-      - NOT_GIVEN
-    method: POST
-    uri: https://api.anthropic.com/v1/messages
-  response:
-    body:
-      string: "{\"model\":\"claude-sonnet-4-6\",\"id\":\"msg_0143MHUne1az3Tt69EoLjyZd\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"Here
-        is the complete content returned from all three tool calls:\\n\\n- **parallel_local_search_one**
-        result: `[one] latest OpenAI model release notes`\\n- **parallel_local_search_two**
-        result: `[two] latest Anthropic model release notes`\\n- **parallel_local_search_three**
-        result: `[three] latest Gemini model release notes`\\n\\nAll three parallel
-        tool calls were executed successfully in the same response turn, returning
-        their respective outputs: the first tool searched for the latest OpenAI model
-        release notes, the second tool searched for the latest Anthropic model release
-        notes, and the third tool searched for the latest Gemini model release notes
-        \u2014 confirming that all search queries were dispatched concurrently and
-        their results retrieved as expected.\"}],\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"usage\":{\"input_tokens\":1272,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":172,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}}"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Security-Policy:
-      - CSP-FILTERED
-      Content-Type:
-      - application/json
-      Date:
-      - Wed, 18 Feb 2026 23:54:55 GMT
-      Server:
-      - cloudflare
-      Transfer-Encoding:
-      - chunked
-      X-Robots-Tag:
-      - none
-      anthropic-organization-id:
-      - ANTHROPIC-ORGANIZATION-ID-XXX
-      anthropic-ratelimit-input-tokens-limit:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-input-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-input-tokens-reset:
-      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
-      anthropic-ratelimit-output-tokens-limit:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-output-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-output-tokens-reset:
-      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
-      anthropic-ratelimit-requests-limit:
-      - '20000'
-      anthropic-ratelimit-requests-remaining:
-      - '19999'
-      anthropic-ratelimit-requests-reset:
-      - '2026-02-18T23:54:52Z'
-      anthropic-ratelimit-tokens-limit:
-      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
-      anthropic-ratelimit-tokens-remaining:
-      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
-      anthropic-ratelimit-tokens-reset:
-      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
-      cf-cache-status:
-      - DYNAMIC
-      request-id:
-      - REQUEST-ID-XXX
-      strict-transport-security:
-      - STS-XXX
-      x-envoy-upstream-service-time:
-      - '3144'
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_agent_with_native_tool_calling.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_agent_with_native_tool_calling.yaml
@@ -5,19 +5,20 @@ interactions:
      calculations"}, {"role": "user", "content": "\nCurrent Task: Calculate what
      is 15 * 8\n\nThis is the expected criteria for your final answer: The result
      of the calculation\nyou MUST return the actual complete content as the final
-      answer, not a summary."}], "stream": false, "tool_choice": "auto", "tools":
-      [{"function": {"name": "calculator", "description": "Perform mathematical calculations.
-      Use this for any math operations.", "parameters": {"properties": {"expression":
-      {"description": "Mathematical expression to evaluate", "title": "Expression",
-      "type": "string"}}, "required": ["expression"], "type": "object", "additionalProperties":
-      false}}, "type": "function"}]}'
+      answer, not a summary.\n\nThis is VERY important to you, your job depends on
+      it!"}], "stream": false, "stop": ["\nObservation:"], "tool_choice": "auto",
+      "tools": [{"function": {"name": "calculator", "description": "Perform mathematical
+      calculations. Use this for any math operations.", "parameters": {"properties":
+      {"expression": {"description": "Mathematical expression to evaluate", "title":
+      "Expression", "type": "string"}}, "required": ["expression"], "type": "object"}},
+      "type": "function"}]}'
    headers:
      Accept:
      - application/json
      Connection:
      - keep-alive
      Content-Length:
-      - '828'
+      - '883'
      Content-Type:
      - application/json
      User-Agent:
@@ -31,20 +32,20 @@ interactions:
      x-ms-client-request-id:
      - X-MS-CLIENT-REQUEST-ID-XXX
    method: POST
-    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
+    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2024-12-01-preview
  response:
    body:
      string: '{"choices":[{"content_filter_results":{},"finish_reason":"tool_calls","index":0,"logprobs":null,"message":{"annotations":[],"content":null,"refusal":null,"role":"assistant","tool_calls":[{"function":{"arguments":"{\"expression\":\"15
-        * 8\"}","name":"calculator"},"id":"call_Cow46pNllpDx0pxUgZFeqlh1","type":"function"}]}}],"created":1771459544,"id":"chatcmpl-DAlq4osCP9ABJ1HyXFBoYWylMg0bi","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":219,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":192,"rejected_prediction_tokens":0},"prompt_tokens":208,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":427}}
+        * 8\"}","name":"calculator"},"id":"call_cJWzKh5LdBpY3Sk8GATS3eRe","type":"function"}]}}],"created":1769122114,"id":"chatcmpl-D0xlavS0V3m00B9Fsjyv39xQWUGFV","model":"gpt-4o-mini-2024-07-18","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":"fp_f97eff32c5","usage":{"completion_tokens":18,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens":137,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":155}}

        '
    headers:
      Content-Length:
-      - '1049'
+      - '1058'
      Content-Type:
      - application/json
      Date:
-      - Thu, 19 Feb 2026 00:05:45 GMT
+      - Thu, 22 Jan 2026 22:48:34 GMT
      Strict-Transport-Security:
      - STS-XXX
      apim-request-id:
@@ -58,7 +59,7 @@ interactions:
      x-ms-client-request-id:
      - X-MS-CLIENT-REQUEST-ID-XXX
      x-ms-deployment-name:
-      - gpt-5-nano
+      - gpt-4o-mini
      x-ms-rai-invoked:
      - 'true'
      x-ms-region:
@@ -82,25 +83,26 @@ interactions:
      calculations"}, {"role": "user", "content": "\nCurrent Task: Calculate what
      is 15 * 8\n\nThis is the expected criteria for your final answer: The result
      of the calculation\nyou MUST return the actual complete content as the final
-      answer, not a summary."}, {"role": "assistant", "content": "", "tool_calls":
-      [{"id": "call_Cow46pNllpDx0pxUgZFeqlh1", "type": "function", "function": {"name":
-      "calculator", "arguments": "{\"expression\":\"15 * 8\"}"}}]}, {"role": "tool",
-      "tool_call_id": "call_Cow46pNllpDx0pxUgZFeqlh1", "content": "The result of 15
-      * 8 is 120"}, {"role": "user", "content": "Analyze the tool result. If requirements
-      are met, provide the Final Answer. Otherwise, call the next tool. Deliver only
-      the answer without meta-commentary."}], "stream": false, "tool_choice": "auto",
-      "tools": [{"function": {"name": "calculator", "description": "Perform mathematical
-      calculations. Use this for any math operations.", "parameters": {"properties":
-      {"expression": {"description": "Mathematical expression to evaluate", "title":
-      "Expression", "type": "string"}}, "required": ["expression"], "type": "object",
-      "additionalProperties": false}}, "type": "function"}]}'
+      answer, not a summary.\n\nThis is VERY important to you, your job depends on
+      it!"}, {"role": "assistant", "content": "", "tool_calls": [{"id": "call_cJWzKh5LdBpY3Sk8GATS3eRe",
+      "type": "function", "function": {"name": "calculator", "arguments": "{\"expression\":\"15
+      * 8\"}"}}]}, {"role": "tool", "tool_call_id": "call_cJWzKh5LdBpY3Sk8GATS3eRe",
+      "content": "The result of 15 * 8 is 120"}, {"role": "user", "content": "Analyze
+      the tool result. If requirements are met, provide the Final Answer. Otherwise,
+      call the next tool. Deliver only the answer without meta-commentary."}], "stream":
+      false, "stop": ["\nObservation:"], "tool_choice": "auto", "tools": [{"function":
+      {"name": "calculator", "description": "Perform mathematical calculations. Use
+      this for any math operations.", "parameters": {"properties": {"expression":
+      {"description": "Mathematical expression to evaluate", "title": "Expression",
+      "type": "string"}}, "required": ["expression"], "type": "object"}}, "type":
+      "function"}]}'
    headers:
      Accept:
      - application/json
      Connection:
      - keep-alive
      Content-Length:
-      - '1320'
+      - '1375'
      Content-Type:
      - application/json
      User-Agent:
@@ -114,19 +116,20 @@ interactions:
      x-ms-client-request-id:
      - X-MS-CLIENT-REQUEST-ID-XXX
    method: POST
-    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
+    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2024-12-01-preview
  response:
    body:
-      string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"120","refusal":null,"role":"assistant"}}],"created":1771459547,"id":"chatcmpl-DAlq7zJimnIMoXieNww8jY5f2pIPd","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":203,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":192,"rejected_prediction_tokens":0},"prompt_tokens":284,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":487}}
+      string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The
+        result of the calculation is 120.","refusal":null,"role":"assistant"}}],"created":1769122115,"id":"chatcmpl-D0xlbUNVA7RVkn0GsuBGoNhgQTtac","model":"gpt-4o-mini-2024-07-18","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":"fp_f97eff32c5","usage":{"completion_tokens":11,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens":207,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":218}}

        '
    headers:
      Content-Length:
-      - '1207'
+      - '1250'
      Content-Type:
      - application/json
      Date:
-      - Thu, 19 Feb 2026 00:05:49 GMT
+      - Thu, 22 Jan 2026 22:48:34 GMT
      Strict-Transport-Security:
      - STS-XXX
      apim-request-id:
@@ -140,7 +143,7 @@ interactions:
      x-ms-client-request-id:
      - X-MS-CLIENT-REQUEST-ID-XXX
      x-ms-deployment-name:
-      - gpt-5-nano
+      - gpt-4o-mini
      x-ms-rai-invoked:
      - 'true'
      x-ms-region:
--- a/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_parallel_native_tool_calling_test_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_parallel_native_tool_calling_test_agent_kickoff.yaml
@@ -1,198 +0,0 @@
-interactions:
- request:
-    body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
-      You follow tool instructions precisely.\nYour personal goal is: Use both tools
-      exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
-      a tool-calling compliance test. In your next assistant turn, emit exactly 3
-      tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."}], "stream": false, "tool_choice": "auto", "tools": [{"function":
-      {"name": "parallel_local_search_one", "description": "Local search tool #1 for
-      concurrency testing.", "parameters": {"properties": {"query": {"description":
-      "Search query", "title": "Query", "type": "string"}}, "required": ["query"],
-      "type": "object", "additionalProperties": false}}, "type": "function"}, {"function":
-      {"name": "parallel_local_search_two", "description": "Local search tool #2 for
-      concurrency testing.", "parameters": {"properties": {"query": {"description":
-      "Search query", "title": "Query", "type": "string"}}, "required": ["query"],
-      "type": "object", "additionalProperties": false}}, "type": "function"}, {"function":
-      {"name": "parallel_local_search_three", "description": "Local search tool #3
-      for concurrency testing.", "parameters": {"properties": {"query": {"description":
-      "Search query", "title": "Query", "type": "string"}}, "required": ["query"],
-      "type": "object", "additionalProperties": false}}, "type": "function"}]}'
-    headers:
-      Accept:
-      - application/json
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '1763'
-      Content-Type:
-      - application/json
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      api-key:
-      - X-API-KEY-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      x-ms-client-request-id:
-      - X-MS-CLIENT-REQUEST-ID-XXX
-    method: POST
-    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
-  response:
-    body:
-      string: '{"choices":[{"content_filter_results":{},"finish_reason":"tool_calls","index":0,"logprobs":null,"message":{"annotations":[],"content":null,"refusal":null,"role":"assistant","tool_calls":[{"function":{"arguments":"{\"query\":
-        \"latest OpenAI model release notes\"}","name":"parallel_local_search_one"},"id":"call_emQmocGydKuxvESfQopNngdm","type":"function"},{"function":{"arguments":"{\"query\":
-        \"latest Anthropic model release notes\"}","name":"parallel_local_search_two"},"id":"call_eNpK9WUYFCX2ZEUPhYCKvdMs","type":"function"},{"function":{"arguments":"{\"query\":
-        \"latest Gemini model release notes\"}","name":"parallel_local_search_three"},"id":"call_Wdtl6jFxGehSUMn5I1O4Mrdx","type":"function"}]}}],"created":1771459550,"id":"chatcmpl-DAlqAyJGnQKDkNCaTcjU2T8BeJaXM","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":666,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":576,"rejected_prediction_tokens":0},"prompt_tokens":343,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":1009}}
-
-        '
-    headers:
-      Content-Length:
-      - '1433'
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 00:05:55 GMT
-      Strict-Transport-Security:
-      - STS-XXX
-      apim-request-id:
-      - APIM-REQUEST-ID-XXX
-      azureml-model-session:
-      - AZUREML-MODEL-SESSION-XXX
-      x-accel-buffering:
-      - 'no'
-      x-content-type-options:
-      - X-CONTENT-TYPE-XXX
-      x-ms-client-request-id:
-      - X-MS-CLIENT-REQUEST-ID-XXX
-      x-ms-deployment-name:
-      - gpt-5-nano
-      x-ms-rai-invoked:
-      - 'true'
-      x-ms-region:
-      - X-MS-REGION-XXX
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
-      You follow tool instructions precisely.\nYour personal goal is: Use both tools
-      exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
-      a tool-calling compliance test. In your next assistant turn, emit exactly 3
-      tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."}, {"role": "assistant", "content": "", "tool_calls": [{"id":
-      "call_emQmocGydKuxvESfQopNngdm", "type": "function", "function": {"name": "parallel_local_search_one",
-      "arguments": "{\"query\": \"latest OpenAI model release notes\"}"}}, {"id":
-      "call_eNpK9WUYFCX2ZEUPhYCKvdMs", "type": "function", "function": {"name": "parallel_local_search_two",
-      "arguments": "{\"query\": \"latest Anthropic model release notes\"}"}}, {"id":
-      "call_Wdtl6jFxGehSUMn5I1O4Mrdx", "type": "function", "function": {"name": "parallel_local_search_three",
-      "arguments": "{\"query\": \"latest Gemini model release notes\"}"}}]}, {"role":
-      "tool", "tool_call_id": "call_emQmocGydKuxvESfQopNngdm", "content": "[one] latest
-      OpenAI model release notes"}, {"role": "tool", "tool_call_id": "call_eNpK9WUYFCX2ZEUPhYCKvdMs",
-      "content": "[two] latest Anthropic model release notes"}, {"role": "tool", "tool_call_id":
-      "call_Wdtl6jFxGehSUMn5I1O4Mrdx", "content": "[three] latest Gemini model release
-      notes"}], "stream": false, "tool_choice": "auto", "tools": [{"function": {"name":
-      "parallel_local_search_one", "description": "Local search tool #1 for concurrency
-      testing.", "parameters": {"properties": {"query": {"description": "Search query",
-      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
-      "additionalProperties": false}}, "type": "function"}, {"function": {"name":
-      "parallel_local_search_two", "description": "Local search tool #2 for concurrency
-      testing.", "parameters": {"properties": {"query": {"description": "Search query",
-      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
-      "additionalProperties": false}}, "type": "function"}, {"function": {"name":
-      "parallel_local_search_three", "description": "Local search tool #3 for concurrency
-      testing.", "parameters": {"properties": {"query": {"description": "Search query",
-      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
-      "additionalProperties": false}}, "type": "function"}]}'
-    headers:
-      Accept:
-      - application/json
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '2727'
-      Content-Type:
-      - application/json
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      api-key:
-      - X-API-KEY-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      x-ms-client-request-id:
-      - X-MS-CLIENT-REQUEST-ID-XXX
-    method: POST
-    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
-  response:
-    body:
-      string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The
-        latest release notes have been published for the OpenAI, Anthropic, and Gemini
-        models, signaling concurrent updates across the leading AI model families.
-        Each set outlines new capabilities and performance improvements, along with
-        changes to APIs, tooling, and deployment guidelines. Users should review the
-        individual notes to understand new features, adjustments to tokenization,
-        latency or throughput, safety and alignment enhancements, pricing or access
-        changes, and any breaking changes or migration steps required to adopt the
-        updated models in existing workflows.","refusal":null,"role":"assistant"}}],"created":1771459556,"id":"chatcmpl-DAlqGKWXfGNlTIbDY9F6oHQp6hbxM","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":747,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":640,"rejected_prediction_tokens":0},"prompt_tokens":467,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":1214}}
-
-        '
-    headers:
-      Content-Length:
-      - '1778'
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 00:06:02 GMT
-      Strict-Transport-Security:
-      - STS-XXX
-      apim-request-id:
-      - APIM-REQUEST-ID-XXX
-      azureml-model-session:
-      - AZUREML-MODEL-SESSION-XXX
-      x-accel-buffering:
-      - 'no'
-      x-content-type-options:
-      - X-CONTENT-TYPE-XXX
-      x-ms-client-request-id:
-      - X-MS-CLIENT-REQUEST-ID-XXX
-      x-ms-deployment-name:
-      - gpt-5-nano
-      x-ms-rai-invoked:
-      - 'true'
-      x-ms-region:
-      - X-MS-REGION-XXX
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_parallel_native_tool_calling_test_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAzureNativeToolCalling.test_azure_parallel_native_tool_calling_test_crew.yaml
@@ -1,201 +0,0 @@
-interactions:
- request:
-    body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
-      You follow tool instructions precisely.\nYour personal goal is: Use both tools
-      exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
-      a tool-calling compliance test. In your next assistant turn, emit exactly 3
-      tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."}], "stream": false, "tool_choice":
-      "auto", "tools": [{"function": {"name": "parallel_local_search_one", "description":
-      "Local search tool #1 for concurrency testing.", "parameters": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
-      "function"}, {"function": {"name": "parallel_local_search_two", "description":
-      "Local search tool #2 for concurrency testing.", "parameters": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
-      "function"}, {"function": {"name": "parallel_local_search_three", "description":
-      "Local search tool #3 for concurrency testing.", "parameters": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
-      "function"}]}'
-    headers:
-      Accept:
-      - application/json
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '1944'
-      Content-Type:
-      - application/json
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      api-key:
-      - X-API-KEY-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      x-ms-client-request-id:
-      - X-MS-CLIENT-REQUEST-ID-XXX
-    method: POST
-    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
-  response:
-    body:
-      string: '{"choices":[{"content_filter_results":{},"finish_reason":"tool_calls","index":0,"logprobs":null,"message":{"annotations":[],"content":null,"refusal":null,"role":"assistant","tool_calls":[{"function":{"arguments":"{\"query\":
-        \"latest OpenAI model release notes\"}","name":"parallel_local_search_one"},"id":"call_NEvGoF86nhPQfXRoJd5SOyLd","type":"function"},{"function":{"arguments":"{\"query\":
-        \"latest Anthropic model release notes\"}","name":"parallel_local_search_two"},"id":"call_q8Q2du4gAMQLrGTgWgfwfbDZ","type":"function"},{"function":{"arguments":"{\"query\":
-        \"latest Gemini model release notes\"}","name":"parallel_local_search_three"},"id":"call_yTBal9ofZzuo10j0pWqhHCSj","type":"function"}]}}],"created":1771459563,"id":"chatcmpl-DAlqN7kyC5ACI5Yl1Pj63rOH5HIvI","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":2457,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":2368,"rejected_prediction_tokens":0},"prompt_tokens":378,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":2835}}
-
-        '
-    headers:
-      Content-Length:
-      - '1435'
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 00:06:17 GMT
-      Strict-Transport-Security:
-      - STS-XXX
-      apim-request-id:
-      - APIM-REQUEST-ID-XXX
-      azureml-model-session:
-      - AZUREML-MODEL-SESSION-XXX
-      x-accel-buffering:
-      - 'no'
-      x-content-type-options:
-      - X-CONTENT-TYPE-XXX
-      x-ms-client-request-id:
-      - X-MS-CLIENT-REQUEST-ID-XXX
-      x-ms-deployment-name:
-      - gpt-5-nano
-      x-ms-rai-invoked:
-      - 'true'
-      x-ms-region:
-      - X-MS-REGION-XXX
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"messages": [{"role": "system", "content": "You are Parallel Tool Agent.
-      You follow tool instructions precisely.\nYour personal goal is: Use both tools
-      exactly as instructed"}, {"role": "user", "content": "\nCurrent Task: This is
-      a tool-calling compliance test. In your next assistant turn, emit exactly 3
-      tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."}, {"role": "assistant", "content":
-      "", "tool_calls": [{"id": "call_NEvGoF86nhPQfXRoJd5SOyLd", "type": "function",
-      "function": {"name": "parallel_local_search_one", "arguments": "{\"query\":
-      \"latest OpenAI model release notes\"}"}}, {"id": "call_q8Q2du4gAMQLrGTgWgfwfbDZ",
-      "type": "function", "function": {"name": "parallel_local_search_two", "arguments":
-      "{\"query\": \"latest Anthropic model release notes\"}"}}, {"id": "call_yTBal9ofZzuo10j0pWqhHCSj",
-      "type": "function", "function": {"name": "parallel_local_search_three", "arguments":
-      "{\"query\": \"latest Gemini model release notes\"}"}}]}, {"role": "tool", "tool_call_id":
-      "call_NEvGoF86nhPQfXRoJd5SOyLd", "content": "[one] latest OpenAI model release
-      notes"}, {"role": "tool", "tool_call_id": "call_q8Q2du4gAMQLrGTgWgfwfbDZ", "content":
-      "[two] latest Anthropic model release notes"}, {"role": "tool", "tool_call_id":
-      "call_yTBal9ofZzuo10j0pWqhHCSj", "content": "[three] latest Gemini model release
-      notes"}, {"role": "user", "content": "Analyze the tool result. If requirements
-      are met, provide the Final Answer. Otherwise, call the next tool. Deliver only
-      the answer without meta-commentary."}], "stream": false, "tool_choice": "auto",
-      "tools": [{"function": {"name": "parallel_local_search_one", "description":
-      "Local search tool #1 for concurrency testing.", "parameters": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
-      "function"}, {"function": {"name": "parallel_local_search_two", "description":
-      "Local search tool #2 for concurrency testing.", "parameters": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
-      "function"}, {"function": {"name": "parallel_local_search_three", "description":
-      "Local search tool #3 for concurrency testing.", "parameters": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}, "type":
-      "function"}]}'
-    headers:
-      Accept:
-      - application/json
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '3096'
-      Content-Type:
-      - application/json
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      api-key:
-      - X-API-KEY-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      x-ms-client-request-id:
-      - X-MS-CLIENT-REQUEST-ID-XXX
-    method: POST
-    uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-12-01-preview
-  response:
-    body:
-      string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"The
-        three tool results indicate the latest release notes are available for OpenAI
-        models, Anthropic models, and Gemini models.","refusal":null,"role":"assistant"}}],"created":1771459579,"id":"chatcmpl-DAlqdRtr8EefmFfazuh4jm7KvVxim","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":1826,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":1792,"rejected_prediction_tokens":0},"prompt_tokens":537,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":2363}}
-
-        '
-    headers:
-      Content-Length:
-      - '1333'
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 00:06:31 GMT
-      Strict-Transport-Security:
-      - STS-XXX
-      apim-request-id:
-      - APIM-REQUEST-ID-XXX
-      azureml-model-session:
-      - AZUREML-MODEL-SESSION-XXX
-      x-accel-buffering:
-      - 'no'
-      x-content-type-options:
-      - X-CONTENT-TYPE-XXX
-      x-ms-client-request-id:
-      - X-MS-CLIENT-REQUEST-ID-XXX
-      x-ms-deployment-name:
-      - gpt-5-nano
-      x-ms-rai-invoked:
-      - 'true'
-      x-ms-region:
-      - X-MS-REGION-XXX
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestBedrockNativeToolCalling.test_bedrock_parallel_native_tool_calling_test_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestBedrockNativeToolCalling.test_bedrock_parallel_native_tool_calling_test_agent_kickoff.yaml
@@ -1,63 +0,0 @@
-interactions:
- request:
-    body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
-      is a tool-calling compliance test. In your next assistant turn, emit exactly
-      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."}]}], "inferenceConfig": {"stopSequences": ["\nObservation:"]},
-      "system": [{"text": "You are Parallel Tool Agent. You follow tool instructions
-      precisely.\nYour personal goal is: Use both tools exactly as instructed"}],
-      "toolConfig": {"tools": [{"toolSpec": {"name": "parallel_local_search_one",
-      "description": "Local search tool #1 for concurrency testing.", "inputSchema":
-      {"json": {"properties": {"query": {"description": "Search query", "title": "Query",
-      "type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
-      false}}}}, {"toolSpec": {"name": "parallel_local_search_two", "description":
-      "Local search tool #2 for concurrency testing.", "inputSchema": {"json": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}}},
-      {"toolSpec": {"name": "parallel_local_search_three", "description": "Local search
-      tool #3 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
-      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
-      ["query"], "type": "object", "additionalProperties": false}}}}]}}'
-    headers:
-      Content-Length:
-      - '1773'
-      Content-Type:
-      - !!binary |
-        YXBwbGljYXRpb24vanNvbg==
-      User-Agent:
-      - X-USER-AGENT-XXX
-      amz-sdk-invocation-id:
-      - AMZ-SDK-INVOCATION-ID-XXX
-      amz-sdk-request:
-      - !!binary |
-        YXR0ZW1wdD0x
-      authorization:
-      - AUTHORIZATION-XXX
-      x-amz-date:
-      - X-AMZ-DATE-XXX
-    method: POST
-    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
-  response:
-    body:
-      string: '{"message":"The security token included in the request is invalid."}'
-    headers:
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '68'
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 00:00:08 GMT
-      x-amzn-ErrorType:
-      - UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
-      x-amzn-RequestId:
-      - X-AMZN-REQUESTID-XXX
-    status:
-      code: 403
-      message: Forbidden
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestBedrockNativeToolCalling.test_bedrock_parallel_native_tool_calling_test_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestBedrockNativeToolCalling.test_bedrock_parallel_native_tool_calling_test_crew.yaml
@@ -1,226 +0,0 @@
-interactions:
- request:
-    body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
-      is a tool-calling compliance test. In your next assistant turn, emit exactly
-      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."}]}], "inferenceConfig": {"stopSequences":
-      ["\nObservation:"]}, "system": [{"text": "You are Parallel Tool Agent. You follow
-      tool instructions precisely.\nYour personal goal is: Use both tools exactly
-      as instructed"}], "toolConfig": {"tools": [{"toolSpec": {"name": "parallel_local_search_one",
-      "description": "Local search tool #1 for concurrency testing.", "inputSchema":
-      {"json": {"properties": {"query": {"description": "Search query", "title": "Query",
-      "type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
-      false}}}}, {"toolSpec": {"name": "parallel_local_search_two", "description":
-      "Local search tool #2 for concurrency testing.", "inputSchema": {"json": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}}},
-      {"toolSpec": {"name": "parallel_local_search_three", "description": "Local search
-      tool #3 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
-      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
-      ["query"], "type": "object", "additionalProperties": false}}}}]}}'
-    headers:
-      Content-Length:
-      - '1954'
-      Content-Type:
-      - !!binary |
-        YXBwbGljYXRpb24vanNvbg==
-      User-Agent:
-      - X-USER-AGENT-XXX
-      amz-sdk-invocation-id:
-      - AMZ-SDK-INVOCATION-ID-XXX
-      amz-sdk-request:
-      - !!binary |
-        YXR0ZW1wdD0x
-      authorization:
-      - AUTHORIZATION-XXX
-      x-amz-date:
-      - X-AMZ-DATE-XXX
-    method: POST
-    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
-  response:
-    body:
-      string: '{"message":"The security token included in the request is invalid."}'
-    headers:
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '68'
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 00:00:07 GMT
-      x-amzn-ErrorType:
-      - UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
-      x-amzn-RequestId:
-      - X-AMZN-REQUESTID-XXX
-    status:
-      code: 403
-      message: Forbidden
- request:
-    body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
-      is a tool-calling compliance test. In your next assistant turn, emit exactly
-      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."}]}, {"role": "user", "content":
-      [{"text": "\nCurrent Task: This is a tool-calling compliance test. In your next
-      assistant turn, emit exactly 3 tool calls in the same response (parallel tool
-      calls), in this order: 1) parallel_local_search_one(query=''latest OpenAI model
-      release notes''), 2) parallel_local_search_two(query=''latest Anthropic model
-      release notes''), 3) parallel_local_search_three(query=''latest Gemini model
-      release notes''). Do not call any other tools and do not answer before those
-      3 tool calls are emitted. After the tool results return, provide a one paragraph
-      summary.\n\nThis is the expected criteria for your final answer: A one sentence
-      summary of both tool outputs\nyou MUST return the actual complete content as
-      the final answer, not a summary."}]}], "inferenceConfig": {"stopSequences":
-      ["\nObservation:"]}, "system": [{"text": "You are Parallel Tool Agent. You follow
-      tool instructions precisely.\nYour personal goal is: Use both tools exactly
-      as instructed\n\nYou are Parallel Tool Agent. You follow tool instructions precisely.\nYour
-      personal goal is: Use both tools exactly as instructed"}], "toolConfig": {"tools":
-      [{"toolSpec": {"name": "parallel_local_search_one", "description": "Local search
-      tool #1 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
-      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
-      ["query"], "type": "object", "additionalProperties": false}}}}, {"toolSpec":
-      {"name": "parallel_local_search_two", "description": "Local search tool #2 for
-      concurrency testing.", "inputSchema": {"json": {"properties": {"query": {"description":
-      "Search query", "title": "Query", "type": "string"}}, "required": ["query"],
-      "type": "object", "additionalProperties": false}}}}, {"toolSpec": {"name": "parallel_local_search_three",
-      "description": "Local search tool #3 for concurrency testing.", "inputSchema":
-      {"json": {"properties": {"query": {"description": "Search query", "title": "Query",
-      "type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
-      false}}}}]}}'
-    headers:
-      Content-Length:
-      - '2855'
-      Content-Type:
-      - !!binary |
-        YXBwbGljYXRpb24vanNvbg==
-      User-Agent:
-      - X-USER-AGENT-XXX
-      amz-sdk-invocation-id:
-      - AMZ-SDK-INVOCATION-ID-XXX
-      amz-sdk-request:
-      - !!binary |
-        YXR0ZW1wdD0x
-      authorization:
-      - AUTHORIZATION-XXX
-      x-amz-date:
-      - X-AMZ-DATE-XXX
-    method: POST
-    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
-  response:
-    body:
-      string: '{"message":"The security token included in the request is invalid."}'
-    headers:
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '68'
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 00:00:07 GMT
-      x-amzn-ErrorType:
-      - UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
-      x-amzn-RequestId:
-      - X-AMZN-REQUESTID-XXX
-    status:
-      code: 403
-      message: Forbidden
- request:
-    body: '{"messages": [{"role": "user", "content": [{"text": "\nCurrent Task: This
-      is a tool-calling compliance test. In your next assistant turn, emit exactly
-      3 tool calls in the same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."}]}, {"role": "user", "content":
-      [{"text": "\nCurrent Task: This is a tool-calling compliance test. In your next
-      assistant turn, emit exactly 3 tool calls in the same response (parallel tool
-      calls), in this order: 1) parallel_local_search_one(query=''latest OpenAI model
-      release notes''), 2) parallel_local_search_two(query=''latest Anthropic model
-      release notes''), 3) parallel_local_search_three(query=''latest Gemini model
-      release notes''). Do not call any other tools and do not answer before those
-      3 tool calls are emitted. After the tool results return, provide a one paragraph
-      summary.\n\nThis is the expected criteria for your final answer: A one sentence
-      summary of both tool outputs\nyou MUST return the actual complete content as
-      the final answer, not a summary."}]}, {"role": "user", "content": [{"text":
-      "\nCurrent Task: This is a tool-calling compliance test. In your next assistant
-      turn, emit exactly 3 tool calls in the same response (parallel tool calls),
-      in this order: 1) parallel_local_search_one(query=''latest OpenAI model release
-      notes''), 2) parallel_local_search_two(query=''latest Anthropic model release
-      notes''), 3) parallel_local_search_three(query=''latest Gemini model release
-      notes''). Do not call any other tools and do not answer before those 3 tool
-      calls are emitted. After the tool results return, provide a one paragraph summary.\n\nThis
-      is the expected criteria for your final answer: A one sentence summary of both
-      tool outputs\nyou MUST return the actual complete content as the final answer,
-      not a summary."}]}], "inferenceConfig": {"stopSequences": ["\nObservation:"]},
-      "system": [{"text": "You are Parallel Tool Agent. You follow tool instructions
-      precisely.\nYour personal goal is: Use both tools exactly as instructed\n\nYou
-      are Parallel Tool Agent. You follow tool instructions precisely.\nYour personal
-      goal is: Use both tools exactly as instructed\n\nYou are Parallel Tool Agent.
-      You follow tool instructions precisely.\nYour personal goal is: Use both tools
-      exactly as instructed"}], "toolConfig": {"tools": [{"toolSpec": {"name": "parallel_local_search_one",
-      "description": "Local search tool #1 for concurrency testing.", "inputSchema":
-      {"json": {"properties": {"query": {"description": "Search query", "title": "Query",
-      "type": "string"}}, "required": ["query"], "type": "object", "additionalProperties":
-      false}}}}, {"toolSpec": {"name": "parallel_local_search_two", "description":
-      "Local search tool #2 for concurrency testing.", "inputSchema": {"json": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}}},
-      {"toolSpec": {"name": "parallel_local_search_three", "description": "Local search
-      tool #3 for concurrency testing.", "inputSchema": {"json": {"properties": {"query":
-      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
-      ["query"], "type": "object", "additionalProperties": false}}}}]}}'
-    headers:
-      Content-Length:
-      - '3756'
-      Content-Type:
-      - !!binary |
-        YXBwbGljYXRpb24vanNvbg==
-      User-Agent:
-      - X-USER-AGENT-XXX
-      amz-sdk-invocation-id:
-      - AMZ-SDK-INVOCATION-ID-XXX
-      amz-sdk-request:
-      - !!binary |
-        YXR0ZW1wdD0x
-      authorization:
-      - AUTHORIZATION-XXX
-      x-amz-date:
-      - X-AMZ-DATE-XXX
-    method: POST
-    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
-  response:
-    body:
-      string: '{"message":"The security token included in the request is invalid."}'
-    headers:
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '68'
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 00:00:07 GMT
-      x-amzn-ErrorType:
-      - UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
-      x-amzn-RequestId:
-      - X-AMZN-REQUESTID-XXX
-    status:
-      code: 403
-      message: Forbidden
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_agent_with_native_tool_calling.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_agent_with_native_tool_calling.yaml
@@ -3,14 +3,14 @@ interactions:
    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
      * 8\n\nThis is the expected criteria for your final answer: The result of the
      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary."}], "role": "user"}], "systemInstruction": {"parts": [{"text":
-      "You are Math Assistant. You are a helpful math assistant.\nYour personal goal
-      is: Help users with mathematical calculations"}], "role": "user"}, "tools":
-      [{"functionDeclarations": [{"description": "Perform mathematical calculations.
-      Use this for any math operations.", "name": "calculator", "parameters_json_schema":
-      {"properties": {"expression": {"description": "Mathematical expression to evaluate",
-      "title": "Expression", "type": "string"}}, "required": ["expression"], "type":
-      "object", "additionalProperties": false}}]}], "generationConfig": {"stopSequences":
+      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
+      "role": "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
+      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
+      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
+      "Perform mathematical calculations. Use this for any math operations.", "name":
+      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
+      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
+      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
      ["\nObservation:"]}}'
    headers:
      User-Agent:
@@ -22,7 +22,7 @@ interactions:
      connection:
      - keep-alive
      content-length:
-      - '892'
+      - '907'
      content-type:
      - application/json
      host:
@@ -32,31 +32,31 @@ interactions:
      x-goog-api-key:
      - X-GOOG-API-KEY-XXX
    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
  response:
    body:
      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
        [\n          {\n            \"functionCall\": {\n              \"name\": \"calculator\",\n
        \             \"args\": {\n                \"expression\": \"15 * 8\"\n              }\n
-        \           },\n            \"thoughtSignature\": \"Cp8DAb4+9vu74rJ0QQNTa6oMMh3QAlvx3cS4TL0I1od7EdQZtMBbsr5viQiTUR/LKj8nwPvtLjZxib5SXqmV0t2B2ZMdq1nqD62vLPD3i7tmUeRoysODfxomRGRhy/CPysMhobt5HWF1W/n6tNiQz3V36f0/dRx5yJeyN4tJL/RZePv77FUqywOfFlYOkOIyAkrE5LT6FicOjhHm/B9bGV/y7TNmN6TtwQDxoE9nU92Q/UNZ7rNyZE7aSR7KPJZuRXrrBBh+akt5dX5n6N9kGWkyRpWVgUox01+b22RSj4S/QY45IvadtmmkFk8DMVAtAnEiK0WazltC+TOdUJHwVgBD494fngoVcHU+R1yIJrVe7h6Ce3Ts5IYLrRCedDU3wW1ghn/hXx1nvTqQumpsGTGtE2v3KjF/7DmQA96WzB1X7+QUOF2J3pK9HemiKxAQl4U9fP2eNN8shvy2YykBlahWDujEwye7ji4wIWtNHbf0t+uFwGTQ3QruAKXvWB04ExjHM2I/8O9U5tOsH0cwPqnpFR2EaTqaPXXUllZ2K+DaaA==\"\n
-        \         }\n        ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
-        \"STOP\",\n      \"index\": 0,\n      \"finishMessage\": \"Model generated
-        function call(s).\"\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
-        115,\n    \"candidatesTokenCount\": 17,\n    \"totalTokenCount\": 227,\n    \"promptTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 115\n
-        \     }\n    ],\n    \"thoughtsTokenCount\": 95\n  },\n  \"modelVersion\":
-        \"gemini-2.5-flash\",\n  \"responseId\": \"Y1KWadvNMKz1jMcPiJeJmAI\"\n}\n"
+        \           }\n          }\n        ],\n        \"role\": \"model\"\n      },\n
+        \     \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.00062879999833447594\n
+        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 103,\n    \"candidatesTokenCount\":
+        7,\n    \"totalTokenCount\": 110,\n    \"promptTokensDetails\": [\n      {\n
+        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 103\n      }\n    ],\n
+        \   \"candidatesTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
+        \       \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n
+        \ \"responseId\": \"PpByabfUHsih_uMPlu2ysAM\"\n}\n"
    headers:
      Alt-Svc:
      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
      Content-Type:
      - application/json; charset=UTF-8
      Date:
-      - Wed, 18 Feb 2026 23:59:32 GMT
+      - Thu, 22 Jan 2026 21:01:50 GMT
      Server:
      - scaffolding on HTTPServer2
      Server-Timing:
-      - gfet4t7; dur=956
+      - gfet4t7; dur=521
      Transfer-Encoding:
      - chunked
      Vary:
@@ -76,19 +76,18 @@ interactions:
    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
      * 8\n\nThis is the expected criteria for your final answer: The result of the
      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary."}], "role": "user"}, {"parts": [{"functionCall": {"args": {"expression":
-      "15 * 8"}, "name": "calculator"}}], "role": "model"}, {"parts": [{"functionResponse":
-      {"name": "calculator", "response": {"result": "The result of 15 * 8 is 120"}}}],
-      "role": "user"}, {"parts": [{"text": "Analyze the tool result. If requirements
-      are met, provide the Final Answer. Otherwise, call the next tool. Deliver only
-      the answer without meta-commentary."}], "role": "user"}], "systemInstruction":
-      {"parts": [{"text": "You are Math Assistant. You are a helpful math assistant.\nYour
-      personal goal is: Help users with mathematical calculations"}], "role": "user"},
-      "tools": [{"functionDeclarations": [{"description": "Perform mathematical calculations.
-      Use this for any math operations.", "name": "calculator", "parameters_json_schema":
-      {"properties": {"expression": {"description": "Mathematical expression to evaluate",
-      "title": "Expression", "type": "string"}}, "required": ["expression"], "type":
-      "object", "additionalProperties": false}}]}], "generationConfig": {"stopSequences":
+      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
+      "role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
+      "The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
+      the tool result. If requirements are met, provide the Final Answer. Otherwise,
+      call the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
+      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
+      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
+      "Perform mathematical calculations. Use this for any math operations.", "name":
+      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
+      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
+      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
      ["\nObservation:"]}}'
    headers:
      User-Agent:
@@ -100,7 +99,7 @@ interactions:
      connection:
      - keep-alive
      content-length:
-      - '1326'
+      - '1219'
      content-type:
      - application/json
      host:
@@ -110,28 +109,378 @@ interactions:
      x-goog-api-key:
      - X-GOOG-API-KEY-XXX
    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
  response:
    body:
      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"text\": \"The result of 15 * 8 is 120\"\n          }\n
-        \       ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
-        \"STOP\",\n      \"index\": 0\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
-        191,\n    \"candidatesTokenCount\": 14,\n    \"totalTokenCount\": 205,\n    \"promptTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 191\n
-        \     }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.5-flash\",\n  \"responseId\":
-        \"ZFKWaf2BMM6MjMcP6P--kQM\"\n}\n"
+        [\n          {\n            \"functionCall\": {\n              \"name\": \"calculator\",\n
+        \             \"args\": {\n                \"expression\": \"15 * 8\"\n              }\n
+        \           }\n          }\n        ],\n        \"role\": \"model\"\n      },\n
+        \     \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.013549212898526872\n
+        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 149,\n    \"candidatesTokenCount\":
+        7,\n    \"totalTokenCount\": 156,\n    \"promptTokensDetails\": [\n      {\n
+        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 149\n      }\n    ],\n
+        \   \"candidatesTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
+        \       \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n
+        \ \"responseId\": \"P5Byadc8kJT-4w_p99XQAQ\"\n}\n"
    headers:
      Alt-Svc:
      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
      Content-Type:
      - application/json; charset=UTF-8
      Date:
-      - Wed, 18 Feb 2026 23:59:33 GMT
+      - Thu, 22 Jan 2026 21:01:51 GMT
      Server:
      - scaffolding on HTTPServer2
      Server-Timing:
-      - gfet4t7; dur=421
+      - gfet4t7; dur=444
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
+      * 8\n\nThis is the expected criteria for your final answer: The result of the
+      calculation\nyou MUST return the actual complete content as the final answer,
+      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
+      "role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
+      "The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
+      the tool result. If requirements are met, provide the Final Answer. Otherwise,
+      call the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
+      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
+      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
+      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
+      "Perform mathematical calculations. Use this for any math operations.", "name":
+      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
+      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
+      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
+      ["\nObservation:"]}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1531'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.49.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"functionCall\": {\n              \"name\": \"calculator\",\n
+        \             \"args\": {\n                \"expression\": \"15 * 8\"\n              }\n
+        \           }\n          }\n        ],\n        \"role\": \"model\"\n      },\n
+        \     \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.0409286447933742\n
+        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 195,\n    \"candidatesTokenCount\":
+        7,\n    \"totalTokenCount\": 202,\n    \"promptTokensDetails\": [\n      {\n
+        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 195\n      }\n    ],\n
+        \   \"candidatesTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
+        \       \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n
+        \ \"responseId\": \"P5Byadn5HOK6_uMPnvmXwAk\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Thu, 22 Jan 2026 21:01:51 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=503
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
+      * 8\n\nThis is the expected criteria for your final answer: The result of the
+      calculation\nyou MUST return the actual complete content as the final answer,
+      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
+      "role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
+      "The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
+      the tool result. If requirements are met, provide the Final Answer. Otherwise,
+      call the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
+      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
+      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
+      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
+      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
+      "Perform mathematical calculations. Use this for any math operations.", "name":
+      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
+      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
+      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
+      ["\nObservation:"]}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1843'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.49.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"functionCall\": {\n              \"name\": \"calculator\",\n
+        \             \"args\": {\n                \"expression\": \"15 * 8\"\n              }\n
+        \           }\n          }\n        ],\n        \"role\": \"model\"\n      },\n
+        \     \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.018002046006066457\n
+        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 241,\n    \"candidatesTokenCount\":
+        7,\n    \"totalTokenCount\": 248,\n    \"promptTokensDetails\": [\n      {\n
+        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 241\n      }\n    ],\n
+        \   \"candidatesTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
+        \       \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n
+        \ \"responseId\": \"P5Byafi2PKbn_uMPtIbfuQI\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Thu, 22 Jan 2026 21:01:52 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=482
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
+      * 8\n\nThis is the expected criteria for your final answer: The result of the
+      calculation\nyou MUST return the actual complete content as the final answer,
+      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
+      "role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
+      "The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
+      the tool result. If requirements are met, provide the Final Answer. Otherwise,
+      call the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
+      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
+      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
+      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
+      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
+      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
+      "Perform mathematical calculations. Use this for any math operations.", "name":
+      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
+      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
+      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
+      ["\nObservation:"]}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '2155'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.49.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"functionCall\": {\n              \"name\": \"calculator\",\n
+        \             \"args\": {\n                \"expression\": \"15 * 8\"\n              }\n
+        \           }\n          }\n        ],\n        \"role\": \"model\"\n      },\n
+        \     \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.10329001290457589\n
+        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 287,\n    \"candidatesTokenCount\":
+        7,\n    \"totalTokenCount\": 294,\n    \"promptTokensDetails\": [\n      {\n
+        \       \"modality\": \"TEXT\",\n        \"tokenCount\": 287\n      }\n    ],\n
+        \   \"candidatesTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
+        \       \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n
+        \ \"responseId\": \"QJByaamVIP_g_uMPt6mI0Qg\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Thu, 22 Jan 2026 21:01:52 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=534
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: Calculate what is 15
+      * 8\n\nThis is the expected criteria for your final answer: The result of the
+      calculation\nyou MUST return the actual complete content as the final answer,
+      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],
+      "role": "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text":
+      "The result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze
+      the tool result. If requirements are met, provide the Final Answer. Otherwise,
+      call the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
+      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
+      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
+      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}, {"parts": [{"text": ""}], "role": "model"}, {"parts": [{"text": "The
+      result of 15 * 8 is 120"}], "role": "user"}, {"parts": [{"text": "Analyze the
+      tool result. If requirements are met, provide the Final Answer. Otherwise, call
+      the next tool. Deliver only the answer without meta-commentary."}], "role":
+      "user"}], "systemInstruction": {"parts": [{"text": "You are Math Assistant.
+      You are a helpful math assistant.\nYour personal goal is: Help users with mathematical
+      calculations"}], "role": "user"}, "tools": [{"functionDeclarations": [{"description":
+      "Perform mathematical calculations. Use this for any math operations.", "name":
+      "calculator", "parameters": {"properties": {"expression": {"description": "Mathematical
+      expression to evaluate", "title": "Expression", "type": "STRING"}}, "required":
+      ["expression"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
+      ["\nObservation:"]}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '2467'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.49.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"text\": \"120\\n\"\n          }\n        ],\n
+        \       \"role\": \"model\"\n      },\n      \"finishReason\": \"STOP\",\n
+        \     \"avgLogprobs\": -0.0097615998238325119\n    }\n  ],\n  \"usageMetadata\":
+        {\n    \"promptTokenCount\": 333,\n    \"candidatesTokenCount\": 4,\n    \"totalTokenCount\":
+        337,\n    \"promptTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
+        \       \"tokenCount\": 333\n      }\n    ],\n    \"candidatesTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 4\n      }\n
+        \   ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n  \"responseId\":
+        \"QZByaZHABO-i_uMP58aYqAk\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Thu, 22 Jan 2026 21:01:53 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=412
      Transfer-Encoding:
      - chunked
      Vary:
--- a/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_parallel_native_tool_calling_test_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_parallel_native_tool_calling_test_agent_kickoff.yaml
@@ -1,188 +0,0 @@
-interactions:
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."}], "role": "user"}], "systemInstruction": {"parts": [{"text":
-      "You are Parallel Tool Agent. You follow tool instructions precisely.\nYour
-      personal goal is: Use both tools exactly as instructed"}], "role": "user"},
-      "tools": [{"functionDeclarations": [{"description": "Local search tool #1 for
-      concurrency testing.", "name": "parallel_local_search_one", "parameters_json_schema":
-      {"properties": {"query": {"description": "Search query", "title": "Query", "type":
-      "string"}}, "required": ["query"], "type": "object", "additionalProperties":
-      false}}, {"description": "Local search tool #2 for concurrency testing.", "name":
-      "parallel_local_search_two", "parameters_json_schema": {"properties": {"query":
-      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
-      ["query"], "type": "object", "additionalProperties": false}}, {"description":
-      "Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
-      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
-      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
-      "additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1783'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"functionCall\": {\n              \"name\": \"parallel_local_search_one\",\n
-        \             \"args\": {\n                \"query\": \"latest OpenAI model
-        release notes\"\n              }\n            },\n            \"thoughtSignature\":
-        \"CrICAb4+9vtrrkiSatPyOs7fssb9akcgCIiQdJKp/k+hcEZVNFvU/H0e4FFmLIhTCPRyHxmU+AQPtBZ5vg6y9ZCcv11RdcWgYW8rPQzCnC+YTUxPAfDzaObky1QsL5pl9+yglQqVoVM31ZcnoiH02z85pwAv6TSJxdJZEekW6XwcIrCoHNCgY3ghHFEd3y3wLJ5JWL7wmiRNTC9TCT8aJHXKFohYrb+4JMULCx8BqKVxOucZPiDHA8GsoqSlzkYEe2xCh9oSdaZpCFrxhZ9bwoVDbVmPrjaq2hj5BoJ5hNxscHJ/E0EOl4ogeKZW+hIVfdzpjAFZW9Oejkb9G4ZSLbxXsoO7x8bi4LHFRABniGrWvNuOOH0Udh4t57oXHXZO4u5NNTood/GkJGcP+aHqUAH1fwqL\"\n
-        \         },\n          {\n            \"functionCall\": {\n              \"name\":
-        \"parallel_local_search_two\",\n              \"args\": {\n                \"query\":
-        \"latest Anthropic model release notes\"\n              }\n            }\n
-        \         },\n          {\n            \"functionCall\": {\n              \"name\":
-        \"parallel_local_search_three\",\n              \"args\": {\n                \"query\":
-        \"latest Gemini model release notes\"\n              }\n            }\n          }\n
-        \       ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
-        \"STOP\",\n      \"index\": 0,\n      \"finishMessage\": \"Model generated
-        function call(s).\"\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
-        291,\n    \"candidatesTokenCount\": 70,\n    \"totalTokenCount\": 428,\n    \"promptTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 291\n
-        \     }\n    ],\n    \"thoughtsTokenCount\": 67\n  },\n  \"modelVersion\":
-        \"gemini-2.5-flash\",\n  \"responseId\": \"alKWacytCLi5jMcPhISaoAI\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Wed, 18 Feb 2026 23:59:39 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=999
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."}], "role": "user"}, {"parts": [{"functionCall": {"args":
-      {"query": "latest OpenAI model release notes"}, "name": "parallel_local_search_one"},
-      "thoughtSignature": "CrICAb4-9vtrrkiSatPyOs7fssb9akcgCIiQdJKp_k-hcEZVNFvU_H0e4FFmLIhTCPRyHxmU-AQPtBZ5vg6y9ZCcv11RdcWgYW8rPQzCnC-YTUxPAfDzaObky1QsL5pl9-yglQqVoVM31ZcnoiH02z85pwAv6TSJxdJZEekW6XwcIrCoHNCgY3ghHFEd3y3wLJ5JWL7wmiRNTC9TCT8aJHXKFohYrb-4JMULCx8BqKVxOucZPiDHA8GsoqSlzkYEe2xCh9oSdaZpCFrxhZ9bwoVDbVmPrjaq2hj5BoJ5hNxscHJ_E0EOl4ogeKZW-hIVfdzpjAFZW9Oejkb9G4ZSLbxXsoO7x8bi4LHFRABniGrWvNuOOH0Udh4t57oXHXZO4u5NNTood_GkJGcP-aHqUAH1fwqL"},
-      {"functionCall": {"args": {"query": "latest Anthropic model release notes"},
-      "name": "parallel_local_search_two"}}, {"functionCall": {"args": {"query": "latest
-      Gemini model release notes"}, "name": "parallel_local_search_three"}}], "role":
-      "model"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_one",
-      "response": {"result": "[one] latest OpenAI model release notes"}}}], "role":
-      "user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_two",
-      "response": {"result": "[two] latest Anthropic model release notes"}}}], "role":
-      "user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_three",
-      "response": {"result": "[three] latest Gemini model release notes"}}}], "role":
-      "user"}], "systemInstruction": {"parts": [{"text": "You are Parallel Tool Agent.
-      You follow tool instructions precisely.\nYour personal goal is: Use both tools
-      exactly as instructed"}], "role": "user"}, "tools": [{"functionDeclarations":
-      [{"description": "Local search tool #1 for concurrency testing.", "name": "parallel_local_search_one",
-      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
-      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
-      "additionalProperties": false}}, {"description": "Local search tool #2 for concurrency
-      testing.", "name": "parallel_local_search_two", "parameters_json_schema": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}, {"description":
-      "Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
-      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
-      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
-      "additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '3071'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"text\": \"Here is a summary of the latest model
-        release notes: I have retrieved information regarding the latest OpenAI model
-        release notes, the latest Anthropic model release notes, and the latest Gemini
-        model release notes. The specific details of these release notes are available
-        through the respective tool outputs.\",\n            \"thoughtSignature\":
-        \"CsoBAb4+9vtPvWFM08lR1S4QrLN+Z1+Zpf04Y/bC8tjOpnxz3EEvHyRNEwkslUX5pftBi8J78Xk4/FUER0xjJZc8clUObTvayxLNup4h1JwJ5ZdatulInNGTEieFnF4w8KjSFB/vqNCZvXWZbiLkpzqAnsoAIf0x4VmMN11V0Ozo+3f2QftD+iBrfu3g21UI5tbG0Z+0QHxjRVKXrQOp7dmoZPzaxI0zalfDEI+A2jGpVl/VvauVNv0jQn0yItcA5tkVeWLq6717CjNoig==\"\n
-        \         }\n        ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
-        \"STOP\",\n      \"index\": 0\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
-        435,\n    \"candidatesTokenCount\": 54,\n    \"totalTokenCount\": 524,\n    \"promptTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 435\n
-        \     }\n    ],\n    \"thoughtsTokenCount\": 35\n  },\n  \"modelVersion\":
-        \"gemini-2.5-flash\",\n  \"responseId\": \"bFKWaZOZCqCvjMcPvvGNgAc\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Wed, 18 Feb 2026 23:59:41 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=967
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_parallel_native_tool_calling_test_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestGeminiNativeToolCalling.test_gemini_parallel_native_tool_calling_test_crew.yaml
@@ -1,192 +0,0 @@
-interactions:
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."}], "role": "user"}], "systemInstruction":
-      {"parts": [{"text": "You are Parallel Tool Agent. You follow tool instructions
-      precisely.\nYour personal goal is: Use both tools exactly as instructed"}],
-      "role": "user"}, "tools": [{"functionDeclarations": [{"description": "Local
-      search tool #1 for concurrency testing.", "name": "parallel_local_search_one",
-      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
-      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
-      "additionalProperties": false}}, {"description": "Local search tool #2 for concurrency
-      testing.", "name": "parallel_local_search_two", "parameters_json_schema": {"properties":
-      {"query": {"description": "Search query", "title": "Query", "type": "string"}},
-      "required": ["query"], "type": "object", "additionalProperties": false}}, {"description":
-      "Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
-      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
-      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
-      "additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1964'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"functionCall\": {\n              \"name\": \"parallel_local_search_one\",\n
-        \             \"args\": {\n                \"query\": \"latest OpenAI model
-        release notes\"\n              }\n            },\n            \"thoughtSignature\":
-        \"CuMEAb4+9vu1V1iOC9o/a8+jQqow8F4RTrjlnjnDCwsisMHLLJ+Wj3pZxbFDeIjCJe9pa6+14InyYHh/ezgHrv+xPGIJtX9pJQatDCBAfCmcZ3fDipVIMAHLcl0Q660EVuZ+vRgvNhPSau+uSN9u303wJsaKvdzOQnfww2LfLtJMNtOhSHfkfhfw2bkBOtMa5/FuLqKSr6m94dSdE7HShR6+jLMLbiSXkBLWsRp0jGl85Wvd0hoA7dUyq+uIuyOBr5Myo9uMrLbxfnrRRbPMorOpYTCmHK0HE8mEBRjzh1hNwcBcfRL0VcgA2UnBIurStIeVbq51BJQ1UOq6r1wVi50Wdh1GjIQ/iN9C15T1Ql3adjom5QbmY+XY08RJOiNyVplh1YQ0qlWCVHEpueEfdzcIB+BUauVrLNqBcBr5g6ekO5QZCAdt7PLerQU8jhKjDQy367jCKQyaHir0GmAISS8RlZ8tkLKNZlZhd11D76ui6X8ep9yznViBbqH0AS1R2hMm+ielMVFjhidglTMjqB0X+yk1K2eZXkc+R/xsXRPlnlZWRygnV+IbU8RAnZWtneM464Wccmc1scfF45GKiji5bLYO7Zx+ZF8mSLcQaC8M3z121D6VbFonhaIdkJ3Wb7nI2vEyxFjdinVk3/P0zL8nu3nHeqQviTrQIoHMsZk0yPyqu9NWxg3wGJL5pbcaQh87ROQuTsInkuzzEr0QMzjw9W5iquhMh4/Wy/OKXAgf3maQB9Jb4HoHZlc0io+KYqewFSVx2BvqXbqJbIrTkTo6XRTbK7dkwlCbMmE1wKIwjrrzZQI=\"\n
-        \         },\n          {\n            \"functionCall\": {\n              \"name\":
-        \"parallel_local_search_two\",\n              \"args\": {\n                \"query\":
-        \"latest Anthropic model release notes\"\n              }\n            }\n
-        \         },\n          {\n            \"functionCall\": {\n              \"name\":
-        \"parallel_local_search_three\",\n              \"args\": {\n                \"query\":
-        \"latest Gemini model release notes\"\n              }\n            }\n          }\n
-        \       ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
-        \"STOP\",\n      \"index\": 0,\n      \"finishMessage\": \"Model generated
-        function call(s).\"\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
-        327,\n    \"candidatesTokenCount\": 70,\n    \"totalTokenCount\": 536,\n    \"promptTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 327\n
-        \     }\n    ],\n    \"thoughtsTokenCount\": 139\n  },\n  \"modelVersion\":
-        \"gemini-2.5-flash\",\n  \"responseId\": \"ZVKWabziF7bcjMcP3r2SuAg\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Wed, 18 Feb 2026 23:59:34 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=1262
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."}], "role": "user"}, {"parts": [{"functionCall":
-      {"args": {"query": "latest OpenAI model release notes"}, "name": "parallel_local_search_one"}},
-      {"functionCall": {"args": {"query": "latest Anthropic model release notes"},
-      "name": "parallel_local_search_two"}}, {"functionCall": {"args": {"query": "latest
-      Gemini model release notes"}, "name": "parallel_local_search_three"}}], "role":
-      "model"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_one",
-      "response": {"result": "[one] latest OpenAI model release notes"}}}], "role":
-      "user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_two",
-      "response": {"result": "[two] latest Anthropic model release notes"}}}], "role":
-      "user"}, {"parts": [{"functionResponse": {"name": "parallel_local_search_three",
-      "response": {"result": "[three] latest Gemini model release notes"}}}], "role":
-      "user"}, {"parts": [{"text": "Analyze the tool result. If requirements are met,
-      provide the Final Answer. Otherwise, call the next tool. Deliver only the answer
-      without meta-commentary."}], "role": "user"}], "systemInstruction": {"parts":
-      [{"text": "You are Parallel Tool Agent. You follow tool instructions precisely.\nYour
-      personal goal is: Use both tools exactly as instructed"}], "role": "user"},
-      "tools": [{"functionDeclarations": [{"description": "Local search tool #1 for
-      concurrency testing.", "name": "parallel_local_search_one", "parameters_json_schema":
-      {"properties": {"query": {"description": "Search query", "title": "Query", "type":
-      "string"}}, "required": ["query"], "type": "object", "additionalProperties":
-      false}}, {"description": "Local search tool #2 for concurrency testing.", "name":
-      "parallel_local_search_two", "parameters_json_schema": {"properties": {"query":
-      {"description": "Search query", "title": "Query", "type": "string"}}, "required":
-      ["query"], "type": "object", "additionalProperties": false}}, {"description":
-      "Local search tool #3 for concurrency testing.", "name": "parallel_local_search_three",
-      "parameters_json_schema": {"properties": {"query": {"description": "Search query",
-      "title": "Query", "type": "string"}}, "required": ["query"], "type": "object",
-      "additionalProperties": false}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '3014'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"text\": \"The search results indicate the latest
-        model release notes for OpenAI, Anthropic, and Gemini are: [one] latest OpenAI
-        model release notes[two] latest Anthropic model release notes[three] latest
-        Gemini model release notes.\",\n            \"thoughtSignature\": \"CsUPAb4+9vs4hkuatQAakl1FSHx5DIde9nHYobJdlWs2HEzES9gHn7uwjMIlFPTzJUbnZqxpAK93hqsCofdfGANr8dwK+/IbZAiMSikpAq2ZjEbWADjfalU3ke4LcQMh6TEYFVGz1QCinjne3jZx5jOVaL8YdAtjOYnBZWA6KqdvfKjD7+Ct/BLoEqvu4LW6kxhXQgcV+D3M1QxGlr1dxpajj4wyYFI9LXchE2vCdAMPYTkPQ4WPbS3xjz0jJb6qFAwwg+BY5kGemkWWVHsvq28t09pd7FEH0bod5cEpR65qEefpJfhHsXYqmOwHDkfNePYnYC+5qmn7kvkN+fhF41SoMRZahMZGDjIo+q6vvru3eXKmZiuLsrh8AqQIks/4S3sSuxt16ogYKE+LlFxml2ygXFPww59nRAtc+xK6VW8jB2vyv9Eo5cpnG9ZBv1dOznJnmj4AWA1ddMlp+yq8AdaboTSo5dysYMwFcSXS3kuU+xi92dC+7GqZZbDr5frvnc+MnSuzYwHhNjSQqvTo5DKGit53zDwlFJT74kLBXk36BOFQp4xlfs+BpKkw11bow6qQoTvC68D023ZHami+McO1WYBDoO5CrDoosU8fAYljqaGArBoMlssF4O7VKHEaEbEZnYCr0Wxo6XP/mtPIpHQE4OyCz/GAJSJtQv1hO7DNCMzpSpkLyuemB1SOZGl3mlLQhosh3TAGP0xgqmHpKccdCSWoXGWjO48VluFuV9E1FwW1Xi++XhMRcUaljJXPZaNVjGcAG1uAxeVkUMsY8tBvQ0vaumUK2jkzbyQTWeStEWwl1yKmklI8JDXske/k6tYJOyF+8t0mF7oCEqNHSNicj7TomihpPlVjNl1Mm4l5fvwlKtAPJwiKrchCunlZB3uGN1AR0h0Hvznffutc/lV/FWFbNgFAaNJZKRs40vMk1xmRZyH2rs+Ob2fZriQ3BSwzzNeiwDLXxm0m/ytOai+K9ObFuC/IEh5fJfvQbNeo3TmiCAMCZPNXMDtlOyLqQzzKwmMFH4c53Ol+kkTiuAKECNQR1dOCufAL0U5lzEUFRxFvOq67lp6xqG8m+WzCIkbnF8QyJHfujtXVMJACaevUkM7+kAVyTwETEKQsanp0tBwzV42ieChp/h7pivcC++cFXdSG5dvR94BgkHmtpC9+jfNH32RREPLuyWfU5aBXiOkxjRs9fDexAFjrkGjM18I+jqHZNeuUR20BKe2jFsU8xJS3Fa4eXabm/YPL1t8R5jr572Ch/r4bspFp8MQ5RcFo8Nn/HiBmW8uZ2BcLEY1RPWUBvxVhfvh/hNxaRKu21x8vGz72RoiNuOjNbeADYAaBJqBGLp0MALxZ/rnXPzDLQUt6Mv07fWHAZr5p3r/skleot25lr2Tcl4qJCPM4/cfs6U0x4CY26ktBiCs4bWKqSEV1Q05nf5kpxVOIRSTgxqFOj/rWIAF3uw7mvsuRKd3YXILV5OrvEoETdQvf7BdYPbQbIQYDf7DBKhf51O8RKQgcfl6mVQswamdJ+PyqLbozTkFCjXMKI0PwJdy8tfKfCeeEe0TbOXSfeTczKQkL8WyWkBg4tS81JnWAVzfVlNjbvo/fk+wv7FyfJJS1HJGlxZ0kUlWi1369rSlldYPoSqopuekOxtYnpYpz92y/jVLNQXE1IVLqWYh9o3gTwjeyaHG7fCaWF2QRGrCUvejT8eJjevhj/sgadjPVcEP5o7Zcw5yTBCgc0+FX1j5KpCmfZ/dVvT4iIX8bOkhxjHQ8ifOx39BMM4EObgCA+g+BFN+Ra7kOf4hJ6tPNhqvJa4E4fyISlVrRiBqSt59ZkuLyWuY9SYy0nvbklP30WDUHSAvcuEwVMSuT524afHISfO/+tSgE7JAKzEPSOoVO3Z5NS9kcAqHuBSe/LL4XJbCKF9O
ggm9/gwdAulnBANd4ydQ/raTPE/QUu/CGqqGhBd+wo8x0Jg/BMZWkwhz0fEzsh+OjnrEkHv4QIqZ9v/j1Rv9uc+cDeK7eGi62okGLrPFX2pNQtsZRdUM9aBSlTBUVSdCDpkvieENzLnR257EDZy1EV2HxGRfOFZVVdaW1n8XvL73pcFoQ5XABpfYuigOS8i4S8g43Qfe77GosnuXR5rcJCrL03q3hptb97K5ysKFLgumsaaWo92MBhZYKvQ6SwStgyWRlb22uQGQJYsS8OTD/uVNiQzFjOMsR/l71c9RI1Eb7SQJT6WWvL1YhA7sQw/lQf8soLKfWshoky6mMrGopjRak8xHpJe5VWbqK8PK6iXDd403JrHICyh4M3FpEja3eX2V3SN6U+EgIWKIE8lE/iQZakhLtG2KL7nNQy/cksxzIh5ElQCe5NkrQZO0fai6ek8qwbmz07RVg2FknD7F2hvmxZBqoJSXhsFVn/9+fnkcsZekEtUevFmlQQNspPc63XgO0XmpTye9uM/BbTEsNEWeHSFZTEQLLx1l+pgwsYO3NlNSIUN24/GIR7JrZFG4fAoljkDKjhrYQzr1Fiy3t5G+CmadZ0TcjRQQdDw36ETlf7cizcrQc4FNtnx5rNWEaf54vUvlsd2DD19UIkzP9omITsiuNPPcUNq0A6v1TkgnSNYfhb26nxJIg34r8MmCAhWzB2eCy54gvOHDGLFAwfFZrQdvl\"\n
-        \         }\n        ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
-        \"STOP\",\n      \"index\": 0\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
-        504,\n    \"candidatesTokenCount\": 45,\n    \"totalTokenCount\": 973,\n    \"promptTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 504\n
-        \     }\n    ],\n    \"thoughtsTokenCount\": 424\n  },\n  \"modelVersion\":
-        \"gemini-2.5-flash\",\n  \"responseId\": \"Z1KWaYbTKZvnjMcP7piEoAg\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Wed, 18 Feb 2026 23:59:37 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=2283
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_agent_with_native_tool_calling.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_agent_with_native_tool_calling.yaml
@@ -5,9 +5,9 @@ interactions:
      calculations"},{"role":"user","content":"\nCurrent Task: Calculate what is 15
      * 8\n\nThis is the expected criteria for your final answer: The result of the
      calculation\nyou MUST return the actual complete content as the final answer,
-      not a summary."}],"model":"gpt-5-nano","tool_choice":"auto","tools":[{"type":"function","function":{"name":"calculator","description":"Perform
-      mathematical calculations. Use this for any math operations.","strict":true,"parameters":{"properties":{"expression":{"description":"Mathematical
-      expression to evaluate","title":"Expression","type":"string"}},"required":["expression"],"type":"object","additionalProperties":false}}}]}'
+      not a summary.\n\nThis is VERY important to you, your job depends on it!"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"calculator","description":"Perform
+      mathematical calculations. Use this for any math operations.","parameters":{"properties":{"expression":{"description":"Mathematical
+      expression to evaluate","title":"Expression","type":"string"}},"required":["expression"],"type":"object"}}}]}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
@@ -20,7 +20,7 @@ interactions:
      connection:
      - keep-alive
      content-length:
-      - '813'
+      - '829'
      content-type:
      - application/json
      host:
@@ -47,17 +47,21 @@ interactions:
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
-      string: "{\n  \"id\": \"chatcmpl-DAlG9W2mJYuOgpf3FwCRgbqaiHWf3\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1771457317,\n  \"model\": \"gpt-5-nano-2025-08-07\",\n
+      string: "{\n  \"id\": \"chatcmpl-D0vm7joOuDBPcMpfmOnftOoTCPtc8\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1769114459,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"120\",\n        \"refusal\": null,\n
-        \       \"annotations\": []\n      },\n      \"finish_reason\": \"stop\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 208,\n    \"completion_tokens\":
-        138,\n    \"total_tokens\": 346,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
+        \           \"id\": \"call_G73UZDvL4wC9EEdvm1UcRIRM\",\n            \"type\":
+        \"function\",\n            \"function\": {\n              \"name\": \"calculator\",\n
+        \             \"arguments\": \"{\\\"expression\\\":\\\"15 * 8\\\"}\"\n            }\n
+        \         }\n        ],\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"tool_calls\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 137,\n    \"completion_tokens\":
+        17,\n    \"total_tokens\": 154,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 128,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": null\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_c4585b5b9c\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
@@ -66,7 +70,126 @@ interactions:
      Content-Type:
      - application/json
      Date:
-      - Wed, 18 Feb 2026 23:28:39 GMT
+      - Thu, 22 Jan 2026 20:40:59 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '761'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '1080'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Math Assistant. You are
+      a helpful math assistant.\nYour personal goal is: Help users with mathematical
+      calculations"},{"role":"user","content":"\nCurrent Task: Calculate what is 15
+      * 8\n\nThis is the expected criteria for your final answer: The result of the
+      calculation\nyou MUST return the actual complete content as the final answer,
+      not a summary.\n\nThis is VERY important to you, your job depends on it!"},{"role":"assistant","content":null,"tool_calls":[{"id":"call_G73UZDvL4wC9EEdvm1UcRIRM","type":"function","function":{"name":"calculator","arguments":"{\"expression\":\"15
+      * 8\"}"}}]},{"role":"tool","tool_call_id":"call_G73UZDvL4wC9EEdvm1UcRIRM","content":"The
+      result of 15 * 8 is 120"},{"role":"user","content":"Analyze the tool result.
+      If requirements are met, provide the Final Answer. Otherwise, call the next
+      tool. Deliver only the answer without meta-commentary."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"calculator","description":"Perform
+      mathematical calculations. Use this for any math operations.","parameters":{"properties":{"expression":{"description":"Mathematical
+      expression to evaluate","title":"Expression","type":"string"}},"required":["expression"],"type":"object"}}}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1299'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D0vm8mUnzLxu9pf1rc7MODkrMsCmf\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1769114460,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"120\",\n        \"refusal\": null,\n
+        \       \"annotations\": []\n      },\n      \"logprobs\": null,\n      \"finish_reason\":
+        \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 207,\n    \"completion_tokens\":
+        2,\n    \"total_tokens\": 209,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_c4585b5b9c\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 22 Jan 2026 20:41:00 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
@@ -84,13 +207,13 @@ interactions:
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
-      - '1869'
+      - '262'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
-      set-cookie:
-      - SET-COOKIE-XXX
+      x-envoy-upstream-service-time:
+      - '496'
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
--- a/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_test_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_test_agent_kickoff.yaml
@@ -1,265 +0,0 @@
-interactions:
- request:
-    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
-      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
-      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1733'
-      content-type:
-      - application/json
-      host:
-      - api.openai.com
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 1.83.0
-      x-stainless-read-timeout:
-      - X-STAINLESS-READ-TIMEOUT-XXX
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: "{\n  \"id\": \"chatcmpl-DAldZHfQGVcV3FNwAJAtNooU3PAU7\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1771458769,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
-        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
-        \           \"id\": \"call_kz1qLLRsugXwWiQMeH9oFAep\",\n            \"type\":
-        \"function\",\n            \"function\": {\n              \"name\": \"parallel_local_search_one\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest OpenAI model release
-        notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
-        \"call_yNouGq1Kv6P5W9fhTng6acZi\",\n            \"type\": \"function\",\n
-        \           \"function\": {\n              \"name\": \"parallel_local_search_two\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest Anthropic model
-        release notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
-        \"call_O7MqnuniDmyT6a0BS31GTunB\",\n            \"type\": \"function\",\n
-        \           \"function\": {\n              \"name\": \"parallel_local_search_three\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest Gemini model release
-        notes\\\"}\"\n            }\n          }\n        ],\n        \"refusal\":
-        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
-        \     \"finish_reason\": \"tool_calls\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-        259,\n    \"completion_tokens\": 78,\n    \"total_tokens\": 337,\n    \"prompt_tokens_details\":
-        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_414ba99a04\"\n}\n"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Type:
-      - application/json
-      Date:
-      - Wed, 18 Feb 2026 23:52:50 GMT
-      Server:
-      - cloudflare
-      Strict-Transport-Security:
-      - STS-XXX
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      access-control-expose-headers:
-      - ACCESS-CONTROL-XXX
-      alt-svc:
-      - h3=":443"; ma=86400
-      cf-cache-status:
-      - DYNAMIC
-      openai-organization:
-      - OPENAI-ORG-XXX
-      openai-processing-ms:
-      - '1418'
-      openai-project:
-      - OPENAI-PROJECT-XXX
-      openai-version:
-      - '2020-10-01'
-      set-cookie:
-      - SET-COOKIE-XXX
-      x-openai-proxy-wasm:
-      - v0.1
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-ratelimit-reset-requests:
-      - X-RATELIMIT-RESET-REQUESTS-XXX
-      x-ratelimit-reset-tokens:
-      - X-RATELIMIT-RESET-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
-      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
-      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."},{"role":"assistant","content":null,"tool_calls":[{"id":"call_kz1qLLRsugXwWiQMeH9oFAep","type":"function","function":{"name":"parallel_local_search_one","arguments":"{\"query\":
-      \"latest OpenAI model release notes\"}"}},{"id":"call_yNouGq1Kv6P5W9fhTng6acZi","type":"function","function":{"name":"parallel_local_search_two","arguments":"{\"query\":
-      \"latest Anthropic model release notes\"}"}},{"id":"call_O7MqnuniDmyT6a0BS31GTunB","type":"function","function":{"name":"parallel_local_search_three","arguments":"{\"query\":
-      \"latest Gemini model release notes\"}"}}]},{"role":"tool","tool_call_id":"call_kz1qLLRsugXwWiQMeH9oFAep","name":"parallel_local_search_one","content":"[one]
-      latest OpenAI model release notes"},{"role":"tool","tool_call_id":"call_yNouGq1Kv6P5W9fhTng6acZi","name":"parallel_local_search_two","content":"[two]
-      latest Anthropic model release notes"},{"role":"tool","tool_call_id":"call_O7MqnuniDmyT6a0BS31GTunB","name":"parallel_local_search_three","content":"[three]
-      latest Gemini model release notes"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '2756'
-      content-type:
-      - application/json
-      cookie:
-      - COOKIE-XXX
-      host:
-      - api.openai.com
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 1.83.0
-      x-stainless-read-timeout:
-      - X-STAINLESS-READ-TIMEOUT-XXX
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: "{\n  \"id\": \"chatcmpl-DAldbawkFNpOeXbaJTkTlsSi7OiII\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1771458771,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
-        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"The latest release notes for OpenAI,
-        Anthropic, and Gemini models highlight significant updates and improvements
-        in each respective technology. OpenAI's notes detail new features and optimizations
-        that enhance user interaction and performance. Anthropic's release emphasizes
-        their focus on safety and alignment in AI development, showcasing advancements
-        in responsible AI practices. Gemini's notes underline their innovative approaches
-        and cutting-edge functionalities designed to push the boundaries of current
-        AI capabilities.\",\n        \"refusal\": null,\n        \"annotations\":
-        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 377,\n    \"completion_tokens\":
-        85,\n    \"total_tokens\": 462,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
-        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_414ba99a04\"\n}\n"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Type:
-      - application/json
-      Date:
-      - Wed, 18 Feb 2026 23:52:53 GMT
-      Server:
-      - cloudflare
-      Strict-Transport-Security:
-      - STS-XXX
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      access-control-expose-headers:
-      - ACCESS-CONTROL-XXX
-      alt-svc:
-      - h3=":443"; ma=86400
-      cf-cache-status:
-      - DYNAMIC
-      openai-organization:
-      - OPENAI-ORG-XXX
-      openai-processing-ms:
-      - '1755'
-      openai-project:
-      - OPENAI-PROJECT-XXX
-      openai-version:
-      - '2020-10-01'
-      x-openai-proxy-wasm:
-      - v0.1
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-ratelimit-reset-requests:
-      - X-RATELIMIT-RESET-REQUESTS-XXX
-      x-ratelimit-reset-tokens:
-      - X-RATELIMIT-RESET-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_test_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_test_crew.yaml
@@ -1,265 +0,0 @@
-interactions:
- request:
-    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
-      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
-      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1929'
-      content-type:
-      - application/json
-      host:
-      - api.openai.com
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 1.83.0
-      x-stainless-read-timeout:
-      - X-STAINLESS-READ-TIMEOUT-XXX
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: "{\n  \"id\": \"chatcmpl-DAlddfEozIpgleBufPaffZMQWK0Hj\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1771458773,\n  \"model\": \"gpt-5-nano-2025-08-07\",\n
-        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
-        \           \"id\": \"call_Putc2jV5GhiIZMwx8mDcI61Q\",\n            \"type\":
-        \"function\",\n            \"function\": {\n              \"name\": \"parallel_local_search_one\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest OpenAI model release
-        notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
-        \"call_iyjwcvkL3PdoOddxsqkHCT9T\",\n            \"type\": \"function\",\n
-        \           \"function\": {\n              \"name\": \"parallel_local_search_two\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest Anthropic model
-        release notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
-        \"call_G728RseEU7SbGk5YTiyyp9IH\",\n            \"type\": \"function\",\n
-        \           \"function\": {\n              \"name\": \"parallel_local_search_three\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest Gemini model release
-        notes\\\"}\"\n            }\n          }\n        ],\n        \"refusal\":
-        null,\n        \"annotations\": []\n      },\n      \"finish_reason\": \"tool_calls\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 378,\n    \"completion_tokens\":
-        1497,\n    \"total_tokens\": 1875,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
-        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 1408,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": null\n}\n"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Type:
-      - application/json
-      Date:
-      - Wed, 18 Feb 2026 23:53:08 GMT
-      Server:
-      - cloudflare
-      Strict-Transport-Security:
-      - STS-XXX
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      access-control-expose-headers:
-      - ACCESS-CONTROL-XXX
-      alt-svc:
-      - h3=":443"; ma=86400
-      cf-cache-status:
-      - DYNAMIC
-      openai-organization:
-      - OPENAI-ORG-XXX
-      openai-processing-ms:
-      - '14853'
-      openai-project:
-      - OPENAI-PROJECT-XXX
-      openai-version:
-      - '2020-10-01'
-      set-cookie:
-      - SET-COOKIE-XXX
-      x-openai-proxy-wasm:
-      - v0.1
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-ratelimit-reset-requests:
-      - X-RATELIMIT-RESET-REQUESTS-XXX
-      x-ratelimit-reset-tokens:
-      - X-RATELIMIT-RESET-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
-      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
-      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."},{"role":"assistant","content":null,"tool_calls":[{"id":"call_Putc2jV5GhiIZMwx8mDcI61Q","type":"function","function":{"name":"parallel_local_search_one","arguments":"{\"query\":
-      \"latest OpenAI model release notes\"}"}},{"id":"call_iyjwcvkL3PdoOddxsqkHCT9T","type":"function","function":{"name":"parallel_local_search_two","arguments":"{\"query\":
-      \"latest Anthropic model release notes\"}"}},{"id":"call_G728RseEU7SbGk5YTiyyp9IH","type":"function","function":{"name":"parallel_local_search_three","arguments":"{\"query\":
-      \"latest Gemini model release notes\"}"}}]},{"role":"tool","tool_call_id":"call_Putc2jV5GhiIZMwx8mDcI61Q","name":"parallel_local_search_one","content":"[one]
-      latest OpenAI model release notes"},{"role":"tool","tool_call_id":"call_iyjwcvkL3PdoOddxsqkHCT9T","name":"parallel_local_search_two","content":"[two]
-      latest Anthropic model release notes"},{"role":"tool","tool_call_id":"call_G728RseEU7SbGk5YTiyyp9IH","name":"parallel_local_search_three","content":"[three]
-      latest Gemini model release notes"},{"role":"user","content":"Analyze the tool
-      result. If requirements are met, provide the Final Answer. Otherwise, call the
-      next tool. Deliver only the answer without meta-commentary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '3136'
-      content-type:
-      - application/json
-      cookie:
-      - COOKIE-XXX
-      host:
-      - api.openai.com
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 1.83.0
-      x-stainless-read-timeout:
-      - X-STAINLESS-READ-TIMEOUT-XXX
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: "{\n  \"id\": \"chatcmpl-DAldt2BXNqiYYLPgInjHCpYKfk2VK\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1771458789,\n  \"model\": \"gpt-5-nano-2025-08-07\",\n
-        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"The results show the latest model release
-        notes for OpenAI, Anthropic, and Gemini.\",\n        \"refusal\": null,\n
-        \       \"annotations\": []\n      },\n      \"finish_reason\": \"stop\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 537,\n    \"completion_tokens\":
-        2011,\n    \"total_tokens\": 2548,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
-        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 1984,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": null\n}\n"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Type:
-      - application/json
-      Date:
-      - Wed, 18 Feb 2026 23:53:25 GMT
-      Server:
-      - cloudflare
-      Strict-Transport-Security:
-      - STS-XXX
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      access-control-expose-headers:
-      - ACCESS-CONTROL-XXX
-      alt-svc:
-      - h3=":443"; ma=86400
-      cf-cache-status:
-      - DYNAMIC
-      openai-organization:
-      - OPENAI-ORG-XXX
-      openai-processing-ms:
-      - '15368'
-      openai-project:
-      - OPENAI-PROJECT-XXX
-      openai-version:
-      - '2020-10-01'
-      x-openai-proxy-wasm:
-      - v0.1
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-ratelimit-reset-requests:
-      - X-RATELIMIT-RESET-REQUESTS-XXX
-      x-ratelimit-reset-tokens:
-      - X-RATELIMIT-RESET-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_tool_hook_parity_agent_kickoff.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_tool_hook_parity_agent_kickoff.yaml
@@ -1,264 +0,0 @@
-interactions:
- request:
-    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
-      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
-      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1748'
-      content-type:
-      - application/json
-      host:
-      - api.openai.com
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 1.83.0
-      x-stainless-read-timeout:
-      - X-STAINLESS-READ-TIMEOUT-XXX
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: "{\n  \"id\": \"chatcmpl-DB244zBgA66fzl8TNcIPRWoE4lDIQ\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1771521916,\n  \"model\": \"gpt-5-nano-2025-08-07\",\n
-        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
-        \           \"id\": \"call_D2ojRWqkng6krQ51vWQEU8wR\",\n            \"type\":
-        \"function\",\n            \"function\": {\n              \"name\": \"parallel_local_search_one\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest OpenAI model release
-        notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
-        \"call_v1tpTKw1sYcI75SWG1LCkAC3\",\n            \"type\": \"function\",\n
-        \           \"function\": {\n              \"name\": \"parallel_local_search_two\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest Anthropic model
-        release notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
-        \"call_RrbyZClymnngoNLhlkQLLpwM\",\n            \"type\": \"function\",\n
-        \           \"function\": {\n              \"name\": \"parallel_local_search_three\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest Gemini model release
-        notes\\\"}\"\n            }\n          }\n        ],\n        \"refusal\":
-        null,\n        \"annotations\": []\n      },\n      \"finish_reason\": \"tool_calls\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 343,\n    \"completion_tokens\":
-        855,\n    \"total_tokens\": 1198,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
-        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 768,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": null\n}\n"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 17:25:23 GMT
-      Server:
-      - cloudflare
-      Strict-Transport-Security:
-      - STS-XXX
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      access-control-expose-headers:
-      - ACCESS-CONTROL-XXX
-      alt-svc:
-      - h3=":443"; ma=86400
-      cf-cache-status:
-      - DYNAMIC
-      openai-organization:
-      - OPENAI-ORG-XXX
-      openai-processing-ms:
-      - '6669'
-      openai-project:
-      - OPENAI-PROJECT-XXX
-      openai-version:
-      - '2020-10-01'
-      set-cookie:
-      - SET-COOKIE-XXX
-      x-openai-proxy-wasm:
-      - v0.1
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-ratelimit-reset-requests:
-      - X-RATELIMIT-RESET-REQUESTS-XXX
-      x-ratelimit-reset-tokens:
-      - X-RATELIMIT-RESET-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
-      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
-      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary."},{"role":"assistant","content":null,"tool_calls":[{"id":"call_D2ojRWqkng6krQ51vWQEU8wR","type":"function","function":{"name":"parallel_local_search_one","arguments":"{\"query\":
-      \"latest OpenAI model release notes\"}"}},{"id":"call_v1tpTKw1sYcI75SWG1LCkAC3","type":"function","function":{"name":"parallel_local_search_two","arguments":"{\"query\":
-      \"latest Anthropic model release notes\"}"}},{"id":"call_RrbyZClymnngoNLhlkQLLpwM","type":"function","function":{"name":"parallel_local_search_three","arguments":"{\"query\":
-      \"latest Gemini model release notes\"}"}}]},{"role":"tool","tool_call_id":"call_D2ojRWqkng6krQ51vWQEU8wR","name":"parallel_local_search_one","content":"[one]
-      latest OpenAI model release notes"},{"role":"tool","tool_call_id":"call_v1tpTKw1sYcI75SWG1LCkAC3","name":"parallel_local_search_two","content":"[two]
-      latest Anthropic model release notes"},{"role":"tool","tool_call_id":"call_RrbyZClymnngoNLhlkQLLpwM","name":"parallel_local_search_three","content":"[three]
-      latest Gemini model release notes"}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '2771'
-      content-type:
-      - application/json
-      cookie:
-      - COOKIE-XXX
-      host:
-      - api.openai.com
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 1.83.0
-      x-stainless-read-timeout:
-      - X-STAINLESS-READ-TIMEOUT-XXX
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: "{\n  \"id\": \"chatcmpl-DB24DjyYsIHiQJ7hHXob8tQFfeXBs\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1771521925,\n  \"model\": \"gpt-5-nano-2025-08-07\",\n
-        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"The three latest release-note references
-        retrieved encompass OpenAI, Anthropic, and Gemini, indicating that all three
-        major model families are actively updating their offerings. These notes typically
-        cover improvements to capabilities, safety measures, performance enhancements,
-        and any new APIs or features, suggesting a trend of ongoing refinement across
-        providers. If you\u2019d like, I can pull the full release notes or extract
-        and compare the key changes across the three sources.\",\n        \"refusal\":
-        null,\n        \"annotations\": []\n      },\n      \"finish_reason\": \"stop\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 467,\n    \"completion_tokens\":
-        1437,\n    \"total_tokens\": 1904,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
-        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 1344,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": null\n}\n"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 17:25:35 GMT
-      Server:
-      - cloudflare
-      Strict-Transport-Security:
-      - STS-XXX
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      access-control-expose-headers:
-      - ACCESS-CONTROL-XXX
-      alt-svc:
-      - h3=":443"; ma=86400
-      cf-cache-status:
-      - DYNAMIC
-      openai-organization:
-      - OPENAI-ORG-XXX
-      openai-processing-ms:
-      - '10369'
-      openai-project:
-      - OPENAI-PROJECT-XXX
-      openai-version:
-      - '2020-10-01'
-      x-openai-proxy-wasm:
-      - v0.1
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-ratelimit-reset-requests:
-      - X-RATELIMIT-RESET-REQUESTS-XXX
-      x-ratelimit-reset-tokens:
-      - X-RATELIMIT-RESET-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_tool_hook_parity_crew.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestOpenAINativeToolCalling.test_openai_parallel_native_tool_calling_tool_hook_parity_crew.yaml
@@ -1,339 +0,0 @@
-interactions:
- request:
-    body: '{"trace_id": "e456cc10-ce7b-4e68-a2cc-ddb806a2e7b9", "execution_type":
-      "crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null,
-      "crew_name": "crew", "flow_name": null, "crewai_version": "1.9.3", "privacy_level":
-      "standard"}, "execution_metadata": {"expected_duration_estimate": 300, "agent_count":
-      0, "task_count": 0, "flow_method_count": 0, "execution_started_at": "2026-02-19T17:24:41.723158+00:00"},
-      "ephemeral_trace_id": "e456cc10-ce7b-4e68-a2cc-ddb806a2e7b9"}'
-    headers:
-      Accept:
-      - '*/*'
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '488'
-      Content-Type:
-      - application/json
-      User-Agent:
-      - X-USER-AGENT-XXX
-      X-Crewai-Organization-Id:
-      - 3433f0ee-8a94-4aa4-822b-2ac71aa38b18
-      X-Crewai-Version:
-      - 1.9.3
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-    method: POST
-    uri: https://app.crewai.com/crewai_plus/api/v1/tracing/ephemeral/batches
-  response:
-    body:
-      string: '{"id":"a78f2aca-0525-47c7-8f37-b3fca0ad6672","ephemeral_trace_id":"e456cc10-ce7b-4e68-a2cc-ddb806a2e7b9","execution_type":"crew","crew_name":"crew","flow_name":null,"status":"running","duration_ms":null,"crewai_version":"1.9.3","total_events":0,"execution_context":{"crew_fingerprint":null,"crew_name":"crew","flow_name":null,"crewai_version":"1.9.3","privacy_level":"standard"},"created_at":"2026-02-19T17:24:41.989Z","updated_at":"2026-02-19T17:24:41.989Z","access_code":"TRACE-bd80d6be74","user_identifier":null}'
-    headers:
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '515'
-      Content-Type:
-      - application/json; charset=utf-8
-      Date:
-      - Thu, 19 Feb 2026 17:24:41 GMT
-      cache-control:
-      - no-store
-      content-security-policy:
-      - CSP-FILTERED
-      etag:
-      - ETAG-XXX
-      expires:
-      - '0'
-      permissions-policy:
-      - PERMISSIONS-POLICY-XXX
-      pragma:
-      - no-cache
-      referrer-policy:
-      - REFERRER-POLICY-XXX
-      strict-transport-security:
-      - STS-XXX
-      vary:
-      - Accept
-      x-content-type-options:
-      - X-CONTENT-TYPE-XXX
-      x-frame-options:
-      - X-FRAME-OPTIONS-XXX
-      x-permitted-cross-domain-policies:
-      - X-PERMITTED-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-      x-runtime:
-      - X-RUNTIME-XXX
-      x-xss-protection:
-      - X-XSS-PROTECTION-XXX
-    status:
-      code: 201
-      message: Created
- request:
-    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
-      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
-      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1929'
-      content-type:
-      - application/json
-      host:
-      - api.openai.com
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 1.83.0
-      x-stainless-read-timeout:
-      - X-STAINLESS-READ-TIMEOUT-XXX
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: "{\n  \"id\": \"chatcmpl-DB23W8RBF6zlxweiHYGb6maVfyctt\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1771521882,\n  \"model\": \"gpt-5-nano-2025-08-07\",\n
-        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
-        \           \"id\": \"call_sge1FXUkpmPEDe8nTOgn0tQG\",\n            \"type\":
-        \"function\",\n            \"function\": {\n              \"name\": \"parallel_local_search_one\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest OpenAI model release
-        notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
-        \"call_z5jRPH4DQ7Wp3HdDUlZe8gGh\",\n            \"type\": \"function\",\n
-        \           \"function\": {\n              \"name\": \"parallel_local_search_two\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest Anthropic model
-        release notes\\\"}\"\n            }\n          },\n          {\n            \"id\":
-        \"call_DNlgqnadODDsyQkSuLcXZCX2\",\n            \"type\": \"function\",\n
-        \           \"function\": {\n              \"name\": \"parallel_local_search_three\",\n
-        \             \"arguments\": \"{\\\"query\\\": \\\"latest Gemini model release
-        notes\\\"}\"\n            }\n          }\n        ],\n        \"refusal\":
-        null,\n        \"annotations\": []\n      },\n      \"finish_reason\": \"tool_calls\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 378,\n    \"completion_tokens\":
-        2456,\n    \"total_tokens\": 2834,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
-        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 2368,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": null\n}\n"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 17:25:02 GMT
-      Server:
-      - cloudflare
-      Strict-Transport-Security:
-      - STS-XXX
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      access-control-expose-headers:
-      - ACCESS-CONTROL-XXX
-      alt-svc:
-      - h3=":443"; ma=86400
-      cf-cache-status:
-      - DYNAMIC
-      openai-organization:
-      - OPENAI-ORG-XXX
-      openai-processing-ms:
-      - '19582'
-      openai-project:
-      - OPENAI-PROJECT-XXX
-      openai-version:
-      - '2020-10-01'
-      set-cookie:
-      - SET-COOKIE-XXX
-      x-openai-proxy-wasm:
-      - v0.1
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-ratelimit-reset-requests:
-      - X-RATELIMIT-RESET-REQUESTS-XXX
-      x-ratelimit-reset-tokens:
-      - X-RATELIMIT-RESET-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"messages":[{"role":"system","content":"You are Parallel Tool Agent. You
-      follow tool instructions precisely.\nYour personal goal is: Use both tools exactly
-      as instructed"},{"role":"user","content":"\nCurrent Task: This is a tool-calling
-      compliance test. In your next assistant turn, emit exactly 3 tool calls in the
-      same response (parallel tool calls), in this order: 1) parallel_local_search_one(query=''latest
-      OpenAI model release notes''), 2) parallel_local_search_two(query=''latest Anthropic
-      model release notes''), 3) parallel_local_search_three(query=''latest Gemini
-      model release notes''). Do not call any other tools and do not answer before
-      those 3 tool calls are emitted. After the tool results return, provide a one
-      paragraph summary.\n\nThis is the expected criteria for your final answer: A
-      one sentence summary of both tool outputs\nyou MUST return the actual complete
-      content as the final answer, not a summary."},{"role":"assistant","content":null,"tool_calls":[{"id":"call_sge1FXUkpmPEDe8nTOgn0tQG","type":"function","function":{"name":"parallel_local_search_one","arguments":"{\"query\":
-      \"latest OpenAI model release notes\"}"}},{"id":"call_z5jRPH4DQ7Wp3HdDUlZe8gGh","type":"function","function":{"name":"parallel_local_search_two","arguments":"{\"query\":
-      \"latest Anthropic model release notes\"}"}},{"id":"call_DNlgqnadODDsyQkSuLcXZCX2","type":"function","function":{"name":"parallel_local_search_three","arguments":"{\"query\":
-      \"latest Gemini model release notes\"}"}}]},{"role":"tool","tool_call_id":"call_sge1FXUkpmPEDe8nTOgn0tQG","name":"parallel_local_search_one","content":"[one]
-      latest OpenAI model release notes"},{"role":"tool","tool_call_id":"call_z5jRPH4DQ7Wp3HdDUlZe8gGh","name":"parallel_local_search_two","content":"[two]
-      latest Anthropic model release notes"},{"role":"tool","tool_call_id":"call_DNlgqnadODDsyQkSuLcXZCX2","name":"parallel_local_search_three","content":"[three]
-      latest Gemini model release notes"},{"role":"user","content":"Analyze the tool
-      result. If requirements are met, provide the Final Answer. Otherwise, call the
-      next tool. Deliver only the answer without meta-commentary."}],"model":"gpt-5-nano","temperature":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"parallel_local_search_one","description":"Local
-      search tool #1 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_two","description":"Local
-      search tool #2 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}},{"type":"function","function":{"name":"parallel_local_search_three","description":"Local
-      search tool #3 for concurrency testing.","strict":true,"parameters":{"properties":{"query":{"description":"Search
-      query","title":"Query","type":"string"}},"required":["query"],"type":"object","additionalProperties":false}}}]}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '3136'
-      content-type:
-      - application/json
-      cookie:
-      - COOKIE-XXX
-      host:
-      - api.openai.com
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 1.83.0
-      x-stainless-read-timeout:
-      - X-STAINLESS-READ-TIMEOUT-XXX
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: "{\n  \"id\": \"chatcmpl-DB23sY0Ahpd1yAgLZ882KkA50Zljx\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1771521904,\n  \"model\": \"gpt-5-nano-2025-08-07\",\n
-        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"Results returned three items: the latest
-        OpenAI model release notes, the latest Anthropic model release notes, and
-        the latest Gemini model release notes.\",\n        \"refusal\": null,\n        \"annotations\":
-        []\n      },\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\":
-        {\n    \"prompt_tokens\": 537,\n    \"completion_tokens\": 1383,\n    \"total_tokens\":
-        1920,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\": 0,\n      \"audio_tokens\":
-        0\n    },\n    \"completion_tokens_details\": {\n      \"reasoning_tokens\":
-        1344,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\": 0,\n
-        \     \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": null\n}\n"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Type:
-      - application/json
-      Date:
-      - Thu, 19 Feb 2026 17:25:16 GMT
-      Server:
-      - cloudflare
-      Strict-Transport-Security:
-      - STS-XXX
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      access-control-expose-headers:
-      - ACCESS-CONTROL-XXX
-      alt-svc:
-      - h3=":443"; ma=86400
-      cf-cache-status:
-      - DYNAMIC
-      openai-organization:
-      - OPENAI-ORG-XXX
-      openai-processing-ms:
-      - '12339'
-      openai-project:
-      - OPENAI-PROJECT-XXX
-      openai-version:
-      - '2020-10-01'
-      x-openai-proxy-wasm:
-      - v0.1
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-ratelimit-reset-requests:
-      - X-RATELIMIT-RESET-REQUESTS-XXX
-      x-ratelimit-reset-tokens:
-      - X-RATELIMIT-RESET-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/cli/test_cli.py
+++ b/lib/crewai/tests/cli/test_cli.py
@@ -66,9 +66,7 @@ def mock_crew():
 def mock_get_crews(mock_crew):
    with mock.patch(
        "crewai.cli.reset_memories_command.get_crews", return_value=[mock_crew]
-    ) as mock_get_crew, mock.patch(
-        "crewai.cli.reset_memories_command.get_flows", return_value=[]
-    ):
+    ) as mock_get_crew:
        yield mock_get_crew


@@ -195,79 +193,6 @@ def test_reset_memory_from_many_crews(mock_get_crews, runner):
    assert call_count == 2, "reset_memories should have been called twice"


-@pytest.fixture
-def mock_flow():
-    _mock = mock.Mock()
-    _mock.name = "TestFlow"
-    _mock.memory = mock.Mock()
-    _mock.memory.reset = mock.Mock()
-    return _mock
-
-
-@pytest.fixture
-def mock_get_flows(mock_flow):
-    with mock.patch(
-        "crewai.cli.reset_memories_command.get_flows", return_value=[mock_flow]
-    ) as mock_get_flow, mock.patch(
-        "crewai.cli.reset_memories_command.get_crews", return_value=[]
-    ):
-        yield mock_get_flow
-
-
-def test_reset_flow_memory(mock_get_flows, mock_flow, runner):
-    result = runner.invoke(reset_memories, ["-m"])
-    mock_flow.memory.reset.assert_called_once()
-    assert "[Flow (TestFlow)] Memory has been reset." in result.output
-
-
-def test_reset_flow_all_memories(mock_get_flows, mock_flow, runner):
-    result = runner.invoke(reset_memories, ["-a"])
-    mock_flow.memory.reset.assert_called_once()
-    assert "[Flow (TestFlow)] Reset memories command has been completed." in result.output
-
-
-def test_reset_flow_knowledge_no_effect(mock_get_flows, mock_flow, runner):
-    result = runner.invoke(reset_memories, ["--knowledge"])
-    mock_flow.memory.reset.assert_not_called()
-    assert "[Flow (TestFlow)]" not in result.output
-
-
-def test_reset_no_crew_or_flow_found(runner):
-    with mock.patch(
-        "crewai.cli.reset_memories_command.get_crews", return_value=[]
-    ), mock.patch(
-        "crewai.cli.reset_memories_command.get_flows", return_value=[]
-    ):
-        result = runner.invoke(reset_memories, ["-m"])
-        assert "No crew or flow found." in result.output
-
-
-def test_reset_crew_and_flow_memory(mock_crew, mock_flow, runner):
-    with mock.patch(
-        "crewai.cli.reset_memories_command.get_crews", return_value=[mock_crew]
-    ), mock.patch(
-        "crewai.cli.reset_memories_command.get_flows", return_value=[mock_flow]
-    ):
-        result = runner.invoke(reset_memories, ["-m"])
-        mock_crew.reset_memories.assert_called_once_with(command_type="memory")
-        mock_flow.memory.reset.assert_called_once()
-        assert f"[Crew ({mock_crew.name})] Memory has been reset." in result.output
-        assert "[Flow (TestFlow)] Memory has been reset." in result.output
-
-
-def test_reset_flow_memory_none(runner):
-    mock_flow = mock.Mock()
-    mock_flow.name = "NoMemFlow"
-    mock_flow.memory = None
-    with mock.patch(
-        "crewai.cli.reset_memories_command.get_crews", return_value=[]
-    ), mock.patch(
-        "crewai.cli.reset_memories_command.get_flows", return_value=[mock_flow]
-    ):
-        result = runner.invoke(reset_memories, ["-m"])
-        assert "[Flow (NoMemFlow)] Memory has been reset." in result.output
-
-
 def test_reset_no_memory_flags(runner):
    result = runner.invoke(
        reset_memories,
--- a/lib/crewai/tests/test_context.py
+++ b/lib/crewai/tests/test_context.py
@@ -7,139 +7,215 @@ import pytest
 from crewai.context import (
    _platform_integration_token,
    get_platform_integration_token,
-    platform_integration_context,
-    reset_platform_integration_token,
+    platform_context,
    set_platform_integration_token,
 )


-@pytest.fixture
-def clean_context():
-    """Fixture to ensure clean context state for each test."""
-    _platform_integration_token.set(None)
-    yield
-    _platform_integration_token.set(None)
+class TestPlatformIntegrationToken:
+    def setup_method(self):
+        _platform_integration_token.set(None)

+    def teardown_method(self):
+        _platform_integration_token.set(None)

-class TestContextVariableCore:
-    """Test core context variable functionality (set/get/reset)."""
-
-    def test_set_and_get_token(self, clean_context):
-        """Test basic token setting and retrieval."""
+    @patch.dict(os.environ, {}, clear=True)
+    def test_set_platform_integration_token(self):
        test_token = "test-token-123"

        assert get_platform_integration_token() is None

-        context_token = set_platform_integration_token(test_token)
+        set_platform_integration_token(test_token)
+
        assert get_platform_integration_token() == test_token
-        assert context_token is not None

-    def test_reset_token_restores_previous_state(self, clean_context):
-        """Test that reset properly restores previous context state."""
-        token1 = "token-1"
-        token2 = "token-2"
+    def test_get_platform_integration_token_from_context_var(self):
+        test_token = "context-var-token"

-        context_token1 = set_platform_integration_token(token1)
-        assert get_platform_integration_token() == token1
+        _platform_integration_token.set(test_token)

-        context_token2 = set_platform_integration_token(token2)
-        assert get_platform_integration_token() == token2
+        assert get_platform_integration_token() == test_token

-        reset_platform_integration_token(context_token2)
-        assert get_platform_integration_token() == token1
+    @patch.dict(os.environ, {"CREWAI_PLATFORM_INTEGRATION_TOKEN": "env-token-456"})
+    def test_get_platform_integration_token_from_env_var(self):
+        assert _platform_integration_token.get() is None

-        reset_platform_integration_token(context_token1)
-        assert get_platform_integration_token() is None
-
-    def test_nested_token_management(self, clean_context):
-        """Test proper token management with deeply nested contexts."""
-        tokens = ["token-1", "token-2", "token-3"]
-        context_tokens = []
-
-        for token in tokens:
-            context_tokens.append(set_platform_integration_token(token))
-            assert get_platform_integration_token() == token
-
-        for i in range(len(tokens) - 1, 0, -1):
-            reset_platform_integration_token(context_tokens[i])
-            assert get_platform_integration_token() == tokens[i - 1]
-
-        reset_platform_integration_token(context_tokens[0])
-        assert get_platform_integration_token() is None
+        assert get_platform_integration_token() == "env-token-456"

    @patch.dict(os.environ, {"CREWAI_PLATFORM_INTEGRATION_TOKEN": "env-token"})
-    def test_context_module_ignores_environment_variables(self, clean_context):
-        """Test that context module only returns context values, not env vars."""
-        # Context module should not read environment variables
-        assert get_platform_integration_token() is None
+    def test_context_var_takes_precedence_over_env_var(self):
+        context_token = "context-token"

-        # Only context variable should be returned
-        set_platform_integration_token("context-token")
-        assert get_platform_integration_token() == "context-token"
+        set_platform_integration_token(context_token)

+        assert get_platform_integration_token() == context_token

-class TestPlatformIntegrationContext:
-    """Test platform integration context manager behavior."""
-
-    def test_basic_context_manager_usage(self, clean_context):
-        """Test basic context manager functionality."""
-        test_token = "context-token"
+    @patch.dict(os.environ, {}, clear=True)
+    def test_get_platform_integration_token_returns_none_when_not_set(self):
+        assert _platform_integration_token.get() is None

        assert get_platform_integration_token() is None

-        with platform_integration_context(test_token):
+    @patch.dict(os.environ, {}, clear=True)
+    def test_platform_context_manager_basic_usage(self):
+        test_token = "context-manager-token"
+
+        assert get_platform_integration_token() is None
+
+        with platform_context(test_token):
            assert get_platform_integration_token() == test_token

        assert get_platform_integration_token() is None

-    @pytest.mark.parametrize("falsy_value", [None, "", False, 0])
-    def test_falsy_values_return_nullcontext(self, clean_context, falsy_value):
-        """Test that falsy values return nullcontext (no-op)."""
-        # Set initial token to verify nullcontext doesn't affect it
-        initial_token = "initial-token"
-        initial_context_token = set_platform_integration_token(initial_token)
+    @patch.dict(os.environ, {}, clear=True)
+    def test_platform_context_manager_nested_contexts(self):
+        """Test nested platform_context context managers."""
+        outer_token = "outer-token"
+        inner_token = "inner-token"

-        try:
-            with platform_integration_context(falsy_value):
-                # Should preserve existing context (nullcontext behavior)
-                assert get_platform_integration_token() == initial_token
-
-            # Should still have initial token after nullcontext
-            assert get_platform_integration_token() == initial_token
-        finally:
-            reset_platform_integration_token(initial_context_token)
-
-    @pytest.mark.parametrize("truthy_value", ["token", "123", " ", "0"])
-    def test_truthy_values_create_context(self, clean_context, truthy_value):
-        """Test that truthy values create proper context."""
-        with platform_integration_context(truthy_value):
-            assert get_platform_integration_token() == truthy_value
-
-        # Should be cleaned up
        assert get_platform_integration_token() is None

-    def test_context_preserves_existing_token(self, clean_context):
-        """Test that context manager preserves existing token when exiting."""
-        existing_token = "existing-token"
+        with platform_context(outer_token):
+            assert get_platform_integration_token() == outer_token
+
+            with platform_context(inner_token):
+                assert get_platform_integration_token() == inner_token
+
+            assert get_platform_integration_token() == outer_token
+
+        assert get_platform_integration_token() is None
+
+    def test_platform_context_manager_preserves_existing_token(self):
+        """Test that platform_context preserves existing token when exiting."""
+        initial_token = "initial-token"
        context_token = "context-token"

-        existing_context_token = set_platform_integration_token(existing_token)
+        set_platform_integration_token(initial_token)
+        assert get_platform_integration_token() == initial_token

-        try:
-            with platform_integration_context(context_token):
+        with platform_context(context_token):
+            assert get_platform_integration_token() == context_token
+
+        assert get_platform_integration_token() == initial_token
+
+    def test_platform_context_manager_exception_handling(self):
+        """Test that platform_context properly resets token even when exception occurs."""
+        initial_token = "initial-token"
+        context_token = "context-token"
+
+        set_platform_integration_token(initial_token)
+
+        with pytest.raises(ValueError):
+            with platform_context(context_token):
                assert get_platform_integration_token() == context_token
+                raise ValueError("Test exception")

-            assert get_platform_integration_token() == existing_token
-        finally:
-            reset_platform_integration_token(existing_context_token)
+        assert get_platform_integration_token() == initial_token

-    def test_context_manager_return_type(self, clean_context):
-        """Test that context manager returns proper types for both cases."""
-        # Both should be usable as context managers
-        valid_ctx = platform_integration_context("token")
-        none_ctx = platform_integration_context(None)
+    @patch.dict(os.environ, {}, clear=True)
+    def test_platform_context_manager_with_none_initial_state(self):
+        """Test platform_context when initial state is None."""
+        context_token = "context-token"

-        assert hasattr(valid_ctx, '__enter__')
-        assert hasattr(valid_ctx, '__exit__')
-        assert hasattr(none_ctx, '__enter__')
-        assert hasattr(none_ctx, '__exit__')
+        assert get_platform_integration_token() is None
+
+        with pytest.raises(RuntimeError):
+            with platform_context(context_token):
+                assert get_platform_integration_token() == context_token
+                raise RuntimeError("Test exception")
+
+        assert get_platform_integration_token() is None
+
+    @patch.dict(os.environ, {"CREWAI_PLATFORM_INTEGRATION_TOKEN": "env-backup"})
+    def test_platform_context_with_env_fallback(self):
+        """Test platform_context interaction with environment variable fallback."""
+        context_token = "context-token"
+
+        assert get_platform_integration_token() == "env-backup"
+
+        with platform_context(context_token):
+            assert get_platform_integration_token() == context_token
+
+        assert get_platform_integration_token() == "env-backup"
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_multiple_sequential_context_managers(self):
+        """Test multiple sequential uses of platform_context."""
+        token1 = "token-1"
+        token2 = "token-2"
+        token3 = "token-3"
+
+        with platform_context(token1):
+            assert get_platform_integration_token() == token1
+
+        assert get_platform_integration_token() is None
+
+        with platform_context(token2):
+            assert get_platform_integration_token() == token2
+
+        assert get_platform_integration_token() is None
+
+        with platform_context(token3):
+            assert get_platform_integration_token() == token3
+
+        assert get_platform_integration_token() is None
+
+    def test_empty_string_token(self):
+        empty_token = ""
+
+        set_platform_integration_token(empty_token)
+        assert get_platform_integration_token() == ""
+
+        with platform_context(empty_token):
+            assert get_platform_integration_token() == ""
+
+    def test_special_characters_in_token(self):
+        special_token = "token-with-!@#$%^&*()_+-={}[]|\\:;\"'<>?,./"
+
+        set_platform_integration_token(special_token)
+        assert get_platform_integration_token() == special_token
+
+        with platform_context(special_token):
+            assert get_platform_integration_token() == special_token
+
+    def test_very_long_token(self):
+        long_token = "a" * 10000
+
+        set_platform_integration_token(long_token)
+        assert get_platform_integration_token() == long_token
+
+        with platform_context(long_token):
+            assert get_platform_integration_token() == long_token
+
+    @patch.dict(os.environ, {"CREWAI_PLATFORM_INTEGRATION_TOKEN": ""})
+    def test_empty_env_var(self):
+        assert _platform_integration_token.get() is None
+        assert get_platform_integration_token() == ""
+
+    @patch("crewai.context.os.getenv")
+    def test_env_var_access_error_handling(self, mock_getenv):
+        mock_getenv.side_effect = OSError("Environment access error")
+
+        with pytest.raises(OSError):
+            get_platform_integration_token()
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_context_var_isolation_between_tests(self):
+        """Test that context variable changes don't leak between test methods."""
+        test_token = "isolation-test-token"
+
+        assert get_platform_integration_token() is None
+
+        set_platform_integration_token(test_token)
+        assert get_platform_integration_token() == test_token
+
+    def test_context_manager_return_value(self):
+        """Test that platform_context can be used in with statement with return value."""
+        test_token = "return-value-token"
+
+        with platform_context(test_token):
+            assert get_platform_integration_token() == test_token
+
+        with platform_context(test_token) as ctx:
+            assert ctx is None
+            assert get_platform_integration_token() == test_token
--- a/lib/crewai/tests/test_flow.py
+++ b/lib/crewai/tests/test_flow.py
@@ -1772,74 +1772,3 @@ def test_cyclic_flow_multiple_or_listeners_fire_every_iteration():
            f"'{method}' should fire every iteration, "
            f"got {len(events)} fires: {execution_order}"
        )
-
-
-def test_cyclic_flow_works_with_persist_and_id_input():
-    """Cyclic router flows must complete all iterations when persistence is
-    enabled and 'id' is passed in inputs.
-
-    Regression test: passing ``inputs={"id": ...}`` with a persistence backend
-    previously caused ``_is_execution_resuming`` to be set even though
-    ``_completed_methods`` was empty.  The flag was never cleared during
-    execution, so on the second cycle iteration the resumption path in
-    ``_execute_single_listener`` short-circuited the router with ``(None, None)``
-    and the flow silently terminated after a single iteration.
-    """
-    from uuid import uuid4
-
-    from crewai.flow.persistence import SQLiteFlowPersistence
-
-    execution_order: list[str] = []
-
-    class PersistCyclicFlow(Flow):
-        iteration: int = 0
-        max_iterations: int = 3
-
-        @start()
-        def begin(self):
-            execution_order.append("begin")
-
-        @router(or_(begin, "capture"))
-        def classify(self):
-            self.iteration += 1
-            execution_order.append(f"classify_{self.iteration}")
-            if self.iteration <= self.max_iterations:
-                return "type_a"
-            return "exit"
-
-        @listen("type_a")
-        def handle(self):
-            execution_order.append(f"handle_{self.iteration}")
-
-        @listen(or_(handle,))
-        def send(self):
-            execution_order.append(f"send_{self.iteration}")
-
-        @listen("send")
-        def capture(self):
-            execution_order.append(f"capture_{self.iteration}")
-
-        @listen("exit")
-        def finish(self):
-            execution_order.append("finish")
-
-    persistence = SQLiteFlowPersistence()
-    flow = PersistCyclicFlow(persistence=persistence)
-    flow.kickoff(inputs={"id": str(uuid4())})
-
-    assert "finish" in execution_order, (
-        f"Flow should have reached 'finish', got: {execution_order}"
-    )
-    # The router fires max_iterations+1 times (3 cycles + the final "exit")
-    classify_events = [e for e in execution_order if e.startswith("classify_")]
-    assert len(classify_events) == 4, (
-        f"'classify' should fire 4 times (3 cycles + exit), "
-        f"got {len(classify_events)}: {execution_order}"
-    )
-    # The other methods fire once per "type_a" cycle
-    for method in ["handle", "send", "capture"]:
-        events = [e for e in execution_order if e.startswith(f"{method}_")]
-        assert len(events) == 3, (
-            f"'{method}' should fire 3 times, "
-            f"got {len(events)}: {execution_order}"
-        )
--- a/lib/crewai/tests/test_flow_ask.py
+++ b/lib/crewai/tests/test_flow_ask.py