diff --git a/lib/crewai/tests/cassettes/utilities/TestAgentExecuteTaskCompaction.test_agent_execute_task_compaction.yaml b/lib/crewai/tests/cassettes/utilities/TestAgentExecuteTaskCompaction.test_agent_execute_task_compaction.yaml
new file mode 100644
index 000000000..c1c03b9db
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestAgentExecuteTaskCompaction.test_agent_execute_task_compaction.yaml
@@ -0,0 +1,113 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Writer. You are a skilled
+      writer.\nYour personal goal is: Write concise content"},{"role":"user","content":"\nCurrent
+      Task: Write one sentence about the sun.\n\nThis is the expected criteria for
+      your final answer: A single sentence about the sun.\nyou MUST return the actual
+      complete content as the final answer, not a summary.\n\nProvide your complete
+      response:"}],"model":"gpt-4o-mini","temperature":0}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '453'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D7RxEngFVCbqdc7tNjV3VjeteqcwT\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770668124,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"The sun is a massive ball of glowing
+        gas at the center of our solar system, providing light and warmth essential
+        for life on Earth.\",\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 78,\n    \"completion_tokens\":
+        27,\n    \"total_tokens\": 105,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 09 Feb 2026 20:15:25 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '664'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - SET-COOKIE-XXX
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/utilities/TestCrewKickoffCompaction.test_crew_kickoff_compaction_openai.yaml b/lib/crewai/tests/cassettes/utilities/TestCrewKickoffCompaction.test_crew_kickoff_compaction_openai.yaml
new file mode 100644
index 000000000..f565781f8
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestCrewKickoffCompaction.test_crew_kickoff_compaction_openai.yaml
@@ -0,0 +1,120 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Researcher. You are an
+      expert researcher.\nYour personal goal is: Find information about Python programming"},{"role":"user","content":"\nCurrent
+      Task: What is Python? Give a brief answer.\n\nThis is the expected criteria
+      for your final answer: A short description of Python.\nyou MUST return the actual
+      complete content as the final answer, not a summary.\n\nProvide your complete
+      response:"}],"model":"gpt-4o-mini","temperature":0}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '482'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D7RxRv3U0LCLf2iqf40wxOQsuiYFR\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770668137,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"Python is a high-level, interpreted
+        programming language known for its readability and simplicity. It was created
+        by Guido van Rossum and first released in 1991. Python supports multiple programming
+        paradigms, including procedural, object-oriented, and functional programming.
+        It has a large standard library and is widely used for web development, data
+        analysis, artificial intelligence, scientific computing, and automation, among
+        other applications. Python's syntax emphasizes code readability, allowing
+        developers to express concepts in fewer lines of code compared to other languages.
+        Its active community and extensive ecosystem of libraries and frameworks make
+        it a popular choice for both beginners and experienced programmers.\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        82,\n    \"completion_tokens\": 123,\n    \"total_tokens\": 205,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 09 Feb 2026 20:15:39 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '2467'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - SET-COOKIE-XXX
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectAnthropic.test_summarize_direct_anthropic.yaml b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectAnthropic.test_summarize_direct_anthropic.yaml
new file mode 100644
index 000000000..87260beb8
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectAnthropic.test_summarize_direct_anthropic.yaml
@@ -0,0 +1,136 @@
+interactions:
+- request:
+    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Analyze the following
+      conversation and create a structured summary that preserves all information
+      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
+      the latest developments in large language models. Focus on architecture improvements
+      and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
+      in large language models. Based on my knowledge, recent advances include:\n1.
+      Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
+      Flash Attention\n3. Better training data curation techniques\n4. Constitutional
+      AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
+      What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
+      Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
+      to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
+      advantage is computational efficiency: only a subset of experts is activated
+      per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
+      Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
+      has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
+      Key facts, data, tool results, or findings that must not be lost.\n4. **Next
+      Steps**: What should the agent do next based on the conversation?\n5. **Context
+      to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
+      in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
+      structured summary here]\n</summary>"}],"model":"claude-3-5-haiku-latest","stream":false,"system":"You
+      are a precise assistant that creates structured summaries of agent conversations.
+      You preserve critical context needed for seamless task continuation.","temperature":0}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '1870'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      x-api-key:
+      - X-API-KEY-XXX
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 0.73.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01SK3LP6RedPBmpvD1HfKD23","type":"message","role":"assistant","content":[{"type":"text","text":"<summary>\n1.
+        **Task Overview**:\n- Research latest developments in large language models\n-
+        Focus on architecture improvements and training techniques\n\n2. **Current
+        State**:\n- Initial research completed on broad developments\n- Currently
+        exploring Mixture of Experts (MoE) architectures in depth\n- Detailed discussion
+        of key MoE research papers initiated\n\n3. **Important Discoveries**:\nMoE
+        Architecture Insights:\n- Computational efficiency through selective expert
+        activation\n- Key research papers:\n  * Switch Transformers (Google, 2021)\n  *
+        GShard\n  * Mixtral (Mistral AI)\n- Main benefit: Only subset of experts activated
+        per token\n\n4. **Next Steps**:\n- Conduct deeper analysis of MoE architecture
+        mechanisms\n- Compare routing strategies across different MoE implementations\n-
+        Investigate performance metrics and scalability of MoE models\n\n5. **Context
+        to Preserve**:\n- Research Focus: Large Language Model Architectures\n- Specific
+        Interest: Mixture of Experts (MoE) Architectures\n- Key Researchers/Organizations:
+        Google, Mistral AI\n- Years of Significant Papers: 2021 onwards\n</summary>"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":400,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":270,"service_tier":"standard","inference_geo":"not_available"}}'
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Security-Policy:
+      - CSP-FILTERED
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 09 Feb 2026 20:18:41 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - ANTHROPIC-ORGANIZATION-ID-XXX
+      anthropic-ratelimit-input-tokens-limit:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-input-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-input-tokens-reset:
+      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-output-tokens-limit:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-output-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-output-tokens-reset:
+      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
+      anthropic-ratelimit-requests-limit:
+      - '4000'
+      anthropic-ratelimit-requests-remaining:
+      - '3999'
+      anthropic-ratelimit-requests-reset:
+      - '2026-02-09T20:18:35Z'
+      anthropic-ratelimit-tokens-limit:
+      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
+      anthropic-ratelimit-tokens-remaining:
+      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
+      anthropic-ratelimit-tokens-reset:
+      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - REQUEST-ID-XXX
+      strict-transport-security:
+      - STS-XXX
+      x-envoy-upstream-service-time:
+      - '5639'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectGemini.test_summarize_direct_gemini.yaml b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectGemini.test_summarize_direct_gemini.yaml
new file mode 100644
index 000000000..9c2ac6795
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectGemini.test_summarize_direct_gemini.yaml
@@ -0,0 +1,103 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "Analyze the following conversation and
+      create a structured summary that preserves all information needed to continue
+      the task seamlessly.\n\n<conversation>\n[USER]: Research the latest developments
+      in large language models. Focus on architecture improvements and training techniques.\n\n[ASSISTANT]:
+      I''ll research the latest developments in large language models. Based on my
+      knowledge, recent advances include:\n1. Mixture of Experts (MoE) architectures\n2.
+      Improved attention mechanisms like Flash Attention\n3. Better training data
+      curation techniques\n4. Constitutional AI and RLHF improvements\n\n[USER]: Can
+      you go deeper on the MoE architectures? What are the key papers?\n\n[ASSISTANT]:
+      Key papers on Mixture of Experts:\n- Switch Transformers (Google, 2021) - simplified
+      MoE routing\n- GShard - scaling to 600B parameters\n- Mixtral (Mistral AI) -
+      open-source MoE model\nThe main advantage is computational efficiency: only
+      a subset of experts is activated per token.\n</conversation>\n\nCreate a summary
+      with these sections:\n1. **Task Overview**: What is the agent trying to accomplish?\n2.
+      **Current State**: What has been completed so far? What step is the agent on?\n3.
+      **Important Discoveries**: Key facts, data, tool results, or findings that must
+      not be lost.\n4. **Next Steps**: What should the agent do next based on the
+      conversation?\n5. **Context to Preserve**: Any specific values, names, URLs,
+      code snippets, or details referenced in the conversation.\n\nWrap your entire
+      summary in <summary> tags.\n\n<summary>\n[Your structured summary here]\n</summary>"}],
+      "role": "user"}], "systemInstruction": {"parts": [{"text": "You are a precise
+      assistant that creates structured summaries of agent conversations. You preserve
+      critical context needed for seamless task continuation."}], "role": "user"},
+      "generationConfig": {"temperature": 0.0}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1895'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.49.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
+        [\n          {\n            \"text\": \"```xml\\n\\u003csummary\\u003e\\n**Task
+        Overview**: Research the latest developments in large language models, focusing
+        on architecture improvements and training techniques.\\n\\n**Current State**:
+        The agent has identified several key areas of advancement in LLMs: Mixture
+        of Experts (MoE) architectures, improved attention mechanisms (Flash Attention),
+        better training data curation, and Constitutional AI/RLHF improvements. The
+        user has requested a deeper dive into MoE architectures. The agent has provided
+        an initial overview of MoE architectures and listed some key papers.\\n\\n**Important
+        Discoveries**:\\n*   Key MoE papers: Switch Transformers (Google, 2021), GShard,
+        Mixtral (Mistral AI).\\n*   MoE advantage: Computational efficiency through
+        selective activation of experts.\\n\\n**Next Steps**: Continue researching
+        MoE architectures based on the user's request for more detail. The agent should
+        elaborate further on the listed papers and potentially find more recent or
+        relevant publications.\\n\\n**Context to Preserve**:\\n*   Focus areas: Architecture
+        improvements and training techniques for LLMs.\\n*   Specific architectures:
+        Mixture of Experts (MoE), Flash Attention.\\n*   Training techniques: Data
+        curation, Constitutional AI, RLHF.\\n*   Specific papers: Switch Transformers
+        (Google, 2021), GShard, Mixtral (Mistral AI).\\n\\u003c/summary\\u003e\\n```\"\n
+        \         }\n        ],\n        \"role\": \"model\"\n      },\n      \"finishReason\":
+        \"STOP\",\n      \"avgLogprobs\": -0.14186729703630721\n    }\n  ],\n  \"usageMetadata\":
+        {\n    \"promptTokenCount\": 373,\n    \"candidatesTokenCount\": 280,\n    \"totalTokenCount\":
+        653,\n    \"promptTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
+        \       \"tokenCount\": 373\n      }\n    ],\n    \"candidatesTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 280\n
+        \     }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash\",\n  \"responseId\":
+        \"GEGKabP3OcGH-8YPzZCj2Ao\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Mon, 09 Feb 2026 20:18:35 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=2310
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectOpenAI.test_summarize_direct_openai.yaml b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectOpenAI.test_summarize_direct_openai.yaml
new file mode 100644
index 000000000..931feb4ae
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectOpenAI.test_summarize_direct_openai.yaml
@@ -0,0 +1,148 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","content":"You are a precise assistant that
+      creates structured summaries of agent conversations. You preserve critical context
+      needed for seamless task continuation."},{"role":"user","content":"Analyze the
+      following conversation and create a structured summary that preserves all information
+      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
+      the latest developments in large language models. Focus on architecture improvements
+      and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
+      in large language models. Based on my knowledge, recent advances include:\n1.
+      Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
+      Flash Attention\n3. Better training data curation techniques\n4. Constitutional
+      AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
+      What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
+      Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
+      to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
+      advantage is computational efficiency: only a subset of experts is activated
+      per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
+      Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
+      has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
+      Key facts, data, tool results, or findings that must not be lost.\n4. **Next
+      Steps**: What should the agent do next based on the conversation?\n5. **Context
+      to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
+      in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
+      structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1844'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D7RxGISdQet8JsWImiwzHQ2S9gSD4\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770668126,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"<summary>\\n1. **Task Overview**: The
+        agent is tasked with researching the latest developments in large language
+        models, specifically focusing on architecture improvements and training techniques.\\n\\n2.
+        **Current State**: The agent has identified several recent advances in large
+        language models, including Mixture of Experts (MoE) architectures, improved
+        attention mechanisms, better training data curation techniques, and advancements
+        in Constitutional AI and Reinforcement Learning from Human Feedback (RLHF).\\n\\n3.
+        **Important Discoveries**: \\n   - Recent advances in large language models
+        include:\\n     1. Mixture of Experts (MoE) architectures\\n     2. Improved
+        attention mechanisms like Flash Attention\\n     3. Better training data curation
+        techniques\\n     4. Constitutional AI and RLHF improvements\\n   - Key papers
+        on Mixture of Experts:\\n     - Switch Transformers (Google, 2021) - simplified
+        MoE routing\\n     - GShard - scaling to 600B parameters\\n     - Mixtral
+        (Mistral AI) - open-source MoE model\\n   - The main advantage of MoE architectures
+        is computational efficiency, as only a subset of experts is activated per
+        token.\\n\\n4. **Next Steps**: The agent should delve deeper into the Mixture
+        of Experts architectures, reviewing the key papers mentioned and summarizing
+        their contributions and implications for large language models.\\n\\n5. **Context
+        to Preserve**: \\n   - Key papers: \\n     - Switch Transformers (Google,
+        2021)\\n     - GShard\\n     - Mixtral (Mistral AI)\\n   - Focus on computational
+        efficiency of MoE architectures.\\n</summary>\",\n        \"refusal\": null,\n
+        \       \"annotations\": []\n      },\n      \"logprobs\": null,\n      \"finish_reason\":
+        \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 368,\n    \"completion_tokens\":
+        328,\n    \"total_tokens\": 696,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 09 Feb 2026 20:15:32 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '5395'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - SET-COOKIE-XXX
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/utilities/TestSummarizePreservesFiles.test_summarize_preserves_files_integration.yaml b/lib/crewai/tests/cassettes/utilities/TestSummarizePreservesFiles.test_summarize_preserves_files_integration.yaml
new file mode 100644
index 000000000..2d00899cf
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestSummarizePreservesFiles.test_summarize_preserves_files_integration.yaml
@@ -0,0 +1,145 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","content":"You are a precise assistant that
+      creates structured summaries of agent conversations. You preserve critical context
+      needed for seamless task continuation."},{"role":"user","content":"Analyze the
+      following conversation and create a structured summary that preserves all information
+      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
+      the latest developments in large language models. Focus on architecture improvements
+      and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
+      in large language models. Based on my knowledge, recent advances include:\n1.
+      Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
+      Flash Attention\n3. Better training data curation techniques\n4. Constitutional
+      AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
+      What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
+      Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
+      to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
+      advantage is computational efficiency: only a subset of experts is activated
+      per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
+      Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
+      has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
+      Key facts, data, tool results, or findings that must not be lost.\n4. **Next
+      Steps**: What should the agent do next based on the conversation?\n5. **Context
+      to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
+      in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
+      structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1844'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D7RxM4n36QoACHrC0QocV1pXIwvtD\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770668132,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"<summary>\\n1. **Task Overview**: The
+        user has requested research on the latest developments in large language models,
+        specifically focusing on architecture improvements and training techniques.\\n\\n2.
+        **Current State**: The assistant has identified several recent advances in
+        large language models, including Mixture of Experts (MoE) architectures, improved
+        attention mechanisms, better training data curation techniques, and advancements
+        in Constitutional AI and Reinforcement Learning from Human Feedback (RLHF).\\n\\n3.
+        **Important Discoveries**: \\n   - Key papers on Mixture of Experts (MoE)
+        architectures:\\n     - \\\"Switch Transformers\\\" (Google, 2021) - simplified
+        MoE routing.\\n     - \\\"GShard\\\" - scaling to 600B parameters.\\n     -
+        \\\"Mixtral\\\" (Mistral AI) - open-source MoE model.\\n   - The main advantage
+        of MoE architectures is computational efficiency, as only a subset of experts
+        is activated per token.\\n\\n4. **Next Steps**: The assistant should delve
+        deeper into the Mixture of Experts architectures, potentially summarizing
+        the findings from the key papers mentioned.\\n\\n5. **Context to Preserve**:
+        \\n   - Key papers: \\\"Switch Transformers,\\\" \\\"GShard,\\\" \\\"Mixtral.\\\"\\n
+        \  - Notable organizations: Google, Mistral AI.\\n   - Focus areas: MoE architectures,
+        computational efficiency.\\n</summary>\",\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 368,\n    \"completion_tokens\":
+        275,\n    \"total_tokens\": 643,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 09 Feb 2026 20:15:36 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '4188'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - SET-COOKIE-XXX
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1