diff --git a/lib/crewai/tests/cassettes/utilities/TestAgentExecuteTaskCompaction.test_agent_execute_task_compaction.yaml b/lib/crewai/tests/cassettes/utilities/TestAgentExecuteTaskCompaction.test_agent_execute_task_compaction.yaml
new file mode 100644
index 000000000..c1c03b9db
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestAgentExecuteTaskCompaction.test_agent_execute_task_compaction.yaml
@@ -0,0 +1,113 @@
+interactions:
+- request:
+ body: '{"messages":[{"role":"system","content":"You are Writer. You are a skilled
+ writer.\nYour personal goal is: Write concise content"},{"role":"user","content":"\nCurrent
+ Task: Write one sentence about the sun.\n\nThis is the expected criteria for
+ your final answer: A single sentence about the sun.\nyou MUST return the actual
+ complete content as the final answer, not a summary.\n\nProvide your complete
+ response:"}],"model":"gpt-4o-mini","temperature":0}'
+ headers:
+ User-Agent:
+ - X-USER-AGENT-XXX
+ accept:
+ - application/json
+ accept-encoding:
+ - ACCEPT-ENCODING-XXX
+ authorization:
+ - AUTHORIZATION-XXX
+ connection:
+ - keep-alive
+ content-length:
+ - '453'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ x-stainless-arch:
+ - X-STAINLESS-ARCH-XXX
+ x-stainless-async:
+ - 'false'
+ x-stainless-lang:
+ - python
+ x-stainless-os:
+ - X-STAINLESS-OS-XXX
+ x-stainless-package-version:
+ - 1.83.0
+ x-stainless-read-timeout:
+ - X-STAINLESS-READ-TIMEOUT-XXX
+ x-stainless-retry-count:
+ - '0'
+ x-stainless-runtime:
+ - CPython
+ x-stainless-runtime-version:
+ - 3.13.3
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: "{\n \"id\": \"chatcmpl-D7RxEngFVCbqdc7tNjV3VjeteqcwT\",\n \"object\":
+ \"chat.completion\",\n \"created\": 1770668124,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
+ \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
+ \"assistant\",\n \"content\": \"The sun is a massive ball of glowing
+ gas at the center of our solar system, providing light and warmth essential
+ for life on Earth.\",\n \"refusal\": null,\n \"annotations\":
+ []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
+ \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 78,\n \"completion_tokens\":
+ 27,\n \"total_tokens\": 105,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
+ 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
+ {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
+ 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
+ \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+ headers:
+ CF-RAY:
+ - CF-RAY-XXX
+ Connection:
+ - keep-alive
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 09 Feb 2026 20:15:25 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - STS-XXX
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - X-CONTENT-TYPE-XXX
+ access-control-expose-headers:
+ - ACCESS-CONTROL-XXX
+ alt-svc:
+ - h3=":443"; ma=86400
+ cf-cache-status:
+ - DYNAMIC
+ openai-organization:
+ - OPENAI-ORG-XXX
+ openai-processing-ms:
+ - '664'
+ openai-project:
+ - OPENAI-PROJECT-XXX
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - SET-COOKIE-XXX
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - X-RATELIMIT-LIMIT-REQUESTS-XXX
+ x-ratelimit-limit-tokens:
+ - X-RATELIMIT-LIMIT-TOKENS-XXX
+ x-ratelimit-remaining-requests:
+ - X-RATELIMIT-REMAINING-REQUESTS-XXX
+ x-ratelimit-remaining-tokens:
+ - X-RATELIMIT-REMAINING-TOKENS-XXX
+ x-ratelimit-reset-requests:
+ - X-RATELIMIT-RESET-REQUESTS-XXX
+ x-ratelimit-reset-tokens:
+ - X-RATELIMIT-RESET-TOKENS-XXX
+ x-request-id:
+ - X-REQUEST-ID-XXX
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/utilities/TestCrewKickoffCompaction.test_crew_kickoff_compaction_openai.yaml b/lib/crewai/tests/cassettes/utilities/TestCrewKickoffCompaction.test_crew_kickoff_compaction_openai.yaml
new file mode 100644
index 000000000..f565781f8
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestCrewKickoffCompaction.test_crew_kickoff_compaction_openai.yaml
@@ -0,0 +1,120 @@
+interactions:
+- request:
+ body: '{"messages":[{"role":"system","content":"You are Researcher. You are an
+ expert researcher.\nYour personal goal is: Find information about Python programming"},{"role":"user","content":"\nCurrent
+ Task: What is Python? Give a brief answer.\n\nThis is the expected criteria
+ for your final answer: A short description of Python.\nyou MUST return the actual
+ complete content as the final answer, not a summary.\n\nProvide your complete
+ response:"}],"model":"gpt-4o-mini","temperature":0}'
+ headers:
+ User-Agent:
+ - X-USER-AGENT-XXX
+ accept:
+ - application/json
+ accept-encoding:
+ - ACCEPT-ENCODING-XXX
+ authorization:
+ - AUTHORIZATION-XXX
+ connection:
+ - keep-alive
+ content-length:
+ - '482'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ x-stainless-arch:
+ - X-STAINLESS-ARCH-XXX
+ x-stainless-async:
+ - 'false'
+ x-stainless-lang:
+ - python
+ x-stainless-os:
+ - X-STAINLESS-OS-XXX
+ x-stainless-package-version:
+ - 1.83.0
+ x-stainless-read-timeout:
+ - X-STAINLESS-READ-TIMEOUT-XXX
+ x-stainless-retry-count:
+ - '0'
+ x-stainless-runtime:
+ - CPython
+ x-stainless-runtime-version:
+ - 3.13.3
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: "{\n \"id\": \"chatcmpl-D7RxRv3U0LCLf2iqf40wxOQsuiYFR\",\n \"object\":
+ \"chat.completion\",\n \"created\": 1770668137,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
+ \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
+ \"assistant\",\n \"content\": \"Python is a high-level, interpreted
+ programming language known for its readability and simplicity. It was created
+ by Guido van Rossum and first released in 1991. Python supports multiple programming
+ paradigms, including procedural, object-oriented, and functional programming.
+ It has a large standard library and is widely used for web development, data
+ analysis, artificial intelligence, scientific computing, and automation, among
+ other applications. Python's syntax emphasizes code readability, allowing
+ developers to express concepts in fewer lines of code compared to other languages.
+ Its active community and extensive ecosystem of libraries and frameworks make
+ it a popular choice for both beginners and experienced programmers.\",\n \"refusal\":
+ null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
+ \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
+ 82,\n \"completion_tokens\": 123,\n \"total_tokens\": 205,\n \"prompt_tokens_details\":
+ {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
+ {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
+ 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
+ \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+ headers:
+ CF-RAY:
+ - CF-RAY-XXX
+ Connection:
+ - keep-alive
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 09 Feb 2026 20:15:39 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - STS-XXX
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - X-CONTENT-TYPE-XXX
+ access-control-expose-headers:
+ - ACCESS-CONTROL-XXX
+ alt-svc:
+ - h3=":443"; ma=86400
+ cf-cache-status:
+ - DYNAMIC
+ openai-organization:
+ - OPENAI-ORG-XXX
+ openai-processing-ms:
+ - '2467'
+ openai-project:
+ - OPENAI-PROJECT-XXX
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - SET-COOKIE-XXX
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - X-RATELIMIT-LIMIT-REQUESTS-XXX
+ x-ratelimit-limit-tokens:
+ - X-RATELIMIT-LIMIT-TOKENS-XXX
+ x-ratelimit-remaining-requests:
+ - X-RATELIMIT-REMAINING-REQUESTS-XXX
+ x-ratelimit-remaining-tokens:
+ - X-RATELIMIT-REMAINING-TOKENS-XXX
+ x-ratelimit-reset-requests:
+ - X-RATELIMIT-RESET-REQUESTS-XXX
+ x-ratelimit-reset-tokens:
+ - X-RATELIMIT-RESET-TOKENS-XXX
+ x-request-id:
+ - X-REQUEST-ID-XXX
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectAnthropic.test_summarize_direct_anthropic.yaml b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectAnthropic.test_summarize_direct_anthropic.yaml
new file mode 100644
index 000000000..87260beb8
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectAnthropic.test_summarize_direct_anthropic.yaml
@@ -0,0 +1,136 @@
+interactions:
+- request:
+ body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Analyze the following
+ conversation and create a structured summary that preserves all information
+ needed to continue the task seamlessly.\n\n\n[USER]: Research
+ the latest developments in large language models. Focus on architecture improvements
+ and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
+ in large language models. Based on my knowledge, recent advances include:\n1.
+ Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
+ Flash Attention\n3. Better training data curation techniques\n4. Constitutional
+ AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
+ What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
+ Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
+ to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
+ advantage is computational efficiency: only a subset of experts is activated
+ per token.\n\n\nCreate a summary with these sections:\n1. **Task
+ Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
+ has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
+ Key facts, data, tool results, or findings that must not be lost.\n4. **Next
+ Steps**: What should the agent do next based on the conversation?\n5. **Context
+ to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
+ in the conversation.\n\nWrap your entire summary in tags.\n\n\n[Your
+ structured summary here]\n"}],"model":"claude-3-5-haiku-latest","stream":false,"system":"You
+ are a precise assistant that creates structured summaries of agent conversations.
+ You preserve critical context needed for seamless task continuation.","temperature":0}'
+ headers:
+ User-Agent:
+ - X-USER-AGENT-XXX
+ accept:
+ - application/json
+ accept-encoding:
+ - ACCEPT-ENCODING-XXX
+ anthropic-version:
+ - '2023-06-01'
+ connection:
+ - keep-alive
+ content-length:
+ - '1870'
+ content-type:
+ - application/json
+ host:
+ - api.anthropic.com
+ x-api-key:
+ - X-API-KEY-XXX
+ x-stainless-arch:
+ - X-STAINLESS-ARCH-XXX
+ x-stainless-async:
+ - 'false'
+ x-stainless-lang:
+ - python
+ x-stainless-os:
+ - X-STAINLESS-OS-XXX
+ x-stainless-package-version:
+ - 0.73.0
+ x-stainless-retry-count:
+ - '0'
+ x-stainless-runtime:
+ - CPython
+ x-stainless-runtime-version:
+ - 3.13.3
+ x-stainless-timeout:
+ - NOT_GIVEN
+ method: POST
+ uri: https://api.anthropic.com/v1/messages
+ response:
+ body:
+ string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01SK3LP6RedPBmpvD1HfKD23","type":"message","role":"assistant","content":[{"type":"text","text":"\n1.
+ **Task Overview**:\n- Research latest developments in large language models\n-
+ Focus on architecture improvements and training techniques\n\n2. **Current
+ State**:\n- Initial research completed on broad developments\n- Currently
+ exploring Mixture of Experts (MoE) architectures in depth\n- Detailed discussion
+ of key MoE research papers initiated\n\n3. **Important Discoveries**:\nMoE
+ Architecture Insights:\n- Computational efficiency through selective expert
+ activation\n- Key research papers:\n * Switch Transformers (Google, 2021)\n *
+ GShard\n * Mixtral (Mistral AI)\n- Main benefit: Only subset of experts activated
+ per token\n\n4. **Next Steps**:\n- Conduct deeper analysis of MoE architecture
+ mechanisms\n- Compare routing strategies across different MoE implementations\n-
+ Investigate performance metrics and scalability of MoE models\n\n5. **Context
+ to Preserve**:\n- Research Focus: Large Language Model Architectures\n- Specific
+ Interest: Mixture of Experts (MoE) Architectures\n- Key Researchers/Organizations:
+ Google, Mistral AI\n- Years of Significant Papers: 2021 onwards\n"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":400,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":270,"service_tier":"standard","inference_geo":"not_available"}}'
+ headers:
+ CF-RAY:
+ - CF-RAY-XXX
+ Connection:
+ - keep-alive
+ Content-Security-Policy:
+ - CSP-FILTERED
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 09 Feb 2026 20:18:41 GMT
+ Server:
+ - cloudflare
+ Transfer-Encoding:
+ - chunked
+ X-Robots-Tag:
+ - none
+ anthropic-organization-id:
+ - ANTHROPIC-ORGANIZATION-ID-XXX
+ anthropic-ratelimit-input-tokens-limit:
+ - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
+ anthropic-ratelimit-input-tokens-remaining:
+ - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
+ anthropic-ratelimit-input-tokens-reset:
+ - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
+ anthropic-ratelimit-output-tokens-limit:
+ - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
+ anthropic-ratelimit-output-tokens-remaining:
+ - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
+ anthropic-ratelimit-output-tokens-reset:
+ - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
+ anthropic-ratelimit-requests-limit:
+ - '4000'
+ anthropic-ratelimit-requests-remaining:
+ - '3999'
+ anthropic-ratelimit-requests-reset:
+ - '2026-02-09T20:18:35Z'
+ anthropic-ratelimit-tokens-limit:
+ - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
+ anthropic-ratelimit-tokens-remaining:
+ - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
+ anthropic-ratelimit-tokens-reset:
+ - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
+ cf-cache-status:
+ - DYNAMIC
+ request-id:
+ - REQUEST-ID-XXX
+ strict-transport-security:
+ - STS-XXX
+ x-envoy-upstream-service-time:
+ - '5639'
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectGemini.test_summarize_direct_gemini.yaml b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectGemini.test_summarize_direct_gemini.yaml
new file mode 100644
index 000000000..9c2ac6795
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectGemini.test_summarize_direct_gemini.yaml
@@ -0,0 +1,103 @@
+interactions:
+- request:
+ body: '{"contents": [{"parts": [{"text": "Analyze the following conversation and
+ create a structured summary that preserves all information needed to continue
+ the task seamlessly.\n\n\n[USER]: Research the latest developments
+ in large language models. Focus on architecture improvements and training techniques.\n\n[ASSISTANT]:
+ I''ll research the latest developments in large language models. Based on my
+ knowledge, recent advances include:\n1. Mixture of Experts (MoE) architectures\n2.
+ Improved attention mechanisms like Flash Attention\n3. Better training data
+ curation techniques\n4. Constitutional AI and RLHF improvements\n\n[USER]: Can
+ you go deeper on the MoE architectures? What are the key papers?\n\n[ASSISTANT]:
+ Key papers on Mixture of Experts:\n- Switch Transformers (Google, 2021) - simplified
+ MoE routing\n- GShard - scaling to 600B parameters\n- Mixtral (Mistral AI) -
+ open-source MoE model\nThe main advantage is computational efficiency: only
+ a subset of experts is activated per token.\n\n\nCreate a summary
+ with these sections:\n1. **Task Overview**: What is the agent trying to accomplish?\n2.
+ **Current State**: What has been completed so far? What step is the agent on?\n3.
+ **Important Discoveries**: Key facts, data, tool results, or findings that must
+ not be lost.\n4. **Next Steps**: What should the agent do next based on the
+ conversation?\n5. **Context to Preserve**: Any specific values, names, URLs,
+ code snippets, or details referenced in the conversation.\n\nWrap your entire
+ summary in tags.\n\n\n[Your structured summary here]\n"}],
+ "role": "user"}], "systemInstruction": {"parts": [{"text": "You are a precise
+ assistant that creates structured summaries of agent conversations. You preserve
+ critical context needed for seamless task continuation."}], "role": "user"},
+ "generationConfig": {"temperature": 0.0}}'
+ headers:
+ User-Agent:
+ - X-USER-AGENT-XXX
+ accept:
+ - '*/*'
+ accept-encoding:
+ - ACCEPT-ENCODING-XXX
+ connection:
+ - keep-alive
+ content-length:
+ - '1895'
+ content-type:
+ - application/json
+ host:
+ - generativelanguage.googleapis.com
+ x-goog-api-client:
+ - google-genai-sdk/1.49.0 gl-python/3.13.3
+ x-goog-api-key:
+ - X-GOOG-API-KEY-XXX
+ method: POST
+ uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
+ response:
+ body:
+ string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
+ [\n {\n \"text\": \"```xml\\n\\u003csummary\\u003e\\n**Task
+ Overview**: Research the latest developments in large language models, focusing
+ on architecture improvements and training techniques.\\n\\n**Current State**:
+ The agent has identified several key areas of advancement in LLMs: Mixture
+ of Experts (MoE) architectures, improved attention mechanisms (Flash Attention),
+ better training data curation, and Constitutional AI/RLHF improvements. The
+ user has requested a deeper dive into MoE architectures. The agent has provided
+ an initial overview of MoE architectures and listed some key papers.\\n\\n**Important
+ Discoveries**:\\n* Key MoE papers: Switch Transformers (Google, 2021), GShard,
+ Mixtral (Mistral AI).\\n* MoE advantage: Computational efficiency through
+ selective activation of experts.\\n\\n**Next Steps**: Continue researching
+ MoE architectures based on the user's request for more detail. The agent should
+ elaborate further on the listed papers and potentially find more recent or
+ relevant publications.\\n\\n**Context to Preserve**:\\n* Focus areas: Architecture
+ improvements and training techniques for LLMs.\\n* Specific architectures:
+ Mixture of Experts (MoE), Flash Attention.\\n* Training techniques: Data
+ curation, Constitutional AI, RLHF.\\n* Specific papers: Switch Transformers
+ (Google, 2021), GShard, Mixtral (Mistral AI).\\n\\u003c/summary\\u003e\\n```\"\n
+ \ }\n ],\n \"role\": \"model\"\n },\n \"finishReason\":
+ \"STOP\",\n \"avgLogprobs\": -0.14186729703630721\n }\n ],\n \"usageMetadata\":
+ {\n \"promptTokenCount\": 373,\n \"candidatesTokenCount\": 280,\n \"totalTokenCount\":
+ 653,\n \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
+ \ \"tokenCount\": 373\n }\n ],\n \"candidatesTokensDetails\":
+ [\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 280\n
+ \ }\n ]\n },\n \"modelVersion\": \"gemini-2.0-flash\",\n \"responseId\":
+ \"GEGKabP3OcGH-8YPzZCj2Ao\"\n}\n"
+ headers:
+ Alt-Svc:
+ - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+ Content-Type:
+ - application/json; charset=UTF-8
+ Date:
+ - Mon, 09 Feb 2026 20:18:35 GMT
+ Server:
+ - scaffolding on HTTPServer2
+ Server-Timing:
+ - gfet4t7; dur=2310
+ Transfer-Encoding:
+ - chunked
+ Vary:
+ - Origin
+ - X-Origin
+ - Referer
+ X-Content-Type-Options:
+ - X-CONTENT-TYPE-XXX
+ X-Frame-Options:
+ - X-FRAME-OPTIONS-XXX
+ X-XSS-Protection:
+ - '0'
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectOpenAI.test_summarize_direct_openai.yaml b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectOpenAI.test_summarize_direct_openai.yaml
new file mode 100644
index 000000000..931feb4ae
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestSummarizeDirectOpenAI.test_summarize_direct_openai.yaml
@@ -0,0 +1,148 @@
+interactions:
+- request:
+ body: '{"messages":[{"role":"system","content":"You are a precise assistant that
+ creates structured summaries of agent conversations. You preserve critical context
+ needed for seamless task continuation."},{"role":"user","content":"Analyze the
+ following conversation and create a structured summary that preserves all information
+ needed to continue the task seamlessly.\n\n\n[USER]: Research
+ the latest developments in large language models. Focus on architecture improvements
+ and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
+ in large language models. Based on my knowledge, recent advances include:\n1.
+ Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
+ Flash Attention\n3. Better training data curation techniques\n4. Constitutional
+ AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
+ What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
+ Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
+ to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
+ advantage is computational efficiency: only a subset of experts is activated
+ per token.\n\n\nCreate a summary with these sections:\n1. **Task
+ Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
+ has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
+ Key facts, data, tool results, or findings that must not be lost.\n4. **Next
+ Steps**: What should the agent do next based on the conversation?\n5. **Context
+ to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
+ in the conversation.\n\nWrap your entire summary in tags.\n\n\n[Your
+ structured summary here]\n"}],"model":"gpt-4o-mini","temperature":0}'
+ headers:
+ User-Agent:
+ - X-USER-AGENT-XXX
+ accept:
+ - application/json
+ accept-encoding:
+ - ACCEPT-ENCODING-XXX
+ authorization:
+ - AUTHORIZATION-XXX
+ connection:
+ - keep-alive
+ content-length:
+ - '1844'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ x-stainless-arch:
+ - X-STAINLESS-ARCH-XXX
+ x-stainless-async:
+ - 'false'
+ x-stainless-lang:
+ - python
+ x-stainless-os:
+ - X-STAINLESS-OS-XXX
+ x-stainless-package-version:
+ - 1.83.0
+ x-stainless-read-timeout:
+ - X-STAINLESS-READ-TIMEOUT-XXX
+ x-stainless-retry-count:
+ - '0'
+ x-stainless-runtime:
+ - CPython
+ x-stainless-runtime-version:
+ - 3.13.3
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: "{\n \"id\": \"chatcmpl-D7RxGISdQet8JsWImiwzHQ2S9gSD4\",\n \"object\":
+ \"chat.completion\",\n \"created\": 1770668126,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
+ \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
+ \"assistant\",\n \"content\": \"\\n1. **Task Overview**: The
+ agent is tasked with researching the latest developments in large language
+ models, specifically focusing on architecture improvements and training techniques.\\n\\n2.
+ **Current State**: The agent has identified several recent advances in large
+ language models, including Mixture of Experts (MoE) architectures, improved
+ attention mechanisms, better training data curation techniques, and advancements
+ in Constitutional AI and Reinforcement Learning from Human Feedback (RLHF).\\n\\n3.
+ **Important Discoveries**: \\n - Recent advances in large language models
+ include:\\n 1. Mixture of Experts (MoE) architectures\\n 2. Improved
+ attention mechanisms like Flash Attention\\n 3. Better training data curation
+ techniques\\n 4. Constitutional AI and RLHF improvements\\n - Key papers
+ on Mixture of Experts:\\n - Switch Transformers (Google, 2021) - simplified
+ MoE routing\\n - GShard - scaling to 600B parameters\\n - Mixtral
+ (Mistral AI) - open-source MoE model\\n - The main advantage of MoE architectures
+ is computational efficiency, as only a subset of experts is activated per
+ token.\\n\\n4. **Next Steps**: The agent should delve deeper into the Mixture
+ of Experts architectures, reviewing the key papers mentioned and summarizing
+ their contributions and implications for large language models.\\n\\n5. **Context
+ to Preserve**: \\n - Key papers: \\n - Switch Transformers (Google,
+ 2021)\\n - GShard\\n - Mixtral (Mistral AI)\\n - Focus on computational
+ efficiency of MoE architectures.\\n\",\n \"refusal\": null,\n
+ \ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\":
+ \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 368,\n \"completion_tokens\":
+ 328,\n \"total_tokens\": 696,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
+ 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
+ {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
+ 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
+ \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+ headers:
+ CF-RAY:
+ - CF-RAY-XXX
+ Connection:
+ - keep-alive
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 09 Feb 2026 20:15:32 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - STS-XXX
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - X-CONTENT-TYPE-XXX
+ access-control-expose-headers:
+ - ACCESS-CONTROL-XXX
+ alt-svc:
+ - h3=":443"; ma=86400
+ cf-cache-status:
+ - DYNAMIC
+ openai-organization:
+ - OPENAI-ORG-XXX
+ openai-processing-ms:
+ - '5395'
+ openai-project:
+ - OPENAI-PROJECT-XXX
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - SET-COOKIE-XXX
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - X-RATELIMIT-LIMIT-REQUESTS-XXX
+ x-ratelimit-limit-tokens:
+ - X-RATELIMIT-LIMIT-TOKENS-XXX
+ x-ratelimit-remaining-requests:
+ - X-RATELIMIT-REMAINING-REQUESTS-XXX
+ x-ratelimit-remaining-tokens:
+ - X-RATELIMIT-REMAINING-TOKENS-XXX
+ x-ratelimit-reset-requests:
+ - X-RATELIMIT-RESET-REQUESTS-XXX
+ x-ratelimit-reset-tokens:
+ - X-RATELIMIT-RESET-TOKENS-XXX
+ x-request-id:
+ - X-REQUEST-ID-XXX
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/utilities/TestSummarizePreservesFiles.test_summarize_preserves_files_integration.yaml b/lib/crewai/tests/cassettes/utilities/TestSummarizePreservesFiles.test_summarize_preserves_files_integration.yaml
new file mode 100644
index 000000000..2d00899cf
--- /dev/null
+++ b/lib/crewai/tests/cassettes/utilities/TestSummarizePreservesFiles.test_summarize_preserves_files_integration.yaml
@@ -0,0 +1,145 @@
+interactions:
+- request:
+ body: '{"messages":[{"role":"system","content":"You are a precise assistant that
+ creates structured summaries of agent conversations. You preserve critical context
+ needed for seamless task continuation."},{"role":"user","content":"Analyze the
+ following conversation and create a structured summary that preserves all information
+ needed to continue the task seamlessly.\n\n\n[USER]: Research
+ the latest developments in large language models. Focus on architecture improvements
+ and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
+ in large language models. Based on my knowledge, recent advances include:\n1.
+ Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
+ Flash Attention\n3. Better training data curation techniques\n4. Constitutional
+ AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
+ What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
+ Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
+ to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
+ advantage is computational efficiency: only a subset of experts is activated
+ per token.\n\n\nCreate a summary with these sections:\n1. **Task
+ Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
+ has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
+ Key facts, data, tool results, or findings that must not be lost.\n4. **Next
+ Steps**: What should the agent do next based on the conversation?\n5. **Context
+ to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
+ in the conversation.\n\nWrap your entire summary in tags.\n\n\n[Your
+ structured summary here]\n"}],"model":"gpt-4o-mini","temperature":0}'
+ headers:
+ User-Agent:
+ - X-USER-AGENT-XXX
+ accept:
+ - application/json
+ accept-encoding:
+ - ACCEPT-ENCODING-XXX
+ authorization:
+ - AUTHORIZATION-XXX
+ connection:
+ - keep-alive
+ content-length:
+ - '1844'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ x-stainless-arch:
+ - X-STAINLESS-ARCH-XXX
+ x-stainless-async:
+ - 'false'
+ x-stainless-lang:
+ - python
+ x-stainless-os:
+ - X-STAINLESS-OS-XXX
+ x-stainless-package-version:
+ - 1.83.0
+ x-stainless-read-timeout:
+ - X-STAINLESS-READ-TIMEOUT-XXX
+ x-stainless-retry-count:
+ - '0'
+ x-stainless-runtime:
+ - CPython
+ x-stainless-runtime-version:
+ - 3.13.3
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: "{\n \"id\": \"chatcmpl-D7RxM4n36QoACHrC0QocV1pXIwvtD\",\n \"object\":
+ \"chat.completion\",\n \"created\": 1770668132,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
+ \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
+ \"assistant\",\n \"content\": \"\\n1. **Task Overview**: The
+ user has requested research on the latest developments in large language models,
+ specifically focusing on architecture improvements and training techniques.\\n\\n2.
+ **Current State**: The assistant has identified several recent advances in
+ large language models, including Mixture of Experts (MoE) architectures, improved
+ attention mechanisms, better training data curation techniques, and advancements
+ in Constitutional AI and Reinforcement Learning from Human Feedback (RLHF).\\n\\n3.
+ **Important Discoveries**: \\n - Key papers on Mixture of Experts (MoE)
+ architectures:\\n - \\\"Switch Transformers\\\" (Google, 2021) - simplified
+ MoE routing.\\n - \\\"GShard\\\" - scaling to 600B parameters.\\n -
+ \\\"Mixtral\\\" (Mistral AI) - open-source MoE model.\\n - The main advantage
+ of MoE architectures is computational efficiency, as only a subset of experts
+ is activated per token.\\n\\n4. **Next Steps**: The assistant should delve
+ deeper into the Mixture of Experts architectures, potentially summarizing
+ the findings from the key papers mentioned.\\n\\n5. **Context to Preserve**:
+ \\n - Key papers: \\\"Switch Transformers,\\\" \\\"GShard,\\\" \\\"Mixtral.\\\"\\n
+ \ - Notable organizations: Google, Mistral AI.\\n - Focus areas: MoE architectures,
+ computational efficiency.\\n\",\n \"refusal\": null,\n \"annotations\":
+ []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
+ \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 368,\n \"completion_tokens\":
+ 275,\n \"total_tokens\": 643,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
+ 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
+ {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
+ 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
+ \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+ headers:
+ CF-RAY:
+ - CF-RAY-XXX
+ Connection:
+ - keep-alive
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 09 Feb 2026 20:15:36 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - STS-XXX
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - X-CONTENT-TYPE-XXX
+ access-control-expose-headers:
+ - ACCESS-CONTROL-XXX
+ alt-svc:
+ - h3=":443"; ma=86400
+ cf-cache-status:
+ - DYNAMIC
+ openai-organization:
+ - OPENAI-ORG-XXX
+ openai-processing-ms:
+ - '4188'
+ openai-project:
+ - OPENAI-PROJECT-XXX
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - SET-COOKIE-XXX
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - X-RATELIMIT-LIMIT-REQUESTS-XXX
+ x-ratelimit-limit-tokens:
+ - X-RATELIMIT-LIMIT-TOKENS-XXX
+ x-ratelimit-remaining-requests:
+ - X-RATELIMIT-REMAINING-REQUESTS-XXX
+ x-ratelimit-remaining-tokens:
+ - X-RATELIMIT-REMAINING-TOKENS-XXX
+ x-ratelimit-reset-requests:
+ - X-RATELIMIT-RESET-REQUESTS-XXX
+ x-ratelimit-reset-tokens:
+ - X-RATELIMIT-RESET-TOKENS-XXX
+ x-request-id:
+ - X-REQUEST-ID-XXX
+ status:
+ code: 200
+ message: OK
+version: 1