mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-05-02 07:42:40 +00:00
imp compaction (#4399)
Some checks failed
* imp compaction
* fix lint
* cassette gen
* cassette gen
* improve assert
* adding azure
* fix global docstring
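The files added by this commit are vcrpy cassettes: YAML recordings of the HTTP exchanges the compaction tests make against LLM providers, replayed in CI so the suite runs deterministically and offline. As a minimal sketch of how such a cassette is consumed (the cassette filename, directory, and assertion here are illustrative, not taken from this commit):

import vcr
from openai import OpenAI

# Replay-only recorder: never hits the network, serves responses from YAML.
replay = vcr.VCR(
    cassette_library_dir="cassettes",
    record_mode="none",
    match_on=["method", "uri", "body"],
)

@replay.use_cassette("compaction_writer.yaml")  # hypothetical filename
def test_writer_completion():
    client = OpenAI(api_key="test-key")  # key is never validated during replay
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0,
        messages=[{"role": "user", "content": "Write one sentence about the sun."}],
    )
    assert "sun" in response.choices[0].message.content.lower()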
@@ -0,0 +1,113 @@
interactions:
- request:
    body: '{"messages":[{"role":"system","content":"You are Writer. You are a skilled
      writer.\nYour personal goal is: Write concise content"},{"role":"user","content":"\nCurrent
      Task: Write one sentence about the sun.\n\nThis is the expected criteria for
      your final answer: A single sentence about the sun.\nyou MUST return the actual
      complete content as the final answer, not a summary.\n\nProvide your complete
      response:"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '453'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7RxEngFVCbqdc7tNjV3VjeteqcwT\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668124,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"The sun is a massive ball of glowing
        gas at the center of our solar system, providing light and warmth essential
        for life on Earth.\",\n \"refusal\": null,\n \"annotations\":
        []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
        \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 78,\n \"completion_tokens\":
        27,\n \"total_tokens\": 105,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
        0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:15:25 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '664'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
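Note the placeholder values throughout these cassettes (X-USER-AGENT-XXX, AUTHORIZATION-XXX, the rate-limit headers, and so on): secrets and environment-specific headers are scrubbed with fixed placeholders before the cassette is written. A minimal sketch of how such scrubbing is commonly configured in vcrpy; the exact hooks used by this test suite are an assumption:

import vcr

def scrub_response(response):
    # Replace volatile or sensitive response headers with fixed placeholders
    # so cassettes stay stable across re-recordings.
    for name, placeholder in [("set-cookie", "SET-COOKIE-XXX"), ("CF-RAY", "CF-RAY-XXX")]:
        if name in response["headers"]:
            response["headers"][name] = [placeholder]
    return response

recorder = vcr.VCR(
    # Request headers are filtered at record time with (name, replacement) pairs.
    filter_headers=[
        ("authorization", "AUTHORIZATION-XXX"),
        ("user-agent", "X-USER-AGENT-XXX"),
    ],
    before_record_response=scrub_response,
)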
@@ -0,0 +1,120 @@
interactions:
- request:
    body: '{"messages":[{"role":"system","content":"You are Researcher. You are an
      expert researcher.\nYour personal goal is: Find information about Python programming"},{"role":"user","content":"\nCurrent
      Task: What is Python? Give a brief answer.\n\nThis is the expected criteria
      for your final answer: A short description of Python.\nyou MUST return the actual
      complete content as the final answer, not a summary.\n\nProvide your complete
      response:"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '482'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7RxRv3U0LCLf2iqf40wxOQsuiYFR\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668137,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"Python is a high-level, interpreted
        programming language known for its readability and simplicity. It was created
        by Guido van Rossum and first released in 1991. Python supports multiple programming
        paradigms, including procedural, object-oriented, and functional programming.
        It has a large standard library and is widely used for web development, data
        analysis, artificial intelligence, scientific computing, and automation, among
        other applications. Python's syntax emphasizes code readability, allowing
        developers to express concepts in fewer lines of code compared to other languages.
        Its active community and extensive ecosystem of libraries and frameworks make
        it a popular choice for both beginners and experienced programmers.\",\n \"refusal\":
        null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
        \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
        82,\n \"completion_tokens\": 123,\n \"total_tokens\": 205,\n \"prompt_tokens_details\":
        {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:15:39 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '2467'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
@@ -0,0 +1,435 @@
interactions:
- request:
    body: '{"messages":[{"role":"system","content":"You are a precise assistant that
      creates structured summaries of agent conversations. You preserve critical context
      needed for seamless task continuation."},{"role":"user","content":"Analyze the
      following conversation and create a structured summary that preserves all information
      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Explain the
      Python package ecosystem. How does pip work, what is PyPI, and what are virtual
      environments? Compare pip with conda and uv.\n\n[ASSISTANT]: PyPI (Python Package
      Index) is the official repository hosting 400k+ packages. pip is the standard
      package installer that downloads from PyPI. Virtual environments (venv) create
      isolated Python installations to avoid dependency conflicts between projects.
      conda is a cross-language package manager popular in data science that can manage
      non-Python dependencies. uv is a new Rust-based tool that is 10-100x faster
      than pip and aims to replace pip, pip-tools, and virtualenv with a single unified
      tool.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task Overview**:
      What is the agent trying to accomplish?\n2. **Current State**: What has been
      completed so far? What step is the agent on?\n3. **Important Discoveries**:
      Key facts, data, tool results, or findings that must not be lost.\n4. **Next
      Steps**: What should the agent do next based on the conversation?\n5. **Context
      to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
      in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
      structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '1687'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - async:asyncio
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7S93xpUu9d5twM82uJOZpurQTD5u\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668857,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
        user is seeking an explanation of the Python package ecosystem, specifically
        focusing on how pip works, the role of PyPI, the concept of virtual environments,
        and a comparison between pip, conda, and uv.\\n\\n2. **Current State**: The
        assistant has provided a comprehensive overview of the Python package ecosystem,
        including definitions and comparisons of pip, PyPI, virtual environments,
        conda, and uv.\\n\\n3. **Important Discoveries**:\\n - PyPI (Python Package
        Index) is the official repository with over 400,000 packages.\\n - pip is
        the standard package installer that downloads packages from PyPI.\\n - Virtual
        environments (venv) allow for isolated Python installations to prevent dependency
        conflicts.\\n - conda is a cross-language package manager, particularly
        popular in data science, that can manage non-Python dependencies.\\n - uv
        is a new Rust-based tool that is significantly faster than pip (10-100x) and
        aims to unify the functionalities of pip, pip-tools, and virtualenv.\\n\\n4.
        **Next Steps**: The agent should consider providing further details on how
        to use pip, conda, and uv, including installation commands, examples of creating
        virtual environments, and any specific use cases for each tool.\\n\\n5. **Context
        to Preserve**: \\n - PyPI: Python Package Index, hosting 400k+ packages.\\n
        \ - pip: Standard package installer for Python.\\n - Virtual environments
        (venv): Isolated Python installations.\\n - conda: Cross-language package
        manager for data science.\\n - uv: Rust-based tool, 10-100x faster than
        pip, aims to replace pip, pip-tools, and virtualenv.\\n</summary>\",\n \"refusal\":
        null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
        \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
        333,\n \"completion_tokens\": 354,\n \"total_tokens\": 687,\n \"prompt_tokens_details\":
        {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:27:42 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '4879'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
- request:
    body: '{"messages":[{"role":"system","content":"You are a precise assistant that
      creates structured summaries of agent conversations. You preserve critical context
      needed for seamless task continuation."},{"role":"user","content":"Analyze the
      following conversation and create a structured summary that preserves all information
      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Tell me about
      the history of the Python programming language. Who created it, when was it
      first released, and what were the main design goals? Please provide a detailed
      overview covering the major milestones from its inception through Python 3.\n\n[ASSISTANT]:
      Python was created by Guido van Rossum and first released in 1991. The main
      design goals were code readability and simplicity. Key milestones: Python 1.0
      (1994) introduced functional programming tools like lambda and map. Python 2.0
      (2000) added list comprehensions and garbage collection. Python 3.0 (2008) was
      a major backward-incompatible release that fixed fundamental design flaws. Python
      2 reached end-of-life in January 2020.\n</conversation>\n\nCreate a summary
      with these sections:\n1. **Task Overview**: What is the agent trying to accomplish?\n2.
      **Current State**: What has been completed so far? What step is the agent on?\n3.
      **Important Discoveries**: Key facts, data, tool results, or findings that must
      not be lost.\n4. **Next Steps**: What should the agent do next based on the
      conversation?\n5. **Context to Preserve**: Any specific values, names, URLs,
      code snippets, or details referenced in the conversation.\n\nWrap your entire
      summary in <summary> tags.\n\n<summary>\n[Your structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '1726'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - async:asyncio
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7S93rBUMAtEdwdI6Y2ga0s50IFtv\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668857,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
        user is seeking a detailed overview of the history of the Python programming
        language, including its creator, initial release date, main design goals,
        and major milestones up to Python 3.\\n\\n2. **Current State**: The assistant
        has provided a comprehensive response detailing the history of Python, including
        its creator (Guido van Rossum), first release (1991), main design goals (code
        readability and simplicity), and key milestones (Python 1.0 in 1994, Python
        2.0 in 2000, and Python 3.0 in 2008).\\n\\n3. **Important Discoveries**: \\n
        \ - Python was created by Guido van Rossum.\\n - First released in 1991.\\n
        \ - Main design goals: code readability and simplicity.\\n - Key milestones:\\n
        \ - Python 1.0 (1994): Introduced functional programming tools like lambda
        and map.\\n - Python 2.0 (2000): Added list comprehensions and garbage
        collection.\\n - Python 3.0 (2008): Major backward-incompatible release
        that fixed fundamental design flaws.\\n - Python 2 reached end-of-life in
        January 2020.\\n\\n4. **Next Steps**: The agent should be prepared to provide
        additional details or answer follow-up questions regarding Python's features,
        community, or specific use cases if the user requests more information.\\n\\n5.
        **Context to Preserve**: \\n - Creator: Guido van Rossum\\n - Initial
        release: 1991\\n - Milestones: \\n - Python 1.0 (1994)\\n - Python
        2.0 (2000)\\n - Python 3.0 (2008)\\n - End-of-life for Python 2: January
        2020\\n</summary>\",\n \"refusal\": null,\n \"annotations\":
        []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
        \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 346,\n \"completion_tokens\":
        372,\n \"total_tokens\": 718,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
        0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_7e4bf6ad56\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:27:42 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '5097'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
- request:
    body: '{"messages":[{"role":"system","content":"You are a precise assistant that
      creates structured summaries of agent conversations. You preserve critical context
      needed for seamless task continuation."},{"role":"user","content":"Analyze the
      following conversation and create a structured summary that preserves all information
      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: What about
      the async/await features? When were they introduced and how do they compare
      to similar features in JavaScript and C#? Also explain the Global Interpreter
      Lock and its implications.\n\n[ASSISTANT]: Async/await was introduced in Python
      3.5 (PEP 492, 2015). Unlike JavaScript which is single-threaded by design, Python''s
      asyncio is an opt-in framework. C# introduced async/await in 2012 (C# 5.0) and
      was a major inspiration for Python''s implementation. The GIL (Global Interpreter
      Lock) is a mutex that protects access to Python objects, preventing multiple
      threads from executing Python bytecodes simultaneously. This means CPU-bound
      multithreaded programs don''t benefit from multiple cores. PEP 703 proposes
      making the GIL optional in CPython.\n</conversation>\n\nCreate a summary with
      these sections:\n1. **Task Overview**: What is the agent trying to accomplish?\n2.
      **Current State**: What has been completed so far? What step is the agent on?\n3.
      **Important Discoveries**: Key facts, data, tool results, or findings that must
      not be lost.\n4. **Next Steps**: What should the agent do next based on the
      conversation?\n5. **Context to Preserve**: Any specific values, names, URLs,
      code snippets, or details referenced in the conversation.\n\nWrap your entire
      summary in <summary> tags.\n\n<summary>\n[Your structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '1786'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - async:asyncio
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7S94auQYOLDTKfRzdluGiWAomSqd\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668858,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
        user is seeking information about the async/await features in Python, their
        introduction timeline, comparisons with similar features in JavaScript and
        C#, and an explanation of the Global Interpreter Lock (GIL) and its implications.\\n\\n2.
        **Current State**: The assistant has provided information regarding the introduction
        of async/await in Python (version 3.5, PEP 492 in 2015), comparisons with
        JavaScript and C# (C# introduced async/await in 2012), and an explanation
        of the GIL.\\n\\n3. **Important Discoveries**: \\n - Async/await was introduced
        in Python 3.5 (PEP 492, 2015).\\n - JavaScript is single-threaded, while
        Python's asyncio is an opt-in framework.\\n - C# introduced async/await
        in 2012 (C# 5.0) and influenced Python's implementation.\\n - The GIL (Global
        Interpreter Lock) is a mutex that prevents multiple threads from executing
        Python bytecodes simultaneously, affecting CPU-bound multithreaded programs.\\n
        \ - PEP 703 proposes making the GIL optional in CPython.\\n\\n4. **Next Steps**:
        The agent should consider providing more detailed comparisons of async/await
        features between Python, JavaScript, and C#, as well as further implications
        of the GIL and PEP 703.\\n\\n5. **Context to Preserve**: \\n - Python async/await
        introduction: 3.5 (PEP 492, 2015)\\n - C# async/await introduction: 2012
        (C# 5.0)\\n - GIL (Global Interpreter Lock) explanation and implications.\\n
        \ - Reference to PEP 703 regarding the GIL.\\n</summary>\",\n \"refusal\":
        null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
        \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
        364,\n \"completion_tokens\": 368,\n \"total_tokens\": 732,\n \"prompt_tokens_details\":
        {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:27:44 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '6339'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
@@ -0,0 +1,435 @@
interactions:
- request:
    body: '{"messages":[{"role":"system","content":"You are a precise assistant that
      creates structured summaries of agent conversations. You preserve critical context
      needed for seamless task continuation."},{"role":"user","content":"Analyze the
      following conversation and create a structured summary that preserves all information
      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Explain the
      Python package ecosystem. How does pip work, what is PyPI, and what are virtual
      environments? Compare pip with conda and uv.\n\n[ASSISTANT]: PyPI (Python Package
      Index) is the official repository hosting 400k+ packages. pip is the standard
      package installer that downloads from PyPI. Virtual environments (venv) create
      isolated Python installations to avoid dependency conflicts between projects.
      conda is a cross-language package manager popular in data science that can manage
      non-Python dependencies. uv is a new Rust-based tool that is 10-100x faster
      than pip and aims to replace pip, pip-tools, and virtualenv with a single unified
      tool.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task Overview**:
      What is the agent trying to accomplish?\n2. **Current State**: What has been
      completed so far? What step is the agent on?\n3. **Important Discoveries**:
      Key facts, data, tool results, or findings that must not be lost.\n4. **Next
      Steps**: What should the agent do next based on the conversation?\n5. **Context
      to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
      in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
      structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '1687'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - async:asyncio
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7S9PnjkuCMHqU912kcH8G5zIIxQU\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668879,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
        user is seeking an explanation of the Python package ecosystem, specifically
        focusing on how pip works, the role of PyPI, the concept of virtual environments,
        and a comparison between pip, conda, and uv.\\n\\n2. **Current State**: The
        assistant has provided a comprehensive overview of the Python package ecosystem,
        including definitions and comparisons of pip, PyPI, virtual environments,
        conda, and uv.\\n\\n3. **Important Discoveries**:\\n - PyPI (Python Package
        Index) is the official repository with over 400,000 packages.\\n - pip is
        the standard package installer that downloads packages from PyPI.\\n - Virtual
        environments (venv) allow for isolated Python installations to prevent dependency
        conflicts.\\n - conda is a cross-language package manager, particularly
        popular in data science, that can manage non-Python dependencies.\\n - uv
        is a new Rust-based tool that is significantly faster than pip (10-100x) and
        aims to unify the functionalities of pip, pip-tools, and virtualenv.\\n\\n4.
        **Next Steps**: The agent should consider providing further details or examples
        on how to use pip, conda, and uv, as well as practical applications of virtual
        environments in Python projects.\\n\\n5. **Context to Preserve**: \\n -
        PyPI: Python Package Index, hosting 400k+ packages.\\n - pip: Standard package
        installer for Python.\\n - Virtual environments (venv): Isolated Python
        installations.\\n - conda: Cross-language package manager for data science.\\n
        \ - uv: Rust-based tool, 10-100x faster than pip, aims to replace pip, pip-tools,
        and virtualenv.\\n</summary>\",\n \"refusal\": null,\n \"annotations\":
        []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
        \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 333,\n \"completion_tokens\":
        349,\n \"total_tokens\": 682,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
        0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:28:04 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '4979'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
- request:
    body: '{"messages":[{"role":"system","content":"You are a precise assistant that
      creates structured summaries of agent conversations. You preserve critical context
      needed for seamless task continuation."},{"role":"user","content":"Analyze the
      following conversation and create a structured summary that preserves all information
      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Tell me about
      the history of the Python programming language. Who created it, when was it
      first released, and what were the main design goals? Please provide a detailed
      overview covering the major milestones from its inception through Python 3.\n\n[ASSISTANT]:
      Python was created by Guido van Rossum and first released in 1991. The main
      design goals were code readability and simplicity. Key milestones: Python 1.0
      (1994) introduced functional programming tools like lambda and map. Python 2.0
      (2000) added list comprehensions and garbage collection. Python 3.0 (2008) was
      a major backward-incompatible release that fixed fundamental design flaws. Python
      2 reached end-of-life in January 2020.\n</conversation>\n\nCreate a summary
      with these sections:\n1. **Task Overview**: What is the agent trying to accomplish?\n2.
      **Current State**: What has been completed so far? What step is the agent on?\n3.
      **Important Discoveries**: Key facts, data, tool results, or findings that must
      not be lost.\n4. **Next Steps**: What should the agent do next based on the
      conversation?\n5. **Context to Preserve**: Any specific values, names, URLs,
      code snippets, or details referenced in the conversation.\n\nWrap your entire
      summary in <summary> tags.\n\n<summary>\n[Your structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '1726'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - async:asyncio
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7S9PqglWRu0PEoMRHyOiRnpn3yqU\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668879,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
        user is seeking a detailed overview of the history of the Python programming
        language, including its creator, initial release date, main design goals,
        and major milestones up to Python 3.\\n\\n2. **Current State**: The assistant
        has provided a comprehensive response detailing the history of Python, including
        its creator (Guido van Rossum), first release (1991), main design goals (code
        readability and simplicity), and key milestones (Python 1.0 in 1994, Python
        2.0 in 2000, and Python 3.0 in 2008).\\n\\n3. **Important Discoveries**: \\n
        \ - Python was created by Guido van Rossum.\\n - First released in 1991.\\n
        \ - Main design goals: code readability and simplicity.\\n - Key milestones:\\n
        \ - Python 1.0 (1994): Introduced functional programming tools like lambda
        and map.\\n - Python 2.0 (2000): Added list comprehensions and garbage
        collection.\\n - Python 3.0 (2008): Major backward-incompatible release
        that fixed fundamental design flaws.\\n - Python 2 reached end-of-life in
        January 2020.\\n\\n4. **Next Steps**: The agent should be prepared to provide
        further details or answer any follow-up questions the user may have regarding
        Python's history or its features.\\n\\n5. **Context to Preserve**: \\n -
        Creator: Guido van Rossum\\n - First release: 1991\\n - Milestones: \\n
        \ - Python 1.0 (1994)\\n - Python 2.0 (2000)\\n - Python 3.0 (2008)\\n
        \ - End-of-life for Python 2: January 2020\\n</summary>\",\n \"refusal\":
        null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
        \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
        346,\n \"completion_tokens\": 367,\n \"total_tokens\": 713,\n \"prompt_tokens_details\":
        {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_7e4bf6ad56\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:28:04 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '5368'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
- request:
    body: '{"messages":[{"role":"system","content":"You are a precise assistant that
      creates structured summaries of agent conversations. You preserve critical context
      needed for seamless task continuation."},{"role":"user","content":"Analyze the
      following conversation and create a structured summary that preserves all information
      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: What about
      the async/await features? When were they introduced and how do they compare
      to similar features in JavaScript and C#? Also explain the Global Interpreter
      Lock and its implications.\n\n[ASSISTANT]: Async/await was introduced in Python
      3.5 (PEP 492, 2015). Unlike JavaScript which is single-threaded by design, Python''s
      asyncio is an opt-in framework. C# introduced async/await in 2012 (C# 5.0) and
      was a major inspiration for Python''s implementation. The GIL (Global Interpreter
      Lock) is a mutex that protects access to Python objects, preventing multiple
      threads from executing Python bytecodes simultaneously. This means CPU-bound
      multithreaded programs don''t benefit from multiple cores. PEP 703 proposes
      making the GIL optional in CPython.\n</conversation>\n\nCreate a summary with
      these sections:\n1. **Task Overview**: What is the agent trying to accomplish?\n2.
      **Current State**: What has been completed so far? What step is the agent on?\n3.
      **Important Discoveries**: Key facts, data, tool results, or findings that must
      not be lost.\n4. **Next Steps**: What should the agent do next based on the
      conversation?\n5. **Context to Preserve**: Any specific values, names, URLs,
      code snippets, or details referenced in the conversation.\n\nWrap your entire
      summary in <summary> tags.\n\n<summary>\n[Your structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '1786'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - async:asyncio
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7S9Pcl5ybKLH8cSEZ6hgPuvj5iCv\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668879,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
        user is seeking information about the async/await features in Python, their
        introduction timeline, comparisons with similar features in JavaScript and
        C#, and an explanation of the Global Interpreter Lock (GIL) and its implications.\\n\\n2.
        **Current State**: The assistant has provided information regarding the introduction
        of async/await in Python (version 3.5, PEP 492 in 2015), comparisons with
        JavaScript and C# (C# introduced async/await in 2012), and an explanation
        of the GIL.\\n\\n3. **Important Discoveries**: \\n - Async/await was introduced
        in Python 3.5 (PEP 492, 2015).\\n - JavaScript is single-threaded, while
        Python's asyncio is an opt-in framework.\\n - C# introduced async/await
        in 2012 (C# 5.0) and influenced Python's implementation.\\n - The GIL (Global
        Interpreter Lock) is a mutex that prevents multiple threads from executing
        Python bytecodes simultaneously, affecting CPU-bound multithreaded programs.\\n
        \ - PEP 703 proposes making the GIL optional in CPython.\\n\\n4. **Next Steps**:
        The agent should consider providing further details on how async/await is
        implemented in Python, JavaScript, and C#, and explore the implications of
        the GIL in more depth, including potential alternatives or workarounds.\\n\\n5.
        **Context to Preserve**: \\n - Python async/await introduction: version
        3.5, PEP 492, 2015.\\n - C# async/await introduction: 2012, C# 5.0.\\n -
        GIL (Global Interpreter Lock) and its implications on multithreading in Python.\\n
        \ - Reference to PEP 703 regarding the GIL.\\n</summary>\",\n \"refusal\":
        null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
        \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
        364,\n \"completion_tokens\": 381,\n \"total_tokens\": 745,\n \"prompt_tokens_details\":
        {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:28:04 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '5489'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
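The next cassette records the same summarization prompt against Anthropic rather than OpenAI; the request carries max_tokens, a top-level system string, and the anthropic-version header. A hedged sketch of a client call that produces a request of this shape (the API key and the abbreviated prompt are illustrative):

import anthropic

client = anthropic.Anthropic(api_key="test-key")  # scrubbed to X-API-KEY-XXX on record
message = client.messages.create(
    model="claude-3-5-haiku-latest",
    max_tokens=4096,
    temperature=0,
    system="You are a precise assistant that creates structured summaries of agent conversations.",
    messages=[{"role": "user", "content": "Analyze the following conversation..."}],
)
print(message.content[0].text)  # the <summary>...</summary> block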
@@ -0,0 +1,136 @@
interactions:
- request:
    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Analyze the following
      conversation and create a structured summary that preserves all information
      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
      the latest developments in large language models. Focus on architecture improvements
      and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
      in large language models. Based on my knowledge, recent advances include:\n1.
      Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
      Flash Attention\n3. Better training data curation techniques\n4. Constitutional
      AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
      What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
      Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
      to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
      advantage is computational efficiency: only a subset of experts is activated
      per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
      Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
      has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
      Key facts, data, tool results, or findings that must not be lost.\n4. **Next
      Steps**: What should the agent do next based on the conversation?\n5. **Context
      to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
      in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
      structured summary here]\n</summary>"}],"model":"claude-3-5-haiku-latest","stream":false,"system":"You
      are a precise assistant that creates structured summaries of agent conversations.
      You preserve critical context needed for seamless task continuation.","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      anthropic-version:
      - '2023-06-01'
      connection:
      - keep-alive
      content-length:
      - '1870'
      content-type:
      - application/json
      host:
      - api.anthropic.com
      x-api-key:
      - X-API-KEY-XXX
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 0.73.0
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
      x-stainless-timeout:
      - NOT_GIVEN
    method: POST
    uri: https://api.anthropic.com/v1/messages
  response:
    body:
      string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01SK3LP6RedPBmpvD1HfKD23","type":"message","role":"assistant","content":[{"type":"text","text":"<summary>\n1.
        **Task Overview**:\n- Research latest developments in large language models\n-
        Focus on architecture improvements and training techniques\n\n2. **Current
        State**:\n- Initial research completed on broad developments\n- Currently
        exploring Mixture of Experts (MoE) architectures in depth\n- Detailed discussion
        of key MoE research papers initiated\n\n3. **Important Discoveries**:\nMoE
        Architecture Insights:\n- Computational efficiency through selective expert
        activation\n- Key research papers:\n * Switch Transformers (Google, 2021)\n *
        GShard\n * Mixtral (Mistral AI)\n- Main benefit: Only subset of experts activated
        per token\n\n4. **Next Steps**:\n- Conduct deeper analysis of MoE architecture
        mechanisms\n- Compare routing strategies across different MoE implementations\n-
        Investigate performance metrics and scalability of MoE models\n\n5. **Context
        to Preserve**:\n- Research Focus: Large Language Model Architectures\n- Specific
        Interest: Mixture of Experts (MoE) Architectures\n- Key Researchers/Organizations:
        Google, Mistral AI\n- Years of Significant Papers: 2021 onwards\n</summary>"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":400,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":270,"service_tier":"standard","inference_geo":"not_available"}}'
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Security-Policy:
      - CSP-FILTERED
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:18:41 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
      X-Robots-Tag:
      - none
      anthropic-organization-id:
      - ANTHROPIC-ORGANIZATION-ID-XXX
      anthropic-ratelimit-input-tokens-limit:
      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
      anthropic-ratelimit-input-tokens-remaining:
      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
      anthropic-ratelimit-input-tokens-reset:
      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
      anthropic-ratelimit-output-tokens-limit:
      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
      anthropic-ratelimit-output-tokens-remaining:
      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
      anthropic-ratelimit-output-tokens-reset:
      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
      anthropic-ratelimit-requests-limit:
      - '4000'
      anthropic-ratelimit-requests-remaining:
      - '3999'
      anthropic-ratelimit-requests-reset:
      - '2026-02-09T20:18:35Z'
      anthropic-ratelimit-tokens-limit:
      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
      anthropic-ratelimit-tokens-remaining:
      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
      anthropic-ratelimit-tokens-reset:
      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
      cf-cache-status:
      - DYNAMIC
      request-id:
      - REQUEST-ID-XXX
      strict-transport-security:
      - STS-XXX
      x-envoy-upstream-service-time:
      - '5639'
    status:
      code: 200
      message: OK
version: 1
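The final cassette covers the "adding azure" part of this commit: the same summarization request recorded against an Azure OpenAI deployment, with a fake endpoint and the deployment name and api-version in the URI. One plausible way to issue such a request is the openai SDK's Azure client, as sketched below; the test suite's actual client is not shown in this excerpt, and the recorded headers suggest it may use a different HTTP stack:

from openai import AzureOpenAI

client = AzureOpenAI(
    azure_endpoint="https://fake-azure-endpoint.openai.azure.com",
    api_key="test-key",  # recorded as X-API-KEY-XXX
    api_version="2024-12-01-preview",
)
response = client.chat.completions.create(
    model="gpt-4o-mini",  # maps to the deployment name in the URI
    temperature=0,
    messages=[{"role": "user", "content": "Research the latest developments in large language models."}],
)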
@@ -0,0 +1,110 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are a precise assistant
that creates structured summaries of agent conversations. You preserve critical
context needed for seamless task continuation."}, {"role": "user", "content":
"Analyze the following conversation and create a structured summary that preserves
all information needed to continue the task seamlessly.\n\n<conversation>\n[USER]:
Research the latest developments in large language models. Focus on architecture
improvements and training techniques.\n\n[ASSISTANT]: I''ll research the latest
developments in large language models. Based on my knowledge, recent advances
include:\n1. Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms
like Flash Attention\n3. Better training data curation techniques\n4. Constitutional
AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
advantage is computational efficiency: only a subset of experts is activated
per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
Key facts, data, tool results, or findings that must not be lost.\n4. **Next
Steps**: What should the agent do next based on the conversation?\n5. **Context
to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
structured summary here]\n</summary>"}], "stream": false, "temperature": 0}'
headers:
Accept:
- application/json
Connection:
- keep-alive
Content-Length:
- '1849'
Content-Type:
- application/json
User-Agent:
- X-USER-AGENT-XXX
accept-encoding:
- ACCEPT-ENCODING-XXX
api-key:
- X-API-KEY-XXX
authorization:
- AUTHORIZATION-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2024-12-01-preview
response:
body:
string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"\u003csummary\u003e\n1.
**Task Overview**: The user has requested research on the latest developments
in large language models, specifically focusing on architecture improvements
and training techniques.\n\n2. **Current State**: The assistant has provided
an initial overview of recent advances in large language models, including
Mixture of Experts (MoE) architectures, improved attention mechanisms, better
training data curation techniques, and advancements in Constitutional AI and
Reinforcement Learning from Human Feedback (RLHF).\n\n3. **Important Discoveries**:
\n - Recent advances in large language models include:\n 1. Mixture
of Experts (MoE) architectures\n 2. Improved attention mechanisms like
Flash Attention\n 3. Better training data curation techniques\n 4.
Constitutional AI and RLHF improvements\n - Key papers on Mixture of Experts:\n -
Switch Transformers (Google, 2021) - simplified MoE routing\n - GShard
- scaling to 600B parameters\n - Mixtral (Mistral AI) - open-source MoE
model\n - The main advantage of MoE architectures is computational efficiency,
as only a subset of experts is activated per token.\n\n4. **Next Steps**:
The assistant should delve deeper into the Mixture of Experts architectures,
potentially summarizing the key findings and implications from the identified
papers.\n\n5. **Context to Preserve**: \n - Key papers: \n - Switch
Transformers (Google, 2021)\n - GShard\n - Mixtral (Mistral AI)\n -
Focus on computational efficiency of MoE architectures.\n\u003c/summary\u003e","refusal":null,"role":"assistant"}}],"created":1770849953,"id":"chatcmpl-D8DFx1H1zzEerW5H0BWfuwmio2sz1","model":"gpt-4o-mini-2024-07-18","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":"fp_f97eff32c5","usage":{"completion_tokens":328,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens":368,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":696}}

'
headers:
Content-Length:
- '2786'
Content-Type:
- application/json
Date:
- Wed, 11 Feb 2026 22:45:56 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
- APIM-REQUEST-ID-XXX
azureml-model-session:
- AZUREML-MODEL-SESSION-XXX
x-accel-buffering:
- 'no'
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-deployment-name:
- gpt-4o-mini
x-ms-rai-invoked:
- 'true'
x-ms-region:
- X-MS-REGION-XXX
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
@@ -0,0 +1,103 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "Analyze the following conversation and
create a structured summary that preserves all information needed to continue
the task seamlessly.\n\n<conversation>\n[USER]: Research the latest developments
in large language models. Focus on architecture improvements and training techniques.\n\n[ASSISTANT]:
I''ll research the latest developments in large language models. Based on my
knowledge, recent advances include:\n1. Mixture of Experts (MoE) architectures\n2.
Improved attention mechanisms like Flash Attention\n3. Better training data
curation techniques\n4. Constitutional AI and RLHF improvements\n\n[USER]: Can
you go deeper on the MoE architectures? What are the key papers?\n\n[ASSISTANT]:
Key papers on Mixture of Experts:\n- Switch Transformers (Google, 2021) - simplified
MoE routing\n- GShard - scaling to 600B parameters\n- Mixtral (Mistral AI) -
open-source MoE model\nThe main advantage is computational efficiency: only
a subset of experts is activated per token.\n</conversation>\n\nCreate a summary
with these sections:\n1. **Task Overview**: What is the agent trying to accomplish?\n2.
**Current State**: What has been completed so far? What step is the agent on?\n3.
**Important Discoveries**: Key facts, data, tool results, or findings that must
not be lost.\n4. **Next Steps**: What should the agent do next based on the
conversation?\n5. **Context to Preserve**: Any specific values, names, URLs,
code snippets, or details referenced in the conversation.\n\nWrap your entire
summary in <summary> tags.\n\n<summary>\n[Your structured summary here]\n</summary>"}],
"role": "user"}], "systemInstruction": {"parts": [{"text": "You are a precise
assistant that creates structured summaries of agent conversations. You preserve
critical context needed for seamless task continuation."}], "role": "user"},
"generationConfig": {"temperature": 0.0}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '1895'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.13.3
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
response:
body:
string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
[\n          {\n            \"text\": \"```xml\\n\\u003csummary\\u003e\\n**Task
Overview**: Research the latest developments in large language models, focusing
on architecture improvements and training techniques.\\n\\n**Current State**:
The agent has identified several key areas of advancement in LLMs: Mixture
of Experts (MoE) architectures, improved attention mechanisms (Flash Attention),
better training data curation, and Constitutional AI/RLHF improvements. The
user has requested a deeper dive into MoE architectures. The agent has provided
an initial overview of MoE architectures and listed some key papers.\\n\\n**Important
Discoveries**:\\n* Key MoE papers: Switch Transformers (Google, 2021), GShard,
Mixtral (Mistral AI).\\n* MoE advantage: Computational efficiency through
selective activation of experts.\\n\\n**Next Steps**: Continue researching
MoE architectures based on the user's request for more detail. The agent should
elaborate further on the listed papers and potentially find more recent or
relevant publications.\\n\\n**Context to Preserve**:\\n* Focus areas: Architecture
improvements and training techniques for LLMs.\\n* Specific architectures:
Mixture of Experts (MoE), Flash Attention.\\n* Training techniques: Data
curation, Constitutional AI, RLHF.\\n* Specific papers: Switch Transformers
(Google, 2021), GShard, Mixtral (Mistral AI).\\n\\u003c/summary\\u003e\\n```\"\n
\ }\n ],\n \"role\": \"model\"\n },\n \"finishReason\":
\"STOP\",\n \"avgLogprobs\": -0.14186729703630721\n }\n ],\n \"usageMetadata\":
{\n \"promptTokenCount\": 373,\n \"candidatesTokenCount\": 280,\n \"totalTokenCount\":
653,\n \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
\ \"tokenCount\": 373\n }\n ],\n \"candidatesTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 280\n
\ }\n ]\n },\n \"modelVersion\": \"gemini-2.0-flash\",\n \"responseId\":
\"GEGKabP3OcGH-8YPzZCj2Ao\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Mon, 09 Feb 2026 20:18:35 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=2310
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1
@@ -0,0 +1,148 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are a precise assistant that
creates structured summaries of agent conversations. You preserve critical context
needed for seamless task continuation."},{"role":"user","content":"Analyze the
following conversation and create a structured summary that preserves all information
needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
the latest developments in large language models. Focus on architecture improvements
and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
in large language models. Based on my knowledge, recent advances include:\n1.
Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
Flash Attention\n3. Better training data curation techniques\n4. Constitutional
AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
advantage is computational efficiency: only a subset of experts is activated
per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
Key facts, data, tool results, or findings that must not be lost.\n4. **Next
Steps**: What should the agent do next based on the conversation?\n5. **Context
to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1844'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D7RxGISdQet8JsWImiwzHQ2S9gSD4\",\n \"object\":
\"chat.completion\",\n \"created\": 1770668126,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
agent is tasked with researching the latest developments in large language
models, specifically focusing on architecture improvements and training techniques.\\n\\n2.
**Current State**: The agent has identified several recent advances in large
language models, including Mixture of Experts (MoE) architectures, improved
attention mechanisms, better training data curation techniques, and advancements
in Constitutional AI and Reinforcement Learning from Human Feedback (RLHF).\\n\\n3.
**Important Discoveries**: \\n - Recent advances in large language models
include:\\n 1. Mixture of Experts (MoE) architectures\\n 2. Improved
attention mechanisms like Flash Attention\\n 3. Better training data curation
techniques\\n 4. Constitutional AI and RLHF improvements\\n - Key papers
on Mixture of Experts:\\n - Switch Transformers (Google, 2021) - simplified
MoE routing\\n - GShard - scaling to 600B parameters\\n - Mixtral
(Mistral AI) - open-source MoE model\\n - The main advantage of MoE architectures
is computational efficiency, as only a subset of experts is activated per
token.\\n\\n4. **Next Steps**: The agent should delve deeper into the Mixture
of Experts architectures, reviewing the key papers mentioned and summarizing
their contributions and implications for large language models.\\n\\n5. **Context
to Preserve**: \\n - Key papers: \\n - Switch Transformers (Google,
2021)\\n - GShard\\n - Mixtral (Mistral AI)\\n - Focus on computational
efficiency of MoE architectures.\\n</summary>\",\n \"refusal\": null,\n
\ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\":
\"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 368,\n \"completion_tokens\":
328,\n \"total_tokens\": 696,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Mon, 09 Feb 2026 20:15:32 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '5395'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
@@ -0,0 +1,145 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are a precise assistant that
creates structured summaries of agent conversations. You preserve critical context
needed for seamless task continuation."},{"role":"user","content":"Analyze the
following conversation and create a structured summary that preserves all information
needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
the latest developments in large language models. Focus on architecture improvements
and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
in large language models. Based on my knowledge, recent advances include:\n1.
Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
Flash Attention\n3. Better training data curation techniques\n4. Constitutional
AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
advantage is computational efficiency: only a subset of experts is activated
per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
Key facts, data, tool results, or findings that must not be lost.\n4. **Next
Steps**: What should the agent do next based on the conversation?\n5. **Context
to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1844'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D7RxM4n36QoACHrC0QocV1pXIwvtD\",\n \"object\":
\"chat.completion\",\n \"created\": 1770668132,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
user has requested research on the latest developments in large language models,
specifically focusing on architecture improvements and training techniques.\\n\\n2.
**Current State**: The assistant has identified several recent advances in
large language models, including Mixture of Experts (MoE) architectures, improved
attention mechanisms, better training data curation techniques, and advancements
in Constitutional AI and Reinforcement Learning from Human Feedback (RLHF).\\n\\n3.
**Important Discoveries**: \\n - Key papers on Mixture of Experts (MoE)
architectures:\\n - \\\"Switch Transformers\\\" (Google, 2021) - simplified
MoE routing.\\n - \\\"GShard\\\" - scaling to 600B parameters.\\n -
\\\"Mixtral\\\" (Mistral AI) - open-source MoE model.\\n - The main advantage
of MoE architectures is computational efficiency, as only a subset of experts
is activated per token.\\n\\n4. **Next Steps**: The assistant should delve
deeper into the Mixture of Experts architectures, potentially summarizing
the findings from the key papers mentioned.\\n\\n5. **Context to Preserve**:
\\n - Key papers: \\\"Switch Transformers,\\\" \\\"GShard,\\\" \\\"Mixtral.\\\"\\n
\ - Notable organizations: Google, Mistral AI.\\n - Focus areas: MoE architectures,
computational efficiency.\\n</summary>\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 368,\n \"completion_tokens\":
275,\n \"total_tokens\": 643,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Mon, 09 Feb 2026 20:15:36 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '4188'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
@@ -2,13 +2,23 @@

from __future__ import annotations

import asyncio
from typing import Any
from unittest.mock import MagicMock, patch
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from pydantic import BaseModel, Field

from crewai.tools.base_tool import BaseTool
from crewai.utilities.agent_utils import convert_tools_to_openai_schema, summarize_messages
from crewai.utilities.agent_utils import (
    _asummarize_chunks,
    _estimate_token_count,
    _extract_summary_tags,
    _format_messages_for_summary,
    _split_messages_into_chunks,
    convert_tools_to_openai_schema,
    summarize_messages,
)


class CalculatorInput(BaseModel):
@@ -214,6 +224,17 @@ class TestConvertToolsToOpenaiSchema:
        assert max_results_prop["default"] == 10


def _make_mock_i18n() -> MagicMock:
    """Create a mock i18n with the new structured prompt keys."""
    mock_i18n = MagicMock()
    mock_i18n.slice.side_effect = lambda key: {
        "summarizer_system_message": "You are a precise assistant that creates structured summaries.",
        "summarize_instruction": "Summarize the conversation:\n{conversation}",
        "summary": "<summary>\n{merged_summary}\n</summary>\nContinue the task.",
    }.get(key, "")
    return mock_i18n


class TestSummarizeMessages:
    """Tests for summarize_messages function."""

@@ -229,26 +250,22 @@ class TestSummarizeMessages:

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 1000
        mock_llm.call.return_value = "Summarized conversation about image analysis."

        mock_i18n = MagicMock()
        mock_i18n.slice.side_effect = lambda key: {
            "summarizer_system_message": "Summarize the following.",
            "summarize_instruction": "Summarize: {group}",
            "summary": "Summary: {merged_summary}",
        }.get(key, "")
        mock_llm.call.return_value = "<summary>Summarized conversation about image analysis.</summary>"

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=mock_i18n,
            i18n=_make_mock_i18n(),
        )

        assert len(messages) == 1
        assert messages[0]["role"] == "user"
        assert "files" in messages[0]
        assert messages[0]["files"] == mock_files
        # System message preserved + summary message = 2
        assert len(messages) == 2
        assert messages[0]["role"] == "system"
        summary_msg = messages[1]
        assert summary_msg["role"] == "user"
        assert "files" in summary_msg
        assert summary_msg["files"] == mock_files

    def test_merges_files_from_multiple_user_messages(self) -> None:
        """Test that files from multiple user messages are merged."""
@@ -264,20 +281,13 @@ class TestSummarizeMessages:

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 1000
        mock_llm.call.return_value = "Summarized conversation."

        mock_i18n = MagicMock()
        mock_i18n.slice.side_effect = lambda key: {
            "summarizer_system_message": "Summarize the following.",
            "summarize_instruction": "Summarize: {group}",
            "summary": "Summary: {merged_summary}",
        }.get(key, "")
        mock_llm.call.return_value = "<summary>Summarized conversation.</summary>"

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=mock_i18n,
            i18n=_make_mock_i18n(),
        )

        assert len(messages) == 1
@@ -297,20 +307,13 @@ class TestSummarizeMessages:

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 1000
        mock_llm.call.return_value = "A greeting exchange."

        mock_i18n = MagicMock()
        mock_i18n.slice.side_effect = lambda key: {
            "summarizer_system_message": "Summarize the following.",
            "summarize_instruction": "Summarize: {group}",
            "summary": "Summary: {merged_summary}",
        }.get(key, "")
        mock_llm.call.return_value = "<summary>A greeting exchange.</summary>"

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=mock_i18n,
            i18n=_make_mock_i18n(),
        )

        assert len(messages) == 1
@@ -327,21 +330,595 @@ class TestSummarizeMessages:

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 1000
        mock_llm.call.return_value = "Summary"

        mock_i18n = MagicMock()
        mock_i18n.slice.side_effect = lambda key: {
            "summarizer_system_message": "Summarize.",
            "summarize_instruction": "Summarize: {group}",
            "summary": "Summary: {merged_summary}",
        }.get(key, "")
        mock_llm.call.return_value = "<summary>Summary</summary>"

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=mock_i18n,
            i18n=_make_mock_i18n(),
        )

        assert id(messages) == original_list_id
        assert len(messages) == 1

    def test_preserves_system_messages(self) -> None:
        """Test that system messages are preserved and not summarized."""
        messages: list[dict[str, Any]] = [
            {"role": "system", "content": "You are a research assistant."},
            {"role": "user", "content": "Find information about AI."},
            {"role": "assistant", "content": "I found several resources on AI."},
        ]

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 1000
        mock_llm.call.return_value = "<summary>User asked about AI, assistant found resources.</summary>"

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=_make_mock_i18n(),
        )

        assert len(messages) == 2
        assert messages[0]["role"] == "system"
        assert messages[0]["content"] == "You are a research assistant."
        assert messages[1]["role"] == "user"

    def test_formats_conversation_with_role_labels(self) -> None:
        """Test that the LLM receives role-labeled conversation text."""
        messages: list[dict[str, Any]] = [
            {"role": "system", "content": "System prompt."},
            {"role": "user", "content": "Hello there"},
            {"role": "assistant", "content": "Hi! How can I help?"},
        ]

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 1000
        mock_llm.call.return_value = "<summary>Greeting exchange.</summary>"

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=_make_mock_i18n(),
        )

        # Check what was passed to llm.call
        call_args = mock_llm.call.call_args[0][0]
        user_msg_content = call_args[1]["content"]
        assert "[USER]:" in user_msg_content
        assert "[ASSISTANT]:" in user_msg_content
        # System content should NOT appear in summarization input
        assert "System prompt." not in user_msg_content

    def test_extracts_summary_from_tags(self) -> None:
        """Test that <summary> tags are extracted from LLM response."""
        messages: list[dict[str, Any]] = [
            {"role": "user", "content": "Do something."},
            {"role": "assistant", "content": "Done."},
        ]

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 1000
        mock_llm.call.return_value = "Here is the summary:\n<summary>The extracted summary content.</summary>\nExtra text."

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=_make_mock_i18n(),
        )

        assert "The extracted summary content." in messages[0]["content"]

    def test_handles_tool_messages(self) -> None:
        """Test that tool messages are properly formatted in summarization."""
        messages: list[dict[str, Any]] = [
            {"role": "user", "content": "Search for Python."},
            {"role": "assistant", "content": None, "tool_calls": [
                {"function": {"name": "web_search", "arguments": '{"query": "Python"}'}}
            ]},
            {"role": "tool", "content": "Python is a programming language.", "name": "web_search"},
            {"role": "assistant", "content": "Python is a programming language."},
        ]

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 1000
        mock_llm.call.return_value = "<summary>User searched for Python info.</summary>"

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=_make_mock_i18n(),
        )

        # Verify the conversation text sent to LLM contains tool labels
        call_args = mock_llm.call.call_args[0][0]
        user_msg_content = call_args[1]["content"]
        assert "[TOOL_RESULT (web_search)]:" in user_msg_content

    def test_only_system_messages_no_op(self) -> None:
        """Test that only system messages results in no-op (no summarization)."""
        messages: list[dict[str, Any]] = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "system", "content": "Additional system instructions."},
        ]

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 1000

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=_make_mock_i18n(),
        )

        # No LLM call should have been made
        mock_llm.call.assert_not_called()
        # System messages should remain untouched
        assert len(messages) == 2
        assert messages[0]["content"] == "You are a helpful assistant."
        assert messages[1]["content"] == "Additional system instructions."


class TestFormatMessagesForSummary:
    """Tests for _format_messages_for_summary helper."""

    def test_skips_system_messages(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "system", "content": "System prompt"},
            {"role": "user", "content": "Hello"},
        ]
        result = _format_messages_for_summary(messages)
        assert "System prompt" not in result
        assert "[USER]: Hello" in result

    def test_formats_user_and_assistant(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "user", "content": "Question"},
            {"role": "assistant", "content": "Answer"},
        ]
        result = _format_messages_for_summary(messages)
        assert "[USER]: Question" in result
        assert "[ASSISTANT]: Answer" in result

    def test_formats_tool_messages(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "tool", "content": "Result data", "name": "search_tool"},
        ]
        result = _format_messages_for_summary(messages)
        assert "[TOOL_RESULT (search_tool)]:" in result
        assert "Result data" in result

    def test_handles_none_content_with_tool_calls(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "assistant", "content": None, "tool_calls": [
                {"function": {"name": "calculator", "arguments": "{}"}}
            ]},
        ]
        result = _format_messages_for_summary(messages)
        assert "[Called tools: calculator]" in result

    def test_handles_none_content_without_tool_calls(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "assistant", "content": None},
        ]
        result = _format_messages_for_summary(messages)
        assert "[ASSISTANT]:" in result

    def test_handles_multimodal_content(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "user", "content": [
                {"type": "text", "text": "Describe this image"},
                {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
            ]},
        ]
        result = _format_messages_for_summary(messages)
        assert "[USER]: Describe this image" in result

    def test_empty_messages(self) -> None:
        result = _format_messages_for_summary([])
        assert result == ""
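

# Illustrative sketch only: the tests above pin down the labeling behavior of
# _format_messages_for_summary, but the real implementation lives in
# crewai.utilities.agent_utils and may differ. The `_sketch` name below is
# hypothetical and exists purely to document the assumed shape.
def _format_messages_for_summary_sketch(messages: list[dict[str, Any]]) -> str:
    lines: list[str] = []
    for msg in messages:
        role = msg.get("role")
        if role == "system":
            continue  # system prompts are preserved elsewhere, never summarized
        content = msg.get("content")
        if isinstance(content, list):
            # Multimodal content: keep only the text parts
            content = " ".join(
                part.get("text", "") for part in content if part.get("type") == "text"
            )
        if role == "tool":
            lines.append(f"[TOOL_RESULT ({msg.get('name')})]: {content}")
        elif content is None and msg.get("tool_calls"):
            names = ", ".join(tc["function"]["name"] for tc in msg["tool_calls"])
            lines.append(f"[ASSISTANT]: [Called tools: {names}]")
        else:
            lines.append(f"[{str(role).upper()}]: {content or ''}")
    return "\n".join(lines)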


class TestExtractSummaryTags:
    """Tests for _extract_summary_tags helper."""

    def test_extracts_content_from_tags(self) -> None:
        text = "Preamble\n<summary>The actual summary.</summary>\nPostamble"
        assert _extract_summary_tags(text) == "The actual summary."

    def test_handles_multiline_content(self) -> None:
        text = "<summary>\nLine 1\nLine 2\nLine 3\n</summary>"
        result = _extract_summary_tags(text)
        assert "Line 1" in result
        assert "Line 2" in result
        assert "Line 3" in result

    def test_falls_back_when_no_tags(self) -> None:
        text = "Just a plain summary without tags."
        assert _extract_summary_tags(text) == text

    def test_handles_empty_string(self) -> None:
        assert _extract_summary_tags("") == ""

    def test_extracts_first_match(self) -> None:
        text = "<summary>First</summary> text <summary>Second</summary>"
        assert _extract_summary_tags(text) == "First"
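

# Illustrative sketch of the extraction behavior these tests describe: the
# first <summary>...</summary> block wins, and untagged text falls back to
# itself. Hypothetical name; the real helper may be implemented differently.
import re

def _extract_summary_tags_sketch(text: str) -> str:
    match = re.search(r"<summary>(.*?)</summary>", text, re.DOTALL)
    return match.group(1).strip() if match else text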


class TestSplitMessagesIntoChunks:
    """Tests for _split_messages_into_chunks helper."""

    def test_single_chunk_when_under_limit(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi"},
        ]
        chunks = _split_messages_into_chunks(messages, max_tokens=1000)
        assert len(chunks) == 1
        assert len(chunks[0]) == 2

    def test_splits_at_message_boundaries(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "user", "content": "A" * 100},  # ~25 tokens
            {"role": "assistant", "content": "B" * 100},  # ~25 tokens
            {"role": "user", "content": "C" * 100},  # ~25 tokens
        ]
        # max_tokens=30 should cause splits
        chunks = _split_messages_into_chunks(messages, max_tokens=30)
        assert len(chunks) == 3

    def test_excludes_system_messages(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "system", "content": "System prompt"},
            {"role": "user", "content": "Hello"},
        ]
        chunks = _split_messages_into_chunks(messages, max_tokens=1000)
        assert len(chunks) == 1
        # The system message should not be in any chunk
        for chunk in chunks:
            for msg in chunk:
                assert msg.get("role") != "system"

    def test_empty_messages(self) -> None:
        chunks = _split_messages_into_chunks([], max_tokens=1000)
        assert chunks == []

    def test_only_system_messages(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "system", "content": "System prompt"},
        ]
        chunks = _split_messages_into_chunks(messages, max_tokens=1000)
        assert chunks == []

    def test_handles_none_content(self) -> None:
        messages: list[dict[str, Any]] = [
            {"role": "assistant", "content": None},
            {"role": "user", "content": "Follow up"},
        ]
        chunks = _split_messages_into_chunks(messages, max_tokens=1000)
        assert len(chunks) == 1
        assert len(chunks[0]) == 2
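

# Illustrative sketch of a boundary-respecting splitter consistent with the
# tests above: accumulate non-system messages until the estimated token budget
# is exceeded, then start a new chunk. Hypothetical `_sketch` name; the real
# _split_messages_into_chunks may differ in detail.
def _split_messages_into_chunks_sketch(
    messages: list[dict[str, Any]], max_tokens: int
) -> list[list[dict[str, Any]]]:
    chunks: list[list[dict[str, Any]]] = []
    current: list[dict[str, Any]] = []
    current_tokens = 0
    for msg in messages:
        if msg.get("role") == "system":
            continue  # system messages are never chunked
        tokens = _estimate_token_count(str(msg.get("content") or ""))
        if current and current_tokens + tokens > max_tokens:
            # Budget exceeded: close the current chunk at a message boundary
            chunks.append(current)
            current, current_tokens = [], 0
        current.append(msg)
        current_tokens += tokens
    if current:
        chunks.append(current)
    return chunks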


class TestEstimateTokenCount:
    """Tests for _estimate_token_count helper."""

    def test_empty_string(self) -> None:
        assert _estimate_token_count("") == 0

    def test_short_string(self) -> None:
        assert _estimate_token_count("hello") == 1  # 5 // 4 = 1

    def test_longer_string(self) -> None:
        assert _estimate_token_count("a" * 100) == 25  # 100 // 4 = 25

    def test_approximation_is_conservative(self) -> None:
        # For English text, actual token count is typically lower than char/4
        text = "The quick brown fox jumps over the lazy dog."
        estimated = _estimate_token_count(text)
        assert estimated > 0
        assert estimated == len(text) // 4
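

# The char//4 heuristic asserted above implies an implementation along these
# lines (a minimal sketch under that assumption; the real _estimate_token_count
# may differ):
def _estimate_token_count_sketch(text: str) -> int:
    """Rough token estimate: roughly four characters per token for English."""
    return len(text) // 4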


class TestParallelSummarization:
    """Tests for parallel chunk summarization via asyncio."""

    def _make_messages_for_n_chunks(self, n: int) -> list[dict[str, Any]]:
        """Build a message list that will produce exactly *n* chunks.

        Each message has 400 chars (~100 tokens). With max_tokens=100 returned
        by the mock LLM, each message lands in its own chunk.
        """
        msgs: list[dict[str, Any]] = []
        for i in range(n):
            msgs.append({"role": "user", "content": f"msg-{i} " + "x" * 400})
        return msgs

    def test_multiple_chunks_use_acall(self) -> None:
        """When there are multiple chunks, summarize_messages should use
        llm.acall (parallel) instead of llm.call (sequential)."""
        messages = self._make_messages_for_n_chunks(3)

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 100  # force multiple chunks
        mock_llm.acall = AsyncMock(
            side_effect=[
                "<summary>Summary chunk 1</summary>",
                "<summary>Summary chunk 2</summary>",
                "<summary>Summary chunk 3</summary>",
            ]
        )

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=_make_mock_i18n(),
        )

        # acall should have been awaited once per chunk
        assert mock_llm.acall.await_count == 3
        # sync call should NOT have been used for chunk summarization
        mock_llm.call.assert_not_called()

    def test_single_chunk_uses_sync_call(self) -> None:
        """When there is only one chunk, summarize_messages should use
        the sync llm.call path (no async overhead)."""
        messages: list[dict[str, Any]] = [
            {"role": "user", "content": "Short message"},
            {"role": "assistant", "content": "Short reply"},
        ]

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 100_000
        mock_llm.call.return_value = "<summary>Short summary</summary>"

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=_make_mock_i18n(),
        )

        mock_llm.call.assert_called_once()

    def test_parallel_results_preserve_order(self) -> None:
        """Summaries must appear in the same order as the original chunks,
        regardless of which async call finishes first."""
        messages = self._make_messages_for_n_chunks(3)

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 100

        # Simulate varying latencies — chunk 2 finishes before chunk 0
        async def _delayed_acall(msgs: Any, **kwargs: Any) -> str:
            user_content = msgs[1]["content"]
            if "msg-0" in user_content:
                await asyncio.sleep(0.05)
                return "<summary>Summary-A</summary>"
            elif "msg-1" in user_content:
                return "<summary>Summary-B</summary>"  # fastest
            else:
                await asyncio.sleep(0.02)
                return "<summary>Summary-C</summary>"

        mock_llm.acall = _delayed_acall

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=_make_mock_i18n(),
        )

        # The final summary message should have A, B, C in order
        summary_content = messages[-1]["content"]
        pos_a = summary_content.index("Summary-A")
        pos_b = summary_content.index("Summary-B")
        pos_c = summary_content.index("Summary-C")
        assert pos_a < pos_b < pos_c

    def test_asummarize_chunks_returns_ordered_results(self) -> None:
        """Direct test of the async helper _asummarize_chunks."""
        chunk_a: list[dict[str, Any]] = [{"role": "user", "content": "Chunk A"}]
        chunk_b: list[dict[str, Any]] = [{"role": "user", "content": "Chunk B"}]

        mock_llm = MagicMock()
        mock_llm.acall = AsyncMock(
            side_effect=[
                "<summary>Result A</summary>",
                "<summary>Result B</summary>",
            ]
        )

        results = asyncio.run(
            _asummarize_chunks(
                chunks=[chunk_a, chunk_b],
                llm=mock_llm,
                callbacks=[],
                i18n=_make_mock_i18n(),
            )
        )

        assert len(results) == 2
        assert results[0]["content"] == "Result A"
        assert results[1]["content"] == "Result B"

    @patch("crewai.utilities.agent_utils.is_inside_event_loop", return_value=True)
    def test_works_inside_existing_event_loop(self, _mock_loop: Any) -> None:
        """When called from inside a running event loop (e.g. a Flow),
        the ThreadPoolExecutor fallback should still work."""
        messages = self._make_messages_for_n_chunks(2)

        mock_llm = MagicMock()
        mock_llm.get_context_window_size.return_value = 100
        mock_llm.acall = AsyncMock(
            side_effect=[
                "<summary>Flow summary 1</summary>",
                "<summary>Flow summary 2</summary>",
            ]
        )

        summarize_messages(
            messages=messages,
            llm=mock_llm,
            callbacks=[],
            i18n=_make_mock_i18n(),
        )

        assert mock_llm.acall.await_count == 2
        # Verify the merged summary made it into messages
        assert "Flow summary 1" in messages[-1]["content"]
        assert "Flow summary 2" in messages[-1]["content"]
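

# Why chunk order survives parallel execution: asyncio.gather returns results
# in the order its awaitables were passed in, not in completion order. A
# minimal sketch of that property, assuming an acall-style coroutine per chunk
# (hypothetical helper name, not the actual implementation):
async def _gather_in_order_sketch(llm: Any, chunk_prompts: list[Any]) -> list[str]:
    # One coroutine per chunk; results come back aligned with chunk_prompts.
    return list(await asyncio.gather(*(llm.acall(p) for p in chunk_prompts)))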


def _build_long_conversation() -> list[dict[str, Any]]:
    """Build a multi-turn conversation that produces multiple chunks at max_tokens=200.

    Each non-system message is ~100-140 estimated tokens (400-560 chars),
    so a max_tokens of 200 yields roughly 3 chunks from 6 messages.
    """
    return [
        {
            "role": "system",
            "content": "You are a helpful research assistant.",
        },
        {
            "role": "user",
            "content": (
                "Tell me about the history of the Python programming language. "
                "Who created it, when was it first released, and what were the "
                "main design goals? Please provide a detailed overview covering "
                "the major milestones from its inception through Python 3."
            ),
        },
        {
            "role": "assistant",
            "content": (
                "Python was created by Guido van Rossum and first released in 1991. "
                "The main design goals were code readability and simplicity. Key milestones: "
                "Python 1.0 (1994) introduced functional programming tools like lambda and map. "
                "Python 2.0 (2000) added list comprehensions and garbage collection. "
                "Python 3.0 (2008) was a major backward-incompatible release that fixed "
                "fundamental design flaws. Python 2 reached end-of-life in January 2020."
            ),
        },
        {
            "role": "user",
            "content": (
                "What about the async/await features? When were they introduced "
                "and how do they compare to similar features in JavaScript and C#? "
                "Also explain the Global Interpreter Lock and its implications."
            ),
        },
        {
            "role": "assistant",
            "content": (
                "Async/await was introduced in Python 3.5 (PEP 492, 2015). "
                "Unlike JavaScript which is single-threaded by design, Python's asyncio "
                "is an opt-in framework. C# introduced async/await in 2012 (C# 5.0) and "
                "was a major inspiration for Python's implementation. "
                "The GIL (Global Interpreter Lock) is a mutex that protects access to "
                "Python objects, preventing multiple threads from executing Python bytecodes "
                "simultaneously. This means CPU-bound multithreaded programs don't benefit "
                "from multiple cores. PEP 703 proposes making the GIL optional in CPython."
            ),
        },
        {
            "role": "user",
            "content": (
                "Explain the Python package ecosystem. How does pip work, what is PyPI, "
                "and what are virtual environments? Compare pip with conda and uv."
            ),
        },
        {
            "role": "assistant",
            "content": (
                "PyPI (Python Package Index) is the official repository hosting 400k+ packages. "
                "pip is the standard package installer that downloads from PyPI. "
                "Virtual environments (venv) create isolated Python installations to avoid "
                "dependency conflicts between projects. conda is a cross-language package manager "
                "popular in data science that can manage non-Python dependencies. "
                "uv is a new Rust-based tool that is 10-100x faster than pip and aims to replace "
                "pip, pip-tools, and virtualenv with a single unified tool."
            ),
        },
    ]


class TestParallelSummarizationVCR:
    """VCR-backed integration tests for parallel summarization.

    These tests use a real LLM but patch get_context_window_size to force
    multiple chunks, exercising the asyncio.gather + acall parallel path.

    To record cassettes:
        PYTEST_VCR_RECORD_MODE=all uv run pytest lib/crewai/tests/utilities/test_agent_utils.py::TestParallelSummarizationVCR -v
    """

    @pytest.mark.vcr()
    def test_parallel_summarize_openai(self) -> None:
        """Test that parallel summarization with gpt-4o-mini produces a valid summary."""
        from crewai.llm import LLM
        from crewai.utilities.i18n import I18N

        llm = LLM(model="gpt-4o-mini", temperature=0)
        i18n = I18N()
        messages = _build_long_conversation()

        original_system = messages[0]["content"]

        # Patch get_context_window_size to return 200 — forces multiple chunks
        with patch.object(type(llm), "get_context_window_size", return_value=200):
            # Verify we actually get multiple chunks with this window size
            non_system = [m for m in messages if m.get("role") != "system"]
            chunks = _split_messages_into_chunks(non_system, max_tokens=200)
            assert len(chunks) > 1, f"Expected multiple chunks, got {len(chunks)}"

            summarize_messages(
                messages=messages,
                llm=llm,
                callbacks=[],
                i18n=i18n,
            )

        # System message preserved
        assert messages[0]["role"] == "system"
        assert messages[0]["content"] == original_system

        # Summary produced as a user message
        summary_msg = messages[-1]
        assert summary_msg["role"] == "user"
        assert len(summary_msg["content"]) > 0

    @pytest.mark.vcr()
    def test_parallel_summarize_preserves_files(self) -> None:
        """Test that file references survive parallel summarization."""
        from crewai.llm import LLM
        from crewai.utilities.i18n import I18N

        llm = LLM(model="gpt-4o-mini", temperature=0)
        i18n = I18N()
        messages = _build_long_conversation()

        mock_file = MagicMock()
        messages[1]["files"] = {"report.pdf": mock_file}

        with patch.object(type(llm), "get_context_window_size", return_value=200):
            summarize_messages(
                messages=messages,
                llm=llm,
                callbacks=[],
                i18n=i18n,
            )

        summary_msg = messages[-1]
        assert summary_msg["role"] == "user"
        assert "files" in summary_msg
        assert "report.pdf" in summary_msg["files"]

284 lib/crewai/tests/utilities/test_summarize_integration.py Normal file
@@ -0,0 +1,284 @@
"""
|
||||
Integration tests for structured context compaction (summarize_messages).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.agent import Agent
|
||||
from crewai.crew import Crew
|
||||
from crewai.llm import LLM
|
||||
from crewai.task import Task
|
||||
from crewai.utilities.agent_utils import summarize_messages
|
||||
from crewai.utilities.i18n import I18N
|
||||
|
||||
|
||||
def _build_conversation_messages(
|
||||
*, include_system: bool = True, include_files: bool = False
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Build a realistic multi-turn conversation for summarization tests."""
|
||||
messages: list[dict[str, Any]] = []
|
||||
|
||||
if include_system:
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a research assistant specializing in AI topics. "
|
||||
"Your goal is to find accurate, up-to-date information."
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
user_msg: dict[str, Any] = {
|
||||
"role": "user",
|
||||
"content": (
|
||||
"Research the latest developments in large language models. "
|
||||
"Focus on architecture improvements and training techniques."
|
||||
),
|
||||
}
|
||||
if include_files:
|
||||
user_msg["files"] = {"reference.pdf": MagicMock()}
|
||||
messages.append(user_msg)
|
||||
|
||||
messages.append(
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": (
|
||||
"I'll research the latest developments in large language models. "
|
||||
"Based on my knowledge, recent advances include:\n"
|
||||
"1. Mixture of Experts (MoE) architectures\n"
|
||||
"2. Improved attention mechanisms like Flash Attention\n"
|
||||
"3. Better training data curation techniques\n"
|
||||
"4. Constitutional AI and RLHF improvements"
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
messages.append(
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Can you go deeper on the MoE architectures? What are the key papers?",
|
||||
}
|
||||
)
|
||||
|
||||
messages.append(
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": (
|
||||
"Key papers on Mixture of Experts:\n"
|
||||
"- Switch Transformers (Google, 2021) - simplified MoE routing\n"
|
||||
"- GShard - scaling to 600B parameters\n"
|
||||
"- Mixtral (Mistral AI) - open-source MoE model\n"
|
||||
"The main advantage is computational efficiency: "
|
||||
"only a subset of experts is activated per token."
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
return messages
|
||||
|
||||
|
||||
class TestSummarizeDirectOpenAI:
|
||||
"""Test direct summarize_messages calls with OpenAI."""
|
||||
|
||||
@pytest.mark.vcr()
|
||||
def test_summarize_direct_openai(self) -> None:
|
||||
"""Test summarize_messages with gpt-4o-mini preserves system messages."""
|
||||
llm = LLM(model="gpt-4o-mini", temperature=0)
|
||||
i18n = I18N()
|
||||
messages = _build_conversation_messages(include_system=True)
|
||||
|
||||
original_system_content = messages[0]["content"]
|
||||
|
||||
summarize_messages(
|
||||
messages=messages,
|
||||
llm=llm,
|
||||
callbacks=[],
|
||||
i18n=i18n,
|
||||
)
|
||||
|
||||
# System message should be preserved
|
||||
assert len(messages) >= 2
|
||||
assert messages[0]["role"] == "system"
|
||||
assert messages[0]["content"] == original_system_content
|
||||
|
||||
# Summary should be a user message with <summary> block
|
||||
summary_msg = messages[-1]
|
||||
assert summary_msg["role"] == "user"
|
||||
assert len(summary_msg["content"]) > 0
|
||||
assert "<summary>" in summary_msg["content"]
|
||||
assert "</summary>" in summary_msg["content"]
|
||||
|
||||
|
||||
class TestSummarizeDirectAnthropic:
    """Test direct summarize_messages calls with Anthropic."""

    @pytest.mark.vcr()
    def test_summarize_direct_anthropic(self) -> None:
        """Test summarize_messages with claude-3-5-haiku."""
        llm = LLM(model="anthropic/claude-3-5-haiku-latest", temperature=0)
        i18n = I18N()
        messages = _build_conversation_messages(include_system=True)

        summarize_messages(
            messages=messages,
            llm=llm,
            callbacks=[],
            i18n=i18n,
        )

        assert len(messages) >= 2
        assert messages[0]["role"] == "system"
        summary_msg = messages[-1]
        assert summary_msg["role"] == "user"
        assert len(summary_msg["content"]) > 0
        assert "<summary>" in summary_msg["content"]
        assert "</summary>" in summary_msg["content"]


class TestSummarizeDirectGemini:
    """Test direct summarize_messages calls with Gemini."""

    @pytest.mark.vcr()
    def test_summarize_direct_gemini(self) -> None:
        """Test summarize_messages with gemini-2.0-flash."""
        llm = LLM(model="gemini/gemini-2.0-flash", temperature=0)
        i18n = I18N()
        messages = _build_conversation_messages(include_system=True)

        summarize_messages(
            messages=messages,
            llm=llm,
            callbacks=[],
            i18n=i18n,
        )

        assert len(messages) >= 2
        assert messages[0]["role"] == "system"
        summary_msg = messages[-1]
        assert summary_msg["role"] == "user"
        assert len(summary_msg["content"]) > 0
        assert "<summary>" in summary_msg["content"]
        assert "</summary>" in summary_msg["content"]


class TestSummarizeDirectAzure:
    """Test direct summarize_messages calls with Azure."""

    @pytest.mark.vcr()
    def test_summarize_direct_azure(self) -> None:
        """Test summarize_messages with azure/gpt-4o-mini."""
        llm = LLM(model="azure/gpt-4o-mini", temperature=0)
        i18n = I18N()
        messages = _build_conversation_messages(include_system=True)

        summarize_messages(
            messages=messages,
            llm=llm,
            callbacks=[],
            i18n=i18n,
        )

        assert len(messages) >= 2
        assert messages[0]["role"] == "system"
        summary_msg = messages[-1]
        assert summary_msg["role"] == "user"
        assert len(summary_msg["content"]) > 0
        assert "<summary>" in summary_msg["content"]
        assert "</summary>" in summary_msg["content"]


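# The end-to-end tests below shrink llm.context_window_size so that even a
# short exchange can approach the window limit. The working assumption (not
# guaranteed by the recorded cassettes) is that the executor then compacts
# history via summarize_messages; since actual response sizes vary, these
# tests only assert that the run completes without crashing.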
class TestCrewKickoffCompaction:
    """Test compaction triggered via Crew.kickoff() with small context window."""

    @pytest.mark.vcr()
    def test_crew_kickoff_compaction_openai(self) -> None:
        """Test that compaction is triggered during kickoff with small context_window_size."""
        llm = LLM(model="gpt-4o-mini", temperature=0)
        # Force a very small context window to trigger compaction
        llm.context_window_size = 500

        agent = Agent(
            role="Researcher",
            goal="Find information about Python programming",
            backstory="You are an expert researcher.",
            llm=llm,
            verbose=False,
            max_iter=2,
        )

        task = Task(
            description="What is Python? Give a brief answer.",
            expected_output="A short description of Python.",
            agent=agent,
        )

        crew = Crew(agents=[agent], tasks=[task], verbose=False)

        # This may or may not trigger compaction depending on actual response sizes.
        # The test verifies the code path doesn't crash.
        result = crew.kickoff()
        assert result is not None


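# Same idea as above, but driving the compaction path through
# Agent.execute_task() directly, without the Crew orchestration layer.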
class TestAgentExecuteTaskCompaction:
    """Test compaction triggered via Agent.execute_task()."""

    @pytest.mark.vcr()
    def test_agent_execute_task_compaction(self) -> None:
        """Test that Agent.execute_task() works with small context_window_size."""
        llm = LLM(model="gpt-4o-mini", temperature=0)
        llm.context_window_size = 500

        agent = Agent(
            role="Writer",
            goal="Write concise content",
            backstory="You are a skilled writer.",
            llm=llm,
            verbose=False,
            max_iter=2,
        )

        task = Task(
            description="Write one sentence about the sun.",
            expected_output="A single sentence about the sun.",
            agent=agent,
        )

        result = agent.execute_task(task=task)
        assert result is not None


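# MagicMock() stands in for a real attachment handle in the helper above; the
# test below only checks that the "files" mapping is carried over onto the
# appended summary message, not that the file content itself survives.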
class TestSummarizePreservesFiles:
    """Test that files are preserved through real summarization."""

    @pytest.mark.vcr()
    def test_summarize_preserves_files_integration(self) -> None:
        """Test that file references survive a real summarization call."""
        llm = LLM(model="gpt-4o-mini", temperature=0)
        i18n = I18N()
        messages = _build_conversation_messages(
            include_system=True, include_files=True
        )

        summarize_messages(
            messages=messages,
            llm=llm,
            callbacks=[],
            i18n=i18n,
        )

        # System message preserved
        assert messages[0]["role"] == "system"

        # Files should be on the summary message with <summary> block
        summary_msg = messages[-1]
        assert "<summary>" in summary_msg["content"]
        assert "</summary>" in summary_msg["content"]
        assert "files" in summary_msg
        assert "reference.pdf" in summary_msg["files"]