cassette gen

lorenzejay
2026-02-09 12:23:20 -08:00
parent 0524d0454d
commit 687b1a1e85
6 changed files with 765 additions and 0 deletions
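The files below are VCR-style cassettes: YAML recordings of HTTP requests and responses against the OpenAI, Anthropic, and Gemini APIs, with credentials and volatile headers replaced by placeholder values (for example AUTHORIZATION-XXX). As a hedged illustration only (none of this code is part of the commit, and the cassette directory, cassette name, and test are hypothetical), cassettes like these are typically recorded once and then replayed in tests with vcrpy:

```python
# Minimal vcrpy sketch (assumed tooling, not part of this commit) showing how
# cassettes like the ones below are usually recorded on the first run and
# replayed on later runs instead of hitting the live APIs.
import vcr

my_vcr = vcr.VCR(
    cassette_library_dir="tests/cassettes",          # hypothetical location
    record_mode="once",                              # record once, replay afterwards
    filter_headers=["authorization", "x-api-key"],   # scrub secrets, as these files do
)

def test_writer_agent():
    # Hypothetical test: any HTTP call made inside this block is served from
    # (or recorded into) the named cassette rather than reaching the live API.
    with my_vcr.use_cassette("writer_one_sentence.yaml"):
        ...  # the code under test issues its HTTP request here
```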

@@ -0,0 +1,113 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are Writer. You are a skilled
writer.\nYour personal goal is: Write concise content"},{"role":"user","content":"\nCurrent
Task: Write one sentence about the sun.\n\nThis is the expected criteria for
your final answer: A single sentence about the sun.\nyou MUST return the actual
complete content as the final answer, not a summary.\n\nProvide your complete
response:"}],"model":"gpt-4o-mini","temperature":0}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '453'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D7RxEngFVCbqdc7tNjV3VjeteqcwT\",\n \"object\":
\"chat.completion\",\n \"created\": 1770668124,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The sun is a massive ball of glowing
gas at the center of our solar system, providing light and warmth essential
for life on Earth.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 78,\n \"completion_tokens\":
27,\n \"total_tokens\": 105,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Mon, 09 Feb 2026 20:15:25 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '664'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
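The cassette above captures a single chat completion against gpt-4o-mini at temperature 0 for the "Writer" agent. A minimal sketch of the client call that would produce such a request, using the OpenAI Python SDK (1.83.0 per the recorded headers); the prompt strings are abbreviated here, and the full text is the one recorded in the request body above:

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

response = client.chat.completions.create(
    model="gpt-4o-mini",
    temperature=0,
    messages=[
        {"role": "system",
         "content": "You are Writer. You are a skilled writer.\n"
                    "Your personal goal is: Write concise content"},
        {"role": "user",
         "content": "Current Task: Write one sentence about the sun. ..."},  # abbreviated
    ],
)
print(response.choices[0].message.content)
# -> "The sun is a massive ball of glowing gas at the center of our solar system, ..."
```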

@@ -0,0 +1,120 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are Researcher. You are an
expert researcher.\nYour personal goal is: Find information about Python programming"},{"role":"user","content":"\nCurrent
Task: What is Python? Give a brief answer.\n\nThis is the expected criteria
for your final answer: A short description of Python.\nyou MUST return the actual
complete content as the final answer, not a summary.\n\nProvide your complete
response:"}],"model":"gpt-4o-mini","temperature":0}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '482'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D7RxRv3U0LCLf2iqf40wxOQsuiYFR\",\n \"object\":
\"chat.completion\",\n \"created\": 1770668137,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"Python is a high-level, interpreted
programming language known for its readability and simplicity. It was created
by Guido van Rossum and first released in 1991. Python supports multiple programming
paradigms, including procedural, object-oriented, and functional programming.
It has a large standard library and is widely used for web development, data
analysis, artificial intelligence, scientific computing, and automation, among
other applications. Python's syntax emphasizes code readability, allowing
developers to express concepts in fewer lines of code compared to other languages.
Its active community and extensive ecosystem of libraries and frameworks make
it a popular choice for both beginners and experienced programmers.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
82,\n \"completion_tokens\": 123,\n \"total_tokens\": 205,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Mon, 09 Feb 2026 20:15:39 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '2467'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

@@ -0,0 +1,136 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Analyze the following
conversation and create a structured summary that preserves all information
needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
the latest developments in large language models. Focus on architecture improvements
and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
in large language models. Based on my knowledge, recent advances include:\n1.
Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
Flash Attention\n3. Better training data curation techniques\n4. Constitutional
AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
advantage is computational efficiency: only a subset of experts is activated
per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
Key facts, data, tool results, or findings that must not be lost.\n4. **Next
Steps**: What should the agent do next based on the conversation?\n5. **Context
to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
structured summary here]\n</summary>"}],"model":"claude-3-5-haiku-latest","stream":false,"system":"You
are a precise assistant that creates structured summaries of agent conversations.
You preserve critical context needed for seamless task continuation.","temperature":0}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '1870'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.73.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01SK3LP6RedPBmpvD1HfKD23","type":"message","role":"assistant","content":[{"type":"text","text":"<summary>\n1.
**Task Overview**:\n- Research latest developments in large language models\n-
Focus on architecture improvements and training techniques\n\n2. **Current
State**:\n- Initial research completed on broad developments\n- Currently
exploring Mixture of Experts (MoE) architectures in depth\n- Detailed discussion
of key MoE research papers initiated\n\n3. **Important Discoveries**:\nMoE
Architecture Insights:\n- Computational efficiency through selective expert
activation\n- Key research papers:\n * Switch Transformers (Google, 2021)\n *
GShard\n * Mixtral (Mistral AI)\n- Main benefit: Only subset of experts activated
per token\n\n4. **Next Steps**:\n- Conduct deeper analysis of MoE architecture
mechanisms\n- Compare routing strategies across different MoE implementations\n-
Investigate performance metrics and scalability of MoE models\n\n5. **Context
to Preserve**:\n- Research Focus: Large Language Model Architectures\n- Specific
Interest: Mixture of Experts (MoE) Architectures\n- Key Researchers/Organizations:
Google, Mistral AI\n- Years of Significant Papers: 2021 onwards\n</summary>"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":400,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":270,"service_tier":"standard","inference_geo":"not_available"}}'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Security-Policy:
- CSP-FILTERED
Content-Type:
- application/json
Date:
- Mon, 09 Feb 2026 20:18:41 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-02-09T20:18:35Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '5639'
status:
code: 200
message: OK
version: 1
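The cassette above records a conversation-summarization request to the Anthropic Messages API using claude-3-5-haiku-latest, max_tokens 4096, temperature 0, and a separate system prompt. A minimal sketch of the equivalent call with the anthropic Python SDK (0.73.0 in the recorded headers); prompts are abbreviated, and the full text is in the request body above:

```python
from anthropic import Anthropic

client = Anthropic()  # assumes ANTHROPIC_API_KEY is set in the environment

message = client.messages.create(
    model="claude-3-5-haiku-latest",
    max_tokens=4096,
    temperature=0,
    system=(
        "You are a precise assistant that creates structured summaries of agent "
        "conversations. You preserve critical context needed for seamless task continuation."
    ),
    messages=[
        {"role": "user",
         "content": "Analyze the following conversation and create a structured summary ..."},  # abbreviated
    ],
)
print(message.content[0].text)  # the <summary> block recorded above
```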

@@ -0,0 +1,103 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "Analyze the following conversation and
create a structured summary that preserves all information needed to continue
the task seamlessly.\n\n<conversation>\n[USER]: Research the latest developments
in large language models. Focus on architecture improvements and training techniques.\n\n[ASSISTANT]:
I''ll research the latest developments in large language models. Based on my
knowledge, recent advances include:\n1. Mixture of Experts (MoE) architectures\n2.
Improved attention mechanisms like Flash Attention\n3. Better training data
curation techniques\n4. Constitutional AI and RLHF improvements\n\n[USER]: Can
you go deeper on the MoE architectures? What are the key papers?\n\n[ASSISTANT]:
Key papers on Mixture of Experts:\n- Switch Transformers (Google, 2021) - simplified
MoE routing\n- GShard - scaling to 600B parameters\n- Mixtral (Mistral AI) -
open-source MoE model\nThe main advantage is computational efficiency: only
a subset of experts is activated per token.\n</conversation>\n\nCreate a summary
with these sections:\n1. **Task Overview**: What is the agent trying to accomplish?\n2.
**Current State**: What has been completed so far? What step is the agent on?\n3.
**Important Discoveries**: Key facts, data, tool results, or findings that must
not be lost.\n4. **Next Steps**: What should the agent do next based on the
conversation?\n5. **Context to Preserve**: Any specific values, names, URLs,
code snippets, or details referenced in the conversation.\n\nWrap your entire
summary in <summary> tags.\n\n<summary>\n[Your structured summary here]\n</summary>"}],
"role": "user"}], "systemInstruction": {"parts": [{"text": "You are a precise
assistant that creates structured summaries of agent conversations. You preserve
critical context needed for seamless task continuation."}], "role": "user"},
"generationConfig": {"temperature": 0.0}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '1895'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.13.3
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"```xml\\n\\u003csummary\\u003e\\n**Task
Overview**: Research the latest developments in large language models, focusing
on architecture improvements and training techniques.\\n\\n**Current State**:
The agent has identified several key areas of advancement in LLMs: Mixture
of Experts (MoE) architectures, improved attention mechanisms (Flash Attention),
better training data curation, and Constitutional AI/RLHF improvements. The
user has requested a deeper dive into MoE architectures. The agent has provided
an initial overview of MoE architectures and listed some key papers.\\n\\n**Important
Discoveries**:\\n* Key MoE papers: Switch Transformers (Google, 2021), GShard,
Mixtral (Mistral AI).\\n* MoE advantage: Computational efficiency through
selective activation of experts.\\n\\n**Next Steps**: Continue researching
MoE architectures based on the user's request for more detail. The agent should
elaborate further on the listed papers and potentially find more recent or
relevant publications.\\n\\n**Context to Preserve**:\\n* Focus areas: Architecture
improvements and training techniques for LLMs.\\n* Specific architectures:
Mixture of Experts (MoE), Flash Attention.\\n* Training techniques: Data
curation, Constitutional AI, RLHF.\\n* Specific papers: Switch Transformers
(Google, 2021), GShard, Mixtral (Mistral AI).\\n\\u003c/summary\\u003e\\n```\"\n
\ }\n ],\n \"role\": \"model\"\n },\n \"finishReason\":
\"STOP\",\n \"avgLogprobs\": -0.14186729703630721\n }\n ],\n \"usageMetadata\":
{\n \"promptTokenCount\": 373,\n \"candidatesTokenCount\": 280,\n \"totalTokenCount\":
653,\n \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
\ \"tokenCount\": 373\n }\n ],\n \"candidatesTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 280\n
\ }\n ]\n },\n \"modelVersion\": \"gemini-2.0-flash\",\n \"responseId\":
\"GEGKabP3OcGH-8YPzZCj2Ao\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Mon, 09 Feb 2026 20:18:35 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=2310
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1
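The cassette above records the same summarization prompt sent to gemini-2.0-flash via the generateContent endpoint, with the system prompt passed as systemInstruction and temperature 0. A minimal sketch of the equivalent call with the google-genai SDK (1.49.0 per the recorded x-goog-api-client header); prompt text abbreviated:

```python
from google import genai
from google.genai import types

client = genai.Client()  # assumes GEMINI_API_KEY / GOOGLE_API_KEY is set in the environment

response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents="Analyze the following conversation and create a structured summary ...",  # abbreviated
    config=types.GenerateContentConfig(
        temperature=0.0,
        system_instruction=(
            "You are a precise assistant that creates structured summaries of agent "
            "conversations. You preserve critical context needed for seamless task continuation."
        ),
    ),
)
print(response.text)  # the <summary> block recorded above
```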

@@ -0,0 +1,148 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are a precise assistant that
creates structured summaries of agent conversations. You preserve critical context
needed for seamless task continuation."},{"role":"user","content":"Analyze the
following conversation and create a structured summary that preserves all information
needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
the latest developments in large language models. Focus on architecture improvements
and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
in large language models. Based on my knowledge, recent advances include:\n1.
Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
Flash Attention\n3. Better training data curation techniques\n4. Constitutional
AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
advantage is computational efficiency: only a subset of experts is activated
per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
Key facts, data, tool results, or findings that must not be lost.\n4. **Next
Steps**: What should the agent do next based on the conversation?\n5. **Context
to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1844'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D7RxGISdQet8JsWImiwzHQ2S9gSD4\",\n \"object\":
\"chat.completion\",\n \"created\": 1770668126,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
agent is tasked with researching the latest developments in large language
models, specifically focusing on architecture improvements and training techniques.\\n\\n2.
**Current State**: The agent has identified several recent advances in large
language models, including Mixture of Experts (MoE) architectures, improved
attention mechanisms, better training data curation techniques, and advancements
in Constitutional AI and Reinforcement Learning from Human Feedback (RLHF).\\n\\n3.
**Important Discoveries**: \\n - Recent advances in large language models
include:\\n 1. Mixture of Experts (MoE) architectures\\n 2. Improved
attention mechanisms like Flash Attention\\n 3. Better training data curation
techniques\\n 4. Constitutional AI and RLHF improvements\\n - Key papers
on Mixture of Experts:\\n - Switch Transformers (Google, 2021) - simplified
MoE routing\\n - GShard - scaling to 600B parameters\\n - Mixtral
(Mistral AI) - open-source MoE model\\n - The main advantage of MoE architectures
is computational efficiency, as only a subset of experts is activated per
token.\\n\\n4. **Next Steps**: The agent should delve deeper into the Mixture
of Experts architectures, reviewing the key papers mentioned and summarizing
their contributions and implications for large language models.\\n\\n5. **Context
to Preserve**: \\n - Key papers: \\n - Switch Transformers (Google,
2021)\\n - GShard\\n - Mixtral (Mistral AI)\\n - Focus on computational
efficiency of MoE architectures.\\n</summary>\",\n \"refusal\": null,\n
\ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\":
\"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 368,\n \"completion_tokens\":
328,\n \"total_tokens\": 696,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Mon, 09 Feb 2026 20:15:32 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '5395'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

@@ -0,0 +1,145 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are a precise assistant that
creates structured summaries of agent conversations. You preserve critical context
needed for seamless task continuation."},{"role":"user","content":"Analyze the
following conversation and create a structured summary that preserves all information
needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
the latest developments in large language models. Focus on architecture improvements
and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
in large language models. Based on my knowledge, recent advances include:\n1.
Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
Flash Attention\n3. Better training data curation techniques\n4. Constitutional
AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
advantage is computational efficiency: only a subset of experts is activated
per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
Key facts, data, tool results, or findings that must not be lost.\n4. **Next
Steps**: What should the agent do next based on the conversation?\n5. **Context
to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1844'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D7RxM4n36QoACHrC0QocV1pXIwvtD\",\n \"object\":
\"chat.completion\",\n \"created\": 1770668132,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
user has requested research on the latest developments in large language models,
specifically focusing on architecture improvements and training techniques.\\n\\n2.
**Current State**: The assistant has identified several recent advances in
large language models, including Mixture of Experts (MoE) architectures, improved
attention mechanisms, better training data curation techniques, and advancements
in Constitutional AI and Reinforcement Learning from Human Feedback (RLHF).\\n\\n3.
**Important Discoveries**: \\n - Key papers on Mixture of Experts (MoE)
architectures:\\n - \\\"Switch Transformers\\\" (Google, 2021) - simplified
MoE routing.\\n - \\\"GShard\\\" - scaling to 600B parameters.\\n -
\\\"Mixtral\\\" (Mistral AI) - open-source MoE model.\\n - The main advantage
of MoE architectures is computational efficiency, as only a subset of experts
is activated per token.\\n\\n4. **Next Steps**: The assistant should delve
deeper into the Mixture of Experts architectures, potentially summarizing
the findings from the key papers mentioned.\\n\\n5. **Context to Preserve**:
\\n - Key papers: \\\"Switch Transformers,\\\" \\\"GShard,\\\" \\\"Mixtral.\\\"\\n
\ - Notable organizations: Google, Mistral AI.\\n - Focus areas: MoE architectures,
computational efficiency.\\n</summary>\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 368,\n \"completion_tokens\":
275,\n \"total_tokens\": 643,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Mon, 09 Feb 2026 20:15:36 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '4188'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1