mirror of https://github.com/crewAIInc/crewAI.git
cassette gen
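These cassettes follow the YAML layout that vcrpy writes when it records HTTP traffic during a test run. The `*-XXX` values (`AUTHORIZATION-XXX`, `X-API-KEY-XXX`, `SET-COOKIE-XXX`, and so on) indicate that sensitive and run-specific headers were scrubbed before serialization. A rough sketch of a recorder that would produce similarly sanitized cassettes follows; the cassette directory and the exact scrub lists are illustrative assumptions, not taken from this commit:

```python
# Sketch of a vcrpy recorder producing cassettes sanitized like the ones in
# this commit. The cassette directory and the header lists are illustrative
# assumptions, not names taken from the crewAI test suite.
import vcr

# Request headers are scrubbed by filter_headers; each tuple is
# (header name, replacement value), mirroring the *-XXX placeholders above.
SCRUBBED_REQUEST_HEADERS = [
    ("authorization", "AUTHORIZATION-XXX"),
    ("x-api-key", "X-API-KEY-XXX"),
    ("x-goog-api-key", "X-GOOG-API-KEY-XXX"),
    ("user-agent", "X-USER-AGENT-XXX"),
    ("accept-encoding", "ACCEPT-ENCODING-XXX"),
]

def scrub_response_headers(response):
    # filter_headers only covers the request, so run-specific response
    # headers (request ids, cookies, Cloudflare ray ids) are replaced here.
    for name in ("x-request-id", "Set-Cookie", "CF-RAY"):
        if name in response["headers"]:
            response["headers"][name] = [f"{name.upper()}-XXX"]
    return response

recorder = vcr.VCR(
    serializer="yaml",
    cassette_library_dir="tests/cassettes",  # assumed location
    record_mode="once",  # hit the network only when a cassette is missing
    filter_headers=SCRUBBED_REQUEST_HEADERS,
    before_record_response=scrub_response_headers,
)
```

With `record_mode="once"`, rerunning the suite replays the stored interactions instead of calling OpenAI, Anthropic, or Gemini again.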
@@ -0,0 +1,113 @@
interactions:
- request:
    body: '{"messages":[{"role":"system","content":"You are Writer. You are a skilled
      writer.\nYour personal goal is: Write concise content"},{"role":"user","content":"\nCurrent
      Task: Write one sentence about the sun.\n\nThis is the expected criteria for
      your final answer: A single sentence about the sun.\nyou MUST return the actual
      complete content as the final answer, not a summary.\n\nProvide your complete
      response:"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '453'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7RxEngFVCbqdc7tNjV3VjeteqcwT\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668124,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"The sun is a massive ball of glowing
        gas at the center of our solar system, providing light and warmth essential
        for life on Earth.\",\n \"refusal\": null,\n \"annotations\":
        []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
        \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 78,\n \"completion_tokens\":
        27,\n \"total_tokens\": 105,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
        0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:15:25 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '664'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
@@ -0,0 +1,120 @@
interactions:
- request:
    body: '{"messages":[{"role":"system","content":"You are Researcher. You are an
      expert researcher.\nYour personal goal is: Find information about Python programming"},{"role":"user","content":"\nCurrent
      Task: What is Python? Give a brief answer.\n\nThis is the expected criteria
      for your final answer: A short description of Python.\nyou MUST return the actual
      complete content as the final answer, not a summary.\n\nProvide your complete
      response:"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '482'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7RxRv3U0LCLf2iqf40wxOQsuiYFR\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668137,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"Python is a high-level, interpreted
        programming language known for its readability and simplicity. It was created
        by Guido van Rossum and first released in 1991. Python supports multiple programming
        paradigms, including procedural, object-oriented, and functional programming.
        It has a large standard library and is widely used for web development, data
        analysis, artificial intelligence, scientific computing, and automation, among
        other applications. Python's syntax emphasizes code readability, allowing
        developers to express concepts in fewer lines of code compared to other languages.
        Its active community and extensive ecosystem of libraries and frameworks make
        it a popular choice for both beginners and experienced programmers.\",\n \"refusal\":
        null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
        \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
        82,\n \"completion_tokens\": 123,\n \"total_tokens\": 205,\n \"prompt_tokens_details\":
        {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:15:39 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '2467'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
@@ -0,0 +1,136 @@
interactions:
- request:
    body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Analyze the following
      conversation and create a structured summary that preserves all information
      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
      the latest developments in large language models. Focus on architecture improvements
      and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
      in large language models. Based on my knowledge, recent advances include:\n1.
      Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
      Flash Attention\n3. Better training data curation techniques\n4. Constitutional
      AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
      What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
      Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
      to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
      advantage is computational efficiency: only a subset of experts is activated
      per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
      Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
      has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
      Key facts, data, tool results, or findings that must not be lost.\n4. **Next
      Steps**: What should the agent do next based on the conversation?\n5. **Context
      to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
      in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
      structured summary here]\n</summary>"}],"model":"claude-3-5-haiku-latest","stream":false,"system":"You
      are a precise assistant that creates structured summaries of agent conversations.
      You preserve critical context needed for seamless task continuation.","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      anthropic-version:
      - '2023-06-01'
      connection:
      - keep-alive
      content-length:
      - '1870'
      content-type:
      - application/json
      host:
      - api.anthropic.com
      x-api-key:
      - X-API-KEY-XXX
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 0.73.0
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
      x-stainless-timeout:
      - NOT_GIVEN
    method: POST
    uri: https://api.anthropic.com/v1/messages
  response:
    body:
      string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01SK3LP6RedPBmpvD1HfKD23","type":"message","role":"assistant","content":[{"type":"text","text":"<summary>\n1.
        **Task Overview**:\n- Research latest developments in large language models\n-
        Focus on architecture improvements and training techniques\n\n2. **Current
        State**:\n- Initial research completed on broad developments\n- Currently
        exploring Mixture of Experts (MoE) architectures in depth\n- Detailed discussion
        of key MoE research papers initiated\n\n3. **Important Discoveries**:\nMoE
        Architecture Insights:\n- Computational efficiency through selective expert
        activation\n- Key research papers:\n * Switch Transformers (Google, 2021)\n *
        GShard\n * Mixtral (Mistral AI)\n- Main benefit: Only subset of experts activated
        per token\n\n4. **Next Steps**:\n- Conduct deeper analysis of MoE architecture
        mechanisms\n- Compare routing strategies across different MoE implementations\n-
        Investigate performance metrics and scalability of MoE models\n\n5. **Context
        to Preserve**:\n- Research Focus: Large Language Model Architectures\n- Specific
        Interest: Mixture of Experts (MoE) Architectures\n- Key Researchers/Organizations:
        Google, Mistral AI\n- Years of Significant Papers: 2021 onwards\n</summary>"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":400,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":270,"service_tier":"standard","inference_geo":"not_available"}}'
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Security-Policy:
      - CSP-FILTERED
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:18:41 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
      X-Robots-Tag:
      - none
      anthropic-organization-id:
      - ANTHROPIC-ORGANIZATION-ID-XXX
      anthropic-ratelimit-input-tokens-limit:
      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
      anthropic-ratelimit-input-tokens-remaining:
      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
      anthropic-ratelimit-input-tokens-reset:
      - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
      anthropic-ratelimit-output-tokens-limit:
      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
      anthropic-ratelimit-output-tokens-remaining:
      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
      anthropic-ratelimit-output-tokens-reset:
      - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
      anthropic-ratelimit-requests-limit:
      - '4000'
      anthropic-ratelimit-requests-remaining:
      - '3999'
      anthropic-ratelimit-requests-reset:
      - '2026-02-09T20:18:35Z'
      anthropic-ratelimit-tokens-limit:
      - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
      anthropic-ratelimit-tokens-remaining:
      - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
      anthropic-ratelimit-tokens-reset:
      - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
      cf-cache-status:
      - DYNAMIC
      request-id:
      - REQUEST-ID-XXX
      strict-transport-security:
      - STS-XXX
      x-envoy-upstream-service-time:
      - '5639'
    status:
      code: 200
      message: OK
version: 1
@@ -0,0 +1,103 @@
interactions:
- request:
    body: '{"contents": [{"parts": [{"text": "Analyze the following conversation and
      create a structured summary that preserves all information needed to continue
      the task seamlessly.\n\n<conversation>\n[USER]: Research the latest developments
      in large language models. Focus on architecture improvements and training techniques.\n\n[ASSISTANT]:
      I''ll research the latest developments in large language models. Based on my
      knowledge, recent advances include:\n1. Mixture of Experts (MoE) architectures\n2.
      Improved attention mechanisms like Flash Attention\n3. Better training data
      curation techniques\n4. Constitutional AI and RLHF improvements\n\n[USER]: Can
      you go deeper on the MoE architectures? What are the key papers?\n\n[ASSISTANT]:
      Key papers on Mixture of Experts:\n- Switch Transformers (Google, 2021) - simplified
      MoE routing\n- GShard - scaling to 600B parameters\n- Mixtral (Mistral AI) -
      open-source MoE model\nThe main advantage is computational efficiency: only
      a subset of experts is activated per token.\n</conversation>\n\nCreate a summary
      with these sections:\n1. **Task Overview**: What is the agent trying to accomplish?\n2.
      **Current State**: What has been completed so far? What step is the agent on?\n3.
      **Important Discoveries**: Key facts, data, tool results, or findings that must
      not be lost.\n4. **Next Steps**: What should the agent do next based on the
      conversation?\n5. **Context to Preserve**: Any specific values, names, URLs,
      code snippets, or details referenced in the conversation.\n\nWrap your entire
      summary in <summary> tags.\n\n<summary>\n[Your structured summary here]\n</summary>"}],
      "role": "user"}], "systemInstruction": {"parts": [{"text": "You are a precise
      assistant that creates structured summaries of agent conversations. You preserve
      critical context needed for seamless task continuation."}], "role": "user"},
      "generationConfig": {"temperature": 0.0}}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - '*/*'
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      connection:
      - keep-alive
      content-length:
      - '1895'
      content-type:
      - application/json
      host:
      - generativelanguage.googleapis.com
      x-goog-api-client:
      - google-genai-sdk/1.49.0 gl-python/3.13.3
      x-goog-api-key:
      - X-GOOG-API-KEY-XXX
    method: POST
    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
  response:
    body:
      string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
        [\n {\n \"text\": \"```xml\\n\\u003csummary\\u003e\\n**Task
        Overview**: Research the latest developments in large language models, focusing
        on architecture improvements and training techniques.\\n\\n**Current State**:
        The agent has identified several key areas of advancement in LLMs: Mixture
        of Experts (MoE) architectures, improved attention mechanisms (Flash Attention),
        better training data curation, and Constitutional AI/RLHF improvements. The
        user has requested a deeper dive into MoE architectures. The agent has provided
        an initial overview of MoE architectures and listed some key papers.\\n\\n**Important
        Discoveries**:\\n* Key MoE papers: Switch Transformers (Google, 2021), GShard,
        Mixtral (Mistral AI).\\n* MoE advantage: Computational efficiency through
        selective activation of experts.\\n\\n**Next Steps**: Continue researching
        MoE architectures based on the user's request for more detail. The agent should
        elaborate further on the listed papers and potentially find more recent or
        relevant publications.\\n\\n**Context to Preserve**:\\n* Focus areas: Architecture
        improvements and training techniques for LLMs.\\n* Specific architectures:
        Mixture of Experts (MoE), Flash Attention.\\n* Training techniques: Data
        curation, Constitutional AI, RLHF.\\n* Specific papers: Switch Transformers
        (Google, 2021), GShard, Mixtral (Mistral AI).\\n\\u003c/summary\\u003e\\n```\"\n
        \ }\n ],\n \"role\": \"model\"\n },\n \"finishReason\":
        \"STOP\",\n \"avgLogprobs\": -0.14186729703630721\n }\n ],\n \"usageMetadata\":
        {\n \"promptTokenCount\": 373,\n \"candidatesTokenCount\": 280,\n \"totalTokenCount\":
        653,\n \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
        \ \"tokenCount\": 373\n }\n ],\n \"candidatesTokensDetails\":
        [\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 280\n
        \ }\n ]\n },\n \"modelVersion\": \"gemini-2.0-flash\",\n \"responseId\":
        \"GEGKabP3OcGH-8YPzZCj2Ao\"\n}\n"
    headers:
      Alt-Svc:
      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
      Content-Type:
      - application/json; charset=UTF-8
      Date:
      - Mon, 09 Feb 2026 20:18:35 GMT
      Server:
      - scaffolding on HTTPServer2
      Server-Timing:
      - gfet4t7; dur=2310
      Transfer-Encoding:
      - chunked
      Vary:
      - Origin
      - X-Origin
      - Referer
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      X-Frame-Options:
      - X-FRAME-OPTIONS-XXX
      X-XSS-Protection:
      - '0'
    status:
      code: 200
      message: OK
version: 1
@@ -0,0 +1,148 @@
interactions:
- request:
    body: '{"messages":[{"role":"system","content":"You are a precise assistant that
      creates structured summaries of agent conversations. You preserve critical context
      needed for seamless task continuation."},{"role":"user","content":"Analyze the
      following conversation and create a structured summary that preserves all information
      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
      the latest developments in large language models. Focus on architecture improvements
      and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
      in large language models. Based on my knowledge, recent advances include:\n1.
      Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
      Flash Attention\n3. Better training data curation techniques\n4. Constitutional
      AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
      What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
      Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
      to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
      advantage is computational efficiency: only a subset of experts is activated
      per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
      Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
      has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
      Key facts, data, tool results, or findings that must not be lost.\n4. **Next
      Steps**: What should the agent do next based on the conversation?\n5. **Context
      to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
      in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
      structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '1844'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7RxGISdQet8JsWImiwzHQ2S9gSD4\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668126,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
        agent is tasked with researching the latest developments in large language
        models, specifically focusing on architecture improvements and training techniques.\\n\\n2.
        **Current State**: The agent has identified several recent advances in large
        language models, including Mixture of Experts (MoE) architectures, improved
        attention mechanisms, better training data curation techniques, and advancements
        in Constitutional AI and Reinforcement Learning from Human Feedback (RLHF).\\n\\n3.
        **Important Discoveries**: \\n - Recent advances in large language models
        include:\\n 1. Mixture of Experts (MoE) architectures\\n 2. Improved
        attention mechanisms like Flash Attention\\n 3. Better training data curation
        techniques\\n 4. Constitutional AI and RLHF improvements\\n - Key papers
        on Mixture of Experts:\\n - Switch Transformers (Google, 2021) - simplified
        MoE routing\\n - GShard - scaling to 600B parameters\\n - Mixtral
        (Mistral AI) - open-source MoE model\\n - The main advantage of MoE architectures
        is computational efficiency, as only a subset of experts is activated per
        token.\\n\\n4. **Next Steps**: The agent should delve deeper into the Mixture
        of Experts architectures, reviewing the key papers mentioned and summarizing
        their contributions and implications for large language models.\\n\\n5. **Context
        to Preserve**: \\n - Key papers: \\n - Switch Transformers (Google,
        2021)\\n - GShard\\n - Mixtral (Mistral AI)\\n - Focus on computational
        efficiency of MoE architectures.\\n</summary>\",\n \"refusal\": null,\n
        \ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\":
        \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 368,\n \"completion_tokens\":
        328,\n \"total_tokens\": 696,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
        0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:15:32 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '5395'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
@@ -0,0 +1,145 @@
interactions:
- request:
    body: '{"messages":[{"role":"system","content":"You are a precise assistant that
      creates structured summaries of agent conversations. You preserve critical context
      needed for seamless task continuation."},{"role":"user","content":"Analyze the
      following conversation and create a structured summary that preserves all information
      needed to continue the task seamlessly.\n\n<conversation>\n[USER]: Research
      the latest developments in large language models. Focus on architecture improvements
      and training techniques.\n\n[ASSISTANT]: I''ll research the latest developments
      in large language models. Based on my knowledge, recent advances include:\n1.
      Mixture of Experts (MoE) architectures\n2. Improved attention mechanisms like
      Flash Attention\n3. Better training data curation techniques\n4. Constitutional
      AI and RLHF improvements\n\n[USER]: Can you go deeper on the MoE architectures?
      What are the key papers?\n\n[ASSISTANT]: Key papers on Mixture of Experts:\n-
      Switch Transformers (Google, 2021) - simplified MoE routing\n- GShard - scaling
      to 600B parameters\n- Mixtral (Mistral AI) - open-source MoE model\nThe main
      advantage is computational efficiency: only a subset of experts is activated
      per token.\n</conversation>\n\nCreate a summary with these sections:\n1. **Task
      Overview**: What is the agent trying to accomplish?\n2. **Current State**: What
      has been completed so far? What step is the agent on?\n3. **Important Discoveries**:
      Key facts, data, tool results, or findings that must not be lost.\n4. **Next
      Steps**: What should the agent do next based on the conversation?\n5. **Context
      to Preserve**: Any specific values, names, URLs, code snippets, or details referenced
      in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your
      structured summary here]\n</summary>"}],"model":"gpt-4o-mini","temperature":0}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '1844'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-D7RxM4n36QoACHrC0QocV1pXIwvtD\",\n \"object\":
        \"chat.completion\",\n \"created\": 1770668132,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"<summary>\\n1. **Task Overview**: The
        user has requested research on the latest developments in large language models,
        specifically focusing on architecture improvements and training techniques.\\n\\n2.
        **Current State**: The assistant has identified several recent advances in
        large language models, including Mixture of Experts (MoE) architectures, improved
        attention mechanisms, better training data curation techniques, and advancements
        in Constitutional AI and Reinforcement Learning from Human Feedback (RLHF).\\n\\n3.
        **Important Discoveries**: \\n - Key papers on Mixture of Experts (MoE)
        architectures:\\n - \\\"Switch Transformers\\\" (Google, 2021) - simplified
        MoE routing.\\n - \\\"GShard\\\" - scaling to 600B parameters.\\n -
        \\\"Mixtral\\\" (Mistral AI) - open-source MoE model.\\n - The main advantage
        of MoE architectures is computational efficiency, as only a subset of experts
        is activated per token.\\n\\n4. **Next Steps**: The assistant should delve
        deeper into the Mixture of Experts architectures, potentially summarizing
        the findings from the key papers mentioned.\\n\\n5. **Context to Preserve**:
        \\n - Key papers: \\\"Switch Transformers,\\\" \\\"GShard,\\\" \\\"Mixtral.\\\"\\n
        \ - Notable organizations: Google, Mistral AI.\\n - Focus areas: MoE architectures,
        computational efficiency.\\n</summary>\",\n \"refusal\": null,\n \"annotations\":
        []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
        \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 368,\n \"completion_tokens\":
        275,\n \"total_tokens\": 643,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
        0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
    headers:
      CF-RAY:
      - CF-RAY-XXX
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 09 Feb 2026 20:15:36 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '4188'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
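On replay, a test wrapped in one of these cassettes never touches the network: vcrpy matches the outgoing request against the recorded one (by method and URI under the default matchers) and returns the stored response, so placeholder credentials are sufficient. A minimal replay sketch, reusing the hypothetical `recorder` object from the earlier snippet; the cassette filename is likewise an assumption:

```python
# Replay sketch: the completion below is served from the recorded YAML
# fixture rather than api.openai.com. The filename and `recorder` are the
# illustrative assumptions introduced earlier, not names from this commit.
from openai import OpenAI

with recorder.use_cassette("writer_single_sentence.yaml"):
    client = OpenAI(api_key="test-key")  # any value works; auth was scrubbed
    reply = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0,
        messages=[{"role": "user", "content": "Write one sentence about the sun."}],
    )
    print(reply.choices[0].message.content)
```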