refactor: enhance final answer synthesis in AgentExecutor

This commit improves the synthesis of final answers in the AgentExecutor class by implementing a more coherent approach to combining results from multiple todo items. The method now utilizes a single LLM call to generate a polished response, falling back to concatenation if the synthesis fails. Additionally, the test cases have been updated to reflect the changes in planning and execution, ensuring that the results are properly validated and that the plan-and-execute architecture is functioning as intended.
This commit is contained in:
lorenzejay
2026-02-06 15:39:04 -08:00
parent 9f3c53ca97
commit ff57956d05
4 changed files with 8894 additions and 0 deletions

View File

@@ -721,3 +721,116 @@ class TestAgentExecutorPlanning:
# Plan should exist
assert captured_plan[0] is not None
class TestResponseFormatWithKickoff:
"""Test that Agent.kickoff(response_format=MyModel) returns structured output.
Real LLM calls via VCR cassettes. Tests both with and without planning,
using real tools for the planning case to exercise the full Plan-and-Execute
path including synthesis with response_model.
"""
@pytest.mark.vcr()
def test_kickoff_response_format_without_planning(self):
"""Test that kickoff(response_format) returns structured output without planning."""
from pydantic import BaseModel, Field
from crewai import Agent
from crewai.llm import LLM
class MathResult(BaseModel):
answer: int = Field(description="The numeric answer")
explanation: str = Field(description="Brief explanation of the solution")
llm = LLM("gpt-4o-mini")
agent = Agent(
role="Math Assistant",
goal="Solve math problems and return structured results",
backstory="A precise math assistant that always returns structured data",
llm=llm,
verbose=False,
)
result = agent.kickoff("What is 15 + 27?", response_format=MathResult)
assert result is not None
assert result.pydantic is not None
assert isinstance(result.pydantic, MathResult)
assert result.pydantic.answer == 42
assert len(result.pydantic.explanation) > 0
@pytest.mark.vcr()
def test_kickoff_response_format_with_planning_and_tools(self):
"""Test response_format with planning + tools (multi-step research).
This is the key test for _synthesize_final_answer_from_todos:
1. Planning generates steps that use the EXA search tool
2. StepExecutor runs each step in isolation with tool calls
3. The synthesis step produces a structured BaseModel output
The response_format should be respected by the synthesis LLM call,
NOT by intermediate step executions.
"""
from pydantic import BaseModel, Field
from crewai import Agent, PlanningConfig
from crewai.llm import LLM
from crewai_tools import EXASearchTool
class ResearchSummary(BaseModel):
topic: str = Field(description="The research topic")
key_findings: list[str] = Field(description="List of 3-5 key findings")
conclusion: str = Field(description="A brief conclusion paragraph")
llm = LLM("gpt-4o-mini")
exa = EXASearchTool()
agent = Agent(
role="Research Analyst",
goal="Research topics using search tools and produce structured summaries",
backstory=(
"You are a research analyst who searches the web for information, "
"identifies key findings, and produces structured research summaries."
),
llm=llm,
planning_config=PlanningConfig(max_attempts=1, max_steps=5),
tools=[exa],
verbose=False,
)
result = agent.kickoff(
"Research the current state of autonomous AI agents in 2025. "
"Search for recent developments, then summarize the key findings.",
response_format=ResearchSummary,
)
assert result is not None
# The synthesis step should have produced structured output
assert result.pydantic is not None
assert isinstance(result.pydantic, ResearchSummary)
# Verify the structured fields are populated
assert len(result.pydantic.topic) > 0
assert len(result.pydantic.key_findings) >= 1
assert len(result.pydantic.conclusion) > 0
@pytest.mark.vcr()
def test_kickoff_no_response_format_returns_raw_text(self):
"""Test that kickoff without response_format returns plain text."""
from crewai import Agent
from crewai.llm import LLM
llm = LLM("gpt-4o-mini")
agent = Agent(
role="Math Assistant",
goal="Solve math problems",
backstory="A helpful math assistant",
llm=llm,
verbose=False,
)
result = agent.kickoff("What is 10 + 10?")
assert result is not None
assert result.pydantic is None
assert "20" in str(result)

View File

@@ -0,0 +1,214 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
math assistant\nYour personal goal is: Solve math problems"},{"role":"user","content":"\nCurrent
Task: What is 10 + 10?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '255'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D6NfywWXSxBmPTEjnpyaIWqF80Bz8\",\n \"object\":
\"chat.completion\",\n \"created\": 1770413350,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"10 + 10 equals 20.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
46,\n \"completion_tokens\": 8,\n \"total_tokens\": 54,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 06 Feb 2026 21:29:10 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '288'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
math assistant\nYour personal goal is: Solve math problems"},{"role":"user","content":"\nCurrent
Task: What is 10 + 10?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '255'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D6NfyZWuh1ZpzpgkqSDcyZKEhjYnm\",\n \"object\":
\"chat.completion\",\n \"created\": 1770413350,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"10 + 10 equals 20.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
46,\n \"completion_tokens\": 8,\n \"total_tokens\": 54,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 06 Feb 2026 21:29:11 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '323'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,228 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A precise
math assistant that always returns structured data\nYour personal goal is: Solve
math problems and return structured results"},{"role":"user","content":"\nCurrent
Task: What is 15 + 27?\n\nProvide your complete response:"}],"model":"gpt-4o-mini","response_format":{"type":"json_schema","json_schema":{"schema":{"properties":{"answer":{"description":"The
numeric answer","title":"Answer","type":"integer"},"explanation":{"description":"Brief
explanation of the solution","title":"Explanation","type":"string"}},"required":["answer","explanation"],"title":"MathResult","type":"object","additionalProperties":false},"name":"MathResult","strict":true}},"stream":false}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '739'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D6NfzfSu3KkFRgIbm3Z222RJMTPy9\",\n \"object\":
\"chat.completion\",\n \"created\": 1770413351,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"answer\\\":42,\\\"explanation\\\":\\\"To
find the sum of 15 and 27, we simply add the two numbers together: 15 + 27
= 42.\\\"}\",\n \"refusal\": null,\n \"annotations\": []\n },\n
\ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n
\ \"usage\": {\n \"prompt_tokens\": 117,\n \"completion_tokens\": 37,\n
\ \"total_tokens\": 154,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 06 Feb 2026 21:29:12 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '976'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A precise
math assistant that always returns structured data\nYour personal goal is: Solve
math problems and return structured results"},{"role":"user","content":"\nCurrent
Task: What is 15 + 27?\n\nProvide your complete response:"}],"model":"gpt-4o-mini","response_format":{"type":"json_schema","json_schema":{"schema":{"properties":{"answer":{"description":"The
numeric answer","title":"Answer","type":"integer"},"explanation":{"description":"Brief
explanation of the solution","title":"Explanation","type":"string"}},"required":["answer","explanation"],"title":"MathResult","type":"object","additionalProperties":false},"name":"MathResult","strict":true}},"stream":false}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '739'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D6Ng1hVush1SBz7bgwbNvbWC8F3Lm\",\n \"object\":
\"chat.completion\",\n \"created\": 1770413353,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"answer\\\":42,\\\"explanation\\\":\\\"To
find the sum of 15 and 27, you simply add the two numbers together: 15 + 27
= 42.\\\"}\",\n \"refusal\": null,\n \"annotations\": []\n },\n
\ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n
\ \"usage\": {\n \"prompt_tokens\": 117,\n \"completion_tokens\": 37,\n
\ \"total_tokens\": 154,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 06 Feb 2026 21:29:14 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1057'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1