Compare commits

...

2 Commits

Author SHA1 Message Date
Devin AI
7d236d42fb style: fix lint issues in test_task_output_json_overrides_llm_response_format
Applied ruff auto-fixes to remove trailing whitespace from the docstring
and from blank lines in the new test function.

Co-Authored-By: João <joao@crewai.com>
2025-10-03 06:09:45 +00:00
Devin AI
0452c0af47 fix: prioritize task output_json over LLM response_format
This commit fixes issue #3639 by ensuring task-level output settings
(output_json and output_pydantic) take precedence over agent-level
LLM response_format when both are set with Pydantic models.

Changes:
- Modified LLM._prepare_completion_params() to accept a from_task parameter
  and check whether the task has output_json or output_pydantic set
- If the task has output settings, the LLM's response_format is ignored
- Updated LLM.call() to pass from_task to _prepare_completion_params()
- Added comprehensive test to verify the priority behavior

The fix ensures predictable behavior, following the standard configuration
hierarchy in which more specific (task-level) settings override more general
(agent-level) defaults.

Co-Authored-By: João <joao@crewai.com>
2025-10-03 06:04:24 +00:00
3 changed files with 204 additions and 3 deletions
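
In plain terms, the precedence rule introduced by the second commit can be sketched as a small helper. The standalone function and its name are illustrative assumptions; in the actual change the check is inlined in LLM._prepare_completion_params(), as the diff below shows.

# Hedged sketch of the precedence rule; resolve_response_format is a
# hypothetical helper, not part of the diff below.
from typing import Any

def resolve_response_format(llm_response_format: Any, from_task: Any | None) -> Any:
    """Task-level output settings win over the LLM-level response_format."""
    if from_task and getattr(from_task, "output_json", None):
        # The task declares output_json, so drop the LLM's response_format.
        return None
    if from_task and getattr(from_task, "output_pydantic", None):
        # Likewise when the task declares output_pydantic.
        return None
    return llm_response_format

With this rule, a Task configured with output_json=TaskOutputModel yields TaskOutputModel fields even when its agent's LLM was constructed with response_format=LLMOutputModel, which is exactly what the new test at the end of this diff asserts.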

View File

@@ -355,12 +355,14 @@ class LLM(BaseLLM):
         self,
         messages: str | list[dict[str, str]],
         tools: list[dict] | None = None,
+        from_task: Any | None = None,
     ) -> dict[str, Any]:
         """Prepare parameters for the completion call.
 
         Args:
             messages: Input messages for the LLM
             tools: Optional list of tool schemas
+            from_task: Optional task object to check for output settings
 
         Returns:
             Dict[str, Any]: Parameters for the completion call
@@ -370,7 +372,16 @@ class LLM(BaseLLM):
             messages = [{"role": "user", "content": messages}]
         formatted_messages = self._format_messages_for_provider(messages)
 
         # --- 2) Prepare the parameters for the completion call
+        # Task-level output settings take precedence over LLM-level response_format
+        response_format = self.response_format
+        if from_task and hasattr(from_task, 'output_json') and from_task.output_json:
+            # Task has output_json set, so ignore LLM's response_format
+            response_format = None
+        elif from_task and hasattr(from_task, 'output_pydantic') and from_task.output_pydantic:
+            # Task has output_pydantic set, so ignore LLM's response_format
+            response_format = None
+        # --- 3) Prepare the parameters for the completion call
         params = {
             "model": self.model,
             "messages": formatted_messages,
@@ -383,7 +394,7 @@ class LLM(BaseLLM):
             "presence_penalty": self.presence_penalty,
             "frequency_penalty": self.frequency_penalty,
             "logit_bias": self.logit_bias,
-            "response_format": self.response_format,
+            "response_format": response_format,
             "seed": self.seed,
             "logprobs": self.logprobs,
             "top_logprobs": self.top_logprobs,
@@ -1015,7 +1026,7 @@ class LLM(BaseLLM):
         self.set_callbacks(callbacks)
         try:
             # --- 6) Prepare parameters for the completion call
-            params = self._prepare_completion_params(messages, tools)
+            params = self._prepare_completion_params(messages, tools, from_task)
             # --- 7) Make the completion call and handle response
             if self.stream:
                 return self._handle_streaming_response(

View File

@@ -0,0 +1,131 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Test Agent. You''re an
+      expert in testing priority systems.\nYour personal goal is: Test goal for priority testing\nTo give my best complete final answer to the task use the exact following
+      format:\n\nThought: I now can give a great answer\nFinal Answer: Your final
+      answer must be the great and the most complete as possible, it must be outcome
+      described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user",
+      "content": "\nCurrent Task: Analyze the priority system and provide a result\n\nThis is the expect criteria
+      for your final answer: A structured result with task_result and task_confidence fields.\nyou MUST return the actual complete
+      content as the final answer, not a summary.\n\nBegin! This is VERY important
+      to you, use the tools available and give your best Final Answer, your job depends
+      on it!\n\nThought:"}], "model": "gpt-4o-mini"}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.47.0
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n \"id\": \"chatcmpl-test123\",\n \"object\":
+      \"chat.completion\",\n \"created\": 1727214471,\n \"model\": \"gpt-4o-mini\",\n
+      \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
+      \"assistant\",\n \"content\": \"I now can give a great answer\\nFinal
+      Answer: The priority system is working correctly with task-level settings taking precedence over agent-level settings.\",\n \"refusal\": null\n },\n \"logprobs\": null,\n
+      \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
+      186,\n \"completion_tokens\": 25,\n \"total_tokens\": 211,\n \"completion_tokens_details\":
+      {\n \"reasoning_tokens\": 0\n }\n },\n \"system_fingerprint\": \"fp_test\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8c85f9af4ef31cf3-GRU
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 24 Sep 2024 21:47:52 GMT
+      Server:
+      - cloudflare
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '170'
+      openai-version:
+      - '2020-10-01'
+      x-ratelimit-limit-requests:
+      - '10000'
+      x-ratelimit-limit-tokens:
+      - '30000000'
+      x-ratelimit-remaining-requests:
+      - '9999'
+      x-ratelimit-remaining-tokens:
+      - '29999781'
+    http_version: HTTP/1.1
+    status_code: 200
+- request:
+    body: '{"messages": [{"role": "user", "content": "The priority system is working correctly with task-level settings taking precedence over agent-level settings."}, {"role": "system", "content":
+      "I''m gonna convert this raw text into valid JSON.\n\nThe json should have the
+      following structure, with the following keys:\n{\n task_result: str\n task_confidence: float\n}"}], "model":
+      "gpt-4o-mini", "tool_choice": {"type": "function", "function": {"name": "TaskOutputModel"}},
+      "tools": [{"type": "function", "function": {"name": "TaskOutputModel", "description":
+      "Correctly extracted `TaskOutputModel` with all the required parameters with correct
+      types", "parameters": {"properties": {"task_result": {"title": "Task Result", "type": "string"}, "task_confidence": {"title": "Task Confidence", "type": "number"}},
+      "required": ["task_result", "task_confidence"], "type": "object"}}}]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.47.0
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n \"id\": \"chatcmpl-test456\",\n \"object\":
+      \"chat.completion\",\n \"created\": 1727214472,\n \"model\": \"gpt-4o-mini\",\n
+      \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
+      \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
+      \ \"id\": \"call_test789\",\n \"type\":
+      \"function\",\n \"function\": {\n \"name\": \"TaskOutputModel\",\n
+      \ \"arguments\": \"{\\\"task_result\\\":\\\"The priority system is working correctly with task-level settings taking precedence over agent-level settings.\\\",\\\"task_confidence\\\":0.95}\"\n }\n }\n
+      \ ],\n \"refusal\": null\n },\n \"logprobs\": null,\n
+      \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
+      120,\n \"completion_tokens\": 30,\n \"total_tokens\": 150,\n \"completion_tokens_details\":
+      {\n \"reasoning_tokens\": 0\n }\n },\n \"system_fingerprint\": \"fp_test\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8c85f9b2fc671cf3-GRU
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 24 Sep 2024 21:47:52 GMT
+      Server:
+      - cloudflare
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '163'
+      openai-version:
+      - '2020-10-01'
+      x-ratelimit-limit-requests:
+      - '10000'
+      x-ratelimit-limit-tokens:
+      - '30000000'
+      x-ratelimit-remaining-requests:
+      - '9999'
+      x-ratelimit-remaining-tokens:
+      - '29999947'
+    http_version: HTTP/1.1
+    status_code: 200
+version: 1

View File

@@ -1635,3 +1635,62 @@ def test_task_interpolation_with_hyphens():
 
     assert "say hello world" in task.prompt()
     assert result.raw == "Hello, World!"
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_task_output_json_overrides_llm_response_format():
+    """Test that task.output_json takes priority over llm.response_format when both are set.
+
+    This test addresses issue #3639: when both a task's output_json and an agent's LLM
+    response_format are set with pydantic models, the task-level setting should take
+    precedence over the agent-level setting.
+    """
+    from crewai.llm import LLM
+
+    class TaskOutputModel(BaseModel):
+        """Expected output model for the task."""
+        task_result: str
+        task_confidence: float
+
+    class LLMOutputModel(BaseModel):
+        """Different output model set on the LLM."""
+        llm_answer: str
+        llm_score: int
+
+    llm = LLM(model="gpt-4o-mini", response_format=LLMOutputModel)
+
+    agent = Agent(
+        role="Test Agent",
+        goal="Test goal for priority testing",
+        backstory="Test backstory for priority testing",
+        llm=llm,
+        allow_delegation=False,
+    )
+
+    task = Task(
+        description="Analyze the priority system and provide a result",
+        expected_output="A structured result with task_result and task_confidence fields",
+        agent=agent,
+        output_json=TaskOutputModel,
+    )
+
+    crew = Crew(agents=[agent], tasks=[task], process=Process.sequential)
+    result = crew.kickoff()
+
+    assert result.json_dict is not None, "Result should have json_dict output"
+    assert "task_result" in result.json_dict, (
+        "Should have task_result field from TaskOutputModel. "
+        "Task-level output_json should override LLM-level response_format."
+    )
+    assert "task_confidence" in result.json_dict, (
+        "Should have task_confidence field from TaskOutputModel"
+    )
+    assert "llm_answer" not in result.json_dict, (
+        "Should not have llm_answer field from LLMOutputModel. "
+        "This proves task-level output_json took precedence over LLM-level response_format."
+    )
+    assert "llm_score" not in result.json_dict, (
+        "Should not have llm_score field from LLMOutputModel"
+    )
+    assert isinstance(result.json_dict["task_result"], str), "task_result should be a string"
+    assert isinstance(result.json_dict["task_confidence"], (int, float)), "task_confidence should be a number"