From 0452c0af4780411ea3b5ebcb5c534422177c86ad Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 3 Oct 2025 06:04:24 +0000 Subject: [PATCH] fix: prioritize task output_json over LLM response_format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes issue #3639 by ensuring task-level output settings (output_json and output_pydantic) take precedence over agent-level LLM response_format when both are set with Pydantic models. Changes: - Modified LLM._prepare_completion_params() to accept from_task parameter and check if task has output_json or output_pydantic set - If task has output settings, LLM's response_format is ignored - Updated LLM.call() to pass from_task to _prepare_completion_params() - Added comprehensive test to verify the priority behavior The fix ensures predictable behavior following the standard configuration hierarchy where more specific (task-level) settings override general (agent-level) defaults. Co-Authored-By: João --- src/crewai/llm.py | 17 ++- ...ut_json_overrides_llm_response_format.yaml | 131 ++++++++++++++++++ tests/test_task.py | 59 ++++++++ 3 files changed, 204 insertions(+), 3 deletions(-) create mode 100644 tests/cassettes/test_task_output_json_overrides_llm_response_format.yaml diff --git a/src/crewai/llm.py b/src/crewai/llm.py index 733b46c79..28871bc38 100644 --- a/src/crewai/llm.py +++ b/src/crewai/llm.py @@ -355,12 +355,14 @@ class LLM(BaseLLM): self, messages: str | list[dict[str, str]], tools: list[dict] | None = None, + from_task: Any | None = None, ) -> dict[str, Any]: """Prepare parameters for the completion call. Args: messages: Input messages for the LLM tools: Optional list of tool schemas + from_task: Optional task object to check for output settings Returns: Dict[str, Any]: Parameters for the completion call @@ -370,7 +372,16 @@ class LLM(BaseLLM): messages = [{"role": "user", "content": messages}] formatted_messages = self._format_messages_for_provider(messages) - # --- 2) Prepare the parameters for the completion call + # Task-level output settings take precedence over LLM-level response_format + response_format = self.response_format + if from_task and hasattr(from_task, 'output_json') and from_task.output_json: + # Task has output_json set, so ignore LLM's response_format + response_format = None + elif from_task and hasattr(from_task, 'output_pydantic') and from_task.output_pydantic: + # Task has output_pydantic set, so ignore LLM's response_format + response_format = None + + # --- 3) Prepare the parameters for the completion call params = { "model": self.model, "messages": formatted_messages, @@ -383,7 +394,7 @@ class LLM(BaseLLM): "presence_penalty": self.presence_penalty, "frequency_penalty": self.frequency_penalty, "logit_bias": self.logit_bias, - "response_format": self.response_format, + "response_format": response_format, "seed": self.seed, "logprobs": self.logprobs, "top_logprobs": self.top_logprobs, @@ -1015,7 +1026,7 @@ class LLM(BaseLLM): self.set_callbacks(callbacks) try: # --- 6) Prepare parameters for the completion call - params = self._prepare_completion_params(messages, tools) + params = self._prepare_completion_params(messages, tools, from_task) # --- 7) Make the completion call and handle response if self.stream: return self._handle_streaming_response( diff --git a/tests/cassettes/test_task_output_json_overrides_llm_response_format.yaml b/tests/cassettes/test_task_output_json_overrides_llm_response_format.yaml new file mode 100644 index 000000000..0151d5611 --- /dev/null +++ b/tests/cassettes/test_task_output_json_overrides_llm_response_format.yaml @@ -0,0 +1,131 @@ +interactions: +- request: + body: '{"messages": [{"role": "system", "content": "You are Test Agent. You''re an + expert in testing priority systems.\nYour personal goal is: Test goal for priority testing\nTo give my best complete final answer to the task use the exact following + format:\n\nThought: I now can give a great answer\nFinal Answer: Your final + answer must be the great and the most complete as possible, it must be outcome + described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user", + "content": "\nCurrent Task: Analyze the priority system and provide a result\n\nThis is the expect criteria + for your final answer: A structured result with task_result and task_confidence fields.\nyou MUST return the actual complete + content as the final answer, not a summary.\n\nBegin! This is VERY important + to you, use the tools available and give your best Final Answer, your job depends + on it!\n\nThought:"}], "model": "gpt-4o-mini"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.47.0 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-test123\",\n \"object\": + \"chat.completion\",\n \"created\": 1727214471,\n \"model\": \"gpt-4o-mini\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"I now can give a great answer\\nFinal + Answer: The priority system is working correctly with task-level settings taking precedence over agent-level settings.\",\n \"refusal\": null\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 186,\n \"completion_tokens\": 25,\n \"total_tokens\": 211,\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0\n }\n },\n \"system_fingerprint\": \"fp_test\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8c85f9af4ef31cf3-GRU + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 24 Sep 2024 21:47:52 GMT + Server: + - cloudflare + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '170' + openai-version: + - '2020-10-01' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999781' + http_version: HTTP/1.1 + status_code: 200 +- request: + body: '{"messages": [{"role": "user", "content": "The priority system is working correctly with task-level settings taking precedence over agent-level settings."}, {"role": "system", "content": + "I''m gonna convert this raw text into valid JSON.\n\nThe json should have the + following structure, with the following keys:\n{\n task_result: str\n task_confidence: float\n}"}], "model": + "gpt-4o-mini", "tool_choice": {"type": "function", "function": {"name": "TaskOutputModel"}}, + "tools": [{"type": "function", "function": {"name": "TaskOutputModel", "description": + "Correctly extracted `TaskOutputModel` with all the required parameters with correct + types", "parameters": {"properties": {"task_result": {"title": "Task Result", "type": "string"}, "task_confidence": {"title": "Task Confidence", "type": "number"}}, + "required": ["task_result", "task_confidence"], "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.47.0 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-test456\",\n \"object\": + \"chat.completion\",\n \"created\": 1727214472,\n \"model\": \"gpt-4o-mini\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_test789\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"TaskOutputModel\",\n + \ \"arguments\": \"{\\\"task_result\\\":\\\"The priority system is working correctly with task-level settings taking precedence over agent-level settings.\\\",\\\"task_confidence\\\":0.95}\"\n }\n }\n + \ ],\n \"refusal\": null\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 120,\n \"completion_tokens\": 30,\n \"total_tokens\": 150,\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0\n }\n },\n \"system_fingerprint\": \"fp_test\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8c85f9b2fc671cf3-GRU + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 24 Sep 2024 21:47:52 GMT + Server: + - cloudflare + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '163' + openai-version: + - '2020-10-01' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999947' + http_version: HTTP/1.1 + status_code: 200 +version: 1 diff --git a/tests/test_task.py b/tests/test_task.py index 0e304df54..ec9556f7b 100644 --- a/tests/test_task.py +++ b/tests/test_task.py @@ -1635,3 +1635,62 @@ def test_task_interpolation_with_hyphens(): assert "say hello world" in task.prompt() assert result.raw == "Hello, World!" + +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_task_output_json_overrides_llm_response_format(): + """Test that task.output_json takes priority over llm.response_format when both are set. + + This test addresses issue #3639: when both a task's output_json and an agent's LLM + response_format are set with pydantic models, the task-level setting should take + precedence over the agent-level setting. + """ + + from crewai.llm import LLM + + class TaskOutputModel(BaseModel): + """Expected output model for the task.""" + task_result: str + task_confidence: float + + class LLMOutputModel(BaseModel): + """Different output model set on the LLM.""" + llm_answer: str + llm_score: int + + llm = LLM(model="gpt-4o-mini", response_format=LLMOutputModel) + + agent = Agent( + role="Test Agent", + goal="Test goal for priority testing", + backstory="Test backstory for priority testing", + llm=llm, + allow_delegation=False, + ) + + task = Task( + description="Analyze the priority system and provide a result", + expected_output="A structured result with task_result and task_confidence fields", + agent=agent, + output_json=TaskOutputModel, + ) + + crew = Crew(agents=[agent], tasks=[task], process=Process.sequential) + result = crew.kickoff() + + assert result.json_dict is not None, "Result should have json_dict output" + assert "task_result" in result.json_dict, ( + "Should have task_result field from TaskOutputModel. " + "Task-level output_json should override LLM-level response_format." + ) + assert "task_confidence" in result.json_dict, ( + "Should have task_confidence field from TaskOutputModel" + ) + assert "llm_answer" not in result.json_dict, ( + "Should not have llm_answer field from LLMOutputModel. " + "This proves task-level output_json took precedence over LLM-level response_format." + ) + assert "llm_score" not in result.json_dict, ( + "Should not have llm_score field from LLMOutputModel" + ) + assert isinstance(result.json_dict["task_result"], str), "task_result should be a string" + assert isinstance(result.json_dict["task_confidence"], (int, float)), "task_confidence should be a number"