diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index 8a4ac2edd..1c6a9b060 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -968,7 +968,10 @@ class LLM(BaseLLM): self._handle_streaming_callbacks(callbacks, usage_info, last_chunk) if not tool_calls or not available_functions: - if response_model and self.is_litellm: + # Only use InternalInstructor for structured output when there + # are no tools — otherwise tools would be silently discarded. + has_tools = bool(params.get("tools")) + if response_model and self.is_litellm and not has_tools: instructor_instance = InternalInstructor( content=full_response, model=response_model, @@ -1150,7 +1153,10 @@ class LLM(BaseLLM): str: The response text """ # --- 1) Handle response_model with InternalInstructor for LiteLLM - if response_model and self.is_litellm: + # Skip InternalInstructor when tools are present so the LLM can + # see and call the agent's tools before returning structured output. + has_tools = bool(params.get("tools")) + if response_model and self.is_litellm and not has_tools: from crewai.utilities.internal_instructor import InternalInstructor messages = params.get("messages", []) @@ -1290,7 +1296,10 @@ class LLM(BaseLLM): Returns: str: The response text """ - if response_model and self.is_litellm: + # Skip InternalInstructor when tools are present so the LLM can + # see and call the agent's tools before returning structured output. + has_tools = bool(params.get("tools")) + if response_model and self.is_litellm and not has_tools: from crewai.utilities.internal_instructor import InternalInstructor messages = params.get("messages", []) diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py index 71cb69790..1b2066756 100644 --- a/lib/crewai/tests/test_llm.py +++ b/lib/crewai/tests/test_llm.py @@ -1022,3 +1022,202 @@ async def test_usage_info_streaming_with_acall(): assert llm._token_usage["total_tokens"] > 0 assert len(result) > 0 + + +# --- Tests for issue #4697: tools discarded when response_model is set --- + + +class MyOutput(BaseModel): + name: str + value: str + + +def _make_tool_call_response(): + """Create a mock litellm response that contains a tool call.""" + mock_message = MagicMock() + mock_message.content = "" + mock_tool_call = MagicMock() + mock_tool_call.function.name = "my_search_tool" + mock_tool_call.function.arguments = '{"query": "test"}' + mock_tool_call.id = "call_123" + mock_message.tool_calls = [mock_tool_call] + mock_choice = MagicMock() + mock_choice.message = mock_message + mock_response = MagicMock() + mock_response.choices = [mock_choice] + mock_response.usage = MagicMock() + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 5 + mock_response.usage.total_tokens = 15 + return mock_response + + +def _make_text_response(text: str = '{"name": "Alice", "value": "42"}'): + """Create a mock litellm response that contains only text content.""" + mock_message = MagicMock() + mock_message.content = text + mock_message.tool_calls = [] + mock_choice = MagicMock() + mock_choice.message = mock_message + mock_response = MagicMock() + mock_response.choices = [mock_choice] + mock_response.usage = MagicMock() + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 5 + mock_response.usage.total_tokens = 15 + return mock_response + + +def _get_tool_schema(): + return { + "type": "function", + "function": { + "name": "my_search_tool", + "description": "Search for data", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "The search query"} + }, + "required": ["query"], + }, + }, + } + + +def test_non_streaming_response_model_with_tools_skips_instructor(): + """When response_model AND tools are both present, InternalInstructor must NOT + be used so that LiteLLM sees the tools and can return tool_calls.""" + llm = LLM(model="gpt-4o-mini", is_litellm=True) + + tool_schema = _get_tool_schema() + tool_call_resp = _make_tool_call_response() + + with patch("litellm.completion", return_value=tool_call_resp) as mock_completion: + result = llm.call( + messages=[{"role": "user", "content": "Find the name and value"}], + tools=[tool_schema], + available_functions=None, # executor handles tool execution + response_model=MyOutput, + ) + + # litellm.completion must have been called (not InternalInstructor) + mock_completion.assert_called_once() + # The result should be tool_calls (a list), not a structured JSON string + assert isinstance(result, list), ( + "Expected tool_calls list but got a string/model — " + "InternalInstructor likely intercepted the call" + ) + + +def test_non_streaming_response_model_without_tools_uses_instructor(): + """When response_model is set but NO tools, InternalInstructor should still + be used for structured output conversion.""" + llm = LLM(model="gpt-4o-mini", is_litellm=True) + + with patch( + "crewai.utilities.internal_instructor.InternalInstructor" + ) as MockInstructor: + mock_instance = MagicMock() + mock_pydantic = MyOutput(name="Alice", value="42") + mock_instance.to_pydantic.return_value = mock_pydantic + MockInstructor.return_value = mock_instance + + result = llm.call( + messages=[{"role": "user", "content": "Find the name and value"}], + tools=None, + response_model=MyOutput, + ) + + # InternalInstructor should have been used + MockInstructor.assert_called_once() + assert '"Alice"' in result + + +def test_non_streaming_response_model_with_tools_returns_tool_calls(): + """Verify that when LLM returns tool_calls with response_model + tools, + the tool_calls are returned to the caller (not swallowed).""" + llm = LLM(model="gpt-4o-mini", is_litellm=True) + + tool_schema = _get_tool_schema() + tool_call_resp = _make_tool_call_response() + + with patch("litellm.completion", return_value=tool_call_resp): + result = llm.call( + messages=[{"role": "user", "content": "Find the name and value"}], + tools=[tool_schema], + available_functions=None, + response_model=MyOutput, + ) + + # The result should be the raw tool_calls list + assert isinstance(result, list) + assert len(result) == 1 + assert result[0].function.name == "my_search_tool" + + +def test_non_streaming_response_model_with_tools_text_response(): + """When LLM returns text (not tool_calls) with both response_model and tools, + the text response should be returned normally.""" + llm = LLM(model="gpt-4o-mini", is_litellm=True) + + tool_schema = _get_tool_schema() + text_resp = _make_text_response('{"name": "Alice", "value": "42"}') + + with patch("litellm.completion", return_value=text_resp): + result = llm.call( + messages=[{"role": "user", "content": "Find the name and value"}], + tools=[tool_schema], + available_functions=None, + response_model=MyOutput, + ) + + # Text response should be returned as-is + assert isinstance(result, str) + assert "Alice" in result + + +@pytest.mark.asyncio +async def test_async_non_streaming_response_model_with_tools_skips_instructor(): + """Async variant: response_model + tools should skip InternalInstructor.""" + llm = LLM(model="gpt-4o-mini", is_litellm=True) + + tool_schema = _get_tool_schema() + tool_call_resp = _make_tool_call_response() + + with patch("litellm.acompletion", return_value=tool_call_resp) as mock_acompletion: + result = await llm.acall( + messages=[{"role": "user", "content": "Find the name and value"}], + tools=[tool_schema], + available_functions=None, + response_model=MyOutput, + ) + + mock_acompletion.assert_called_once() + assert isinstance(result, list), ( + "Expected tool_calls list but got a string/model — " + "InternalInstructor likely intercepted the call" + ) + + +@pytest.mark.asyncio +async def test_async_non_streaming_response_model_without_tools_uses_instructor(): + """Async variant: response_model without tools should use InternalInstructor.""" + llm = LLM(model="gpt-4o-mini", is_litellm=True) + + with patch( + "crewai.utilities.internal_instructor.InternalInstructor" + ) as MockInstructor: + mock_instance = MagicMock() + mock_pydantic = MyOutput(name="Alice", value="42") + mock_instance.to_pydantic.return_value = mock_pydantic + MockInstructor.return_value = mock_instance + + result = await llm.acall( + messages=[{"role": "user", "content": "Find the name and value"}], + tools=None, + response_model=MyOutput, + ) + + MockInstructor.assert_called_once() + assert '"Alice"' in result