"""Tests for LLM.call() tool loop and LLMResult. All LLM calls are mocked — no real API traffic. """ from __future__ import annotations import json from types import SimpleNamespace from typing import Any from unittest.mock import MagicMock, patch import pytest from crewai.llm_result import ( LLMResult, ToolCallRecord, _lookup_pricing, estimate_cost_usd, ) def _make_litellm_llm(model: str = "gpt-4o") -> Any: """Create an LLM instance that uses the litellm fallback path.""" from crewai.llm import LLM return LLM(model=model, is_litellm=True) # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_tool_call(name: str, arguments: dict, call_id: str = "call_1"): """Build a tool-call object using litellm's actual types.""" try: from litellm.types.utils import ( ChatCompletionMessageToolCall, Function, ) return ChatCompletionMessageToolCall( id=call_id, function=Function(name=name, arguments=json.dumps(arguments)), type="function", ) except ImportError: func = SimpleNamespace(name=name, arguments=json.dumps(arguments)) return SimpleNamespace(id=call_id, function=func, type="function") def _make_model_response(content: str | None = None, tool_calls: list | None = None): """Build a minimal mock ModelResponse that passes isinstance checks. We need it to be an instance of litellm's ModelResponse/ModelResponseBase so the internal isinstance() checks work. We import those types when litellm is available. """ try: from litellm.types.utils import ( Choices, Message, ModelResponse, Usage, ) message = Message(content=content, tool_calls=tool_calls or None) choice = Choices(message=message, finish_reason="stop", index=0) resp = ModelResponse( choices=[choice], usage=Usage( prompt_tokens=100, completion_tokens=50, total_tokens=150, ), ) return resp except ImportError: # Fallback to SimpleNamespace if litellm not installed message = SimpleNamespace(content=content, tool_calls=tool_calls or []) choice = SimpleNamespace(message=message, finish_reason="stop") usage = SimpleNamespace( prompt_tokens=100, completion_tokens=50, total_tokens=150, ) resp = SimpleNamespace( choices=[choice], model_extra={"usage": usage}, ) return resp DUMMY_TOOL_SCHEMA = [ { "type": "function", "function": { "name": "get_weather", "description": "Get weather for a city", "parameters": { "type": "object", "properties": { "city": {"type": "string"}, }, "required": ["city"], }, }, } ] # --------------------------------------------------------------------------- # Unit tests for LLMResult / ToolCallRecord # --------------------------------------------------------------------------- class TestLLMResultModels: def test_tool_call_record_defaults(self): r = ToolCallRecord(name="foo") assert r.input == {} assert r.output == "" assert r.duration_ms == 0.0 assert r.is_error is False def test_llm_result_defaults(self): r = LLMResult() assert r.text == "" assert r.tool_calls == [] assert r.cost_usd == 0.0 assert r.iterations == 0 assert r.usage.total_tokens == 0 def test_llm_result_with_data(self): r = LLMResult( text="hello", tool_calls=[ToolCallRecord(name="foo", input={"a": 1}, output="bar")], iterations=2, cost_usd=0.005, ) assert r.text == "hello" assert len(r.tool_calls) == 1 assert r.tool_calls[0].name == "foo" # --------------------------------------------------------------------------- # Cost estimation # --------------------------------------------------------------------------- class TestCostEstimation: def test_known_model(self): cost = estimate_cost_usd("gpt-4o", prompt_tokens=1_000_000, completion_tokens=0) assert cost == pytest.approx(2.50) def test_known_model_output(self): cost = estimate_cost_usd("gpt-4o", prompt_tokens=0, completion_tokens=1_000_000) assert cost == pytest.approx(10.00) def test_unknown_model_returns_zero(self): cost = estimate_cost_usd("some-random-model-xyz", 1000, 1000) assert cost == 0.0 def test_provider_prefix_stripped(self): cost = estimate_cost_usd("anthropic/claude-sonnet-4-6", 1_000_000, 0) assert cost == pytest.approx(3.00) def test_partial_match(self): # "claude-sonnet-4-6-20250514" should match "claude-sonnet-4-6" cost = estimate_cost_usd("claude-sonnet-4-6-20250514", 1_000_000, 0) assert cost == pytest.approx(3.00) def test_lookup_none(self): assert _lookup_pricing("") is None assert _lookup_pricing("nonexistent") is None # --------------------------------------------------------------------------- # LLM.call() backwards compatibility (no tools → returns str) # --------------------------------------------------------------------------- class TestCallBackwardsCompat: """LLM.call() without tools must return str exactly as before.""" @patch("crewai.llm.litellm") def test_call_without_tools_returns_str(self, mock_litellm): """Plain call without tools should return a string.""" mock_litellm.completion.return_value = _make_model_response(content="Hello world") mock_litellm.drop_params = True mock_litellm.suppress_debug_info = True mock_litellm.success_callback = [] mock_litellm._async_success_callback = [] mock_litellm.callbacks = [] llm = _make_litellm_llm() result = llm.call("Say hello") assert isinstance(result, str) assert result == "Hello world" # --------------------------------------------------------------------------- # LLM.call() with tools → returns LLMResult # --------------------------------------------------------------------------- class TestCallWithToolLoop: """When tools + available_functions are passed, call() returns LLMResult.""" @patch("crewai.llm.litellm") def test_single_tool_call_then_text(self, mock_litellm): """Model calls one tool, then responds with text.""" mock_litellm.drop_params = True mock_litellm.suppress_debug_info = True mock_litellm.success_callback = [] mock_litellm._async_success_callback = [] mock_litellm.callbacks = [] # First call: model wants to call get_weather tool_call = _make_tool_call("get_weather", {"city": "SF"}) resp1 = _make_model_response(content=None, tool_calls=[tool_call]) # Second call: model responds with text resp2 = _make_model_response(content="It's sunny in SF!") mock_litellm.completion.side_effect = [resp1, resp2] llm = _make_litellm_llm() def get_weather(city: str) -> str: return f"Sunny, 72°F in {city}" result = llm.call( messages="What's the weather in SF?", tools=DUMMY_TOOL_SCHEMA, available_functions={"get_weather": get_weather}, ) assert isinstance(result, LLMResult) assert result.text == "It's sunny in SF!" assert len(result.tool_calls) == 1 assert result.tool_calls[0].name == "get_weather" assert result.tool_calls[0].input == {"city": "SF"} assert "Sunny" in result.tool_calls[0].output assert result.tool_calls[0].is_error is False assert result.iterations == 2 @patch("crewai.llm.litellm") def test_multiple_tool_calls_in_sequence(self, mock_litellm): """Model calls two tools across two iterations.""" mock_litellm.drop_params = True mock_litellm.suppress_debug_info = True mock_litellm.success_callback = [] mock_litellm._async_success_callback = [] mock_litellm.callbacks = [] tc1 = _make_tool_call("get_weather", {"city": "SF"}, "call_1") resp1 = _make_model_response(content=None, tool_calls=[tc1]) tc2 = _make_tool_call("get_weather", {"city": "NYC"}, "call_2") resp2 = _make_model_response(content=None, tool_calls=[tc2]) resp3 = _make_model_response(content="SF is sunny, NYC is rainy.") mock_litellm.completion.side_effect = [resp1, resp2, resp3] llm = _make_litellm_llm() def get_weather(city: str) -> str: return f"Weather for {city}: fine" result = llm.call( messages="Compare SF and NYC weather", tools=DUMMY_TOOL_SCHEMA, available_functions={"get_weather": get_weather}, ) assert isinstance(result, LLMResult) assert len(result.tool_calls) == 2 assert result.tool_calls[0].input["city"] == "SF" assert result.tool_calls[1].input["city"] == "NYC" assert result.iterations == 3 @patch("crewai.llm.litellm") def test_max_iterations_stops_loop(self, mock_litellm): """Loop stops when max_iterations is reached.""" mock_litellm.drop_params = True mock_litellm.suppress_debug_info = True mock_litellm.success_callback = [] mock_litellm._async_success_callback = [] mock_litellm.callbacks = [] # Model always wants to call a tool — never stops def make_tool_resp(): tc = _make_tool_call("get_weather", {"city": "SF"}) return _make_model_response(content=None, tool_calls=[tc]) mock_litellm.completion.side_effect = [make_tool_resp() for _ in range(5)] llm = _make_litellm_llm() result = llm.call( messages="Loop forever", tools=DUMMY_TOOL_SCHEMA, available_functions={"get_weather": lambda city: "sunny"}, max_iterations=3, ) assert isinstance(result, LLMResult) assert result.iterations == 3 assert len(result.tool_calls) == 3 # Should have a text noting max iterations assert "Max iterations" in result.text @patch("crewai.llm.litellm") def test_tool_error_handling(self, mock_litellm): """Tool that raises an exception is captured in the record.""" mock_litellm.drop_params = True mock_litellm.suppress_debug_info = True mock_litellm.success_callback = [] mock_litellm._async_success_callback = [] mock_litellm.callbacks = [] tc = _make_tool_call("get_weather", {"city": "SF"}) resp1 = _make_model_response(content=None, tool_calls=[tc]) resp2 = _make_model_response(content="Sorry, couldn't get weather.") mock_litellm.completion.side_effect = [resp1, resp2] llm = _make_litellm_llm() def broken_weather(city: str) -> str: raise RuntimeError("API down") result = llm.call( messages="Weather?", tools=DUMMY_TOOL_SCHEMA, available_functions={"get_weather": broken_weather}, ) assert isinstance(result, LLMResult) assert len(result.tool_calls) == 1 assert result.tool_calls[0].is_error is True assert "API down" in result.tool_calls[0].output assert result.text == "Sorry, couldn't get weather." @patch("crewai.llm.litellm") def test_unknown_function_error(self, mock_litellm): """Tool call for a function not in available_functions.""" mock_litellm.drop_params = True mock_litellm.suppress_debug_info = True mock_litellm.success_callback = [] mock_litellm._async_success_callback = [] mock_litellm.callbacks = [] tc = _make_tool_call("nonexistent_tool", {}) resp1 = _make_model_response(content=None, tool_calls=[tc]) resp2 = _make_model_response(content="I couldn't find that tool.") mock_litellm.completion.side_effect = [resp1, resp2] llm = _make_litellm_llm() result = llm.call( messages="Do something", tools=DUMMY_TOOL_SCHEMA, available_functions={"get_weather": lambda city: "sunny"}, ) assert isinstance(result, LLMResult) assert result.tool_calls[0].is_error is True assert "unknown function" in result.tool_calls[0].output @patch("crewai.llm.litellm") def test_cost_estimation_populated(self, mock_litellm): """cost_usd is populated from token usage and model pricing.""" mock_litellm.drop_params = True mock_litellm.suppress_debug_info = True mock_litellm.success_callback = [] mock_litellm._async_success_callback = [] mock_litellm.callbacks = [] resp = _make_model_response(content="Done!") mock_litellm.completion.return_value = resp llm = _make_litellm_llm() result = llm.call( messages="Hello", tools=DUMMY_TOOL_SCHEMA, available_functions={"get_weather": lambda city: "sunny"}, ) assert isinstance(result, LLMResult) # cost_usd should be >= 0 (may be 0 if usage tracking didn't fire, # but the field should exist and be a float) assert isinstance(result.cost_usd, float) @patch("crewai.llm.litellm") def test_immediate_text_response_with_tools(self, mock_litellm): """Model responds with text on first call (no tool use).""" mock_litellm.drop_params = True mock_litellm.suppress_debug_info = True mock_litellm.success_callback = [] mock_litellm._async_success_callback = [] mock_litellm.callbacks = [] resp = _make_model_response(content="I know the answer already.") mock_litellm.completion.return_value = resp llm = _make_litellm_llm() result = llm.call( messages="What's 2+2?", tools=DUMMY_TOOL_SCHEMA, available_functions={"get_weather": lambda city: "sunny"}, ) assert isinstance(result, LLMResult) assert result.text == "I know the answer already." assert len(result.tool_calls) == 0 assert result.iterations == 1