Compare commits

...

9 Commits

Author SHA1 Message Date
lorenzejay
d86259b0b9 fixed test for actual usage 2025-07-11 17:17:32 -07:00
lorenzejay
1f106015ea Merge branch 'main' of github.com:crewAIInc/crewAI into devin/1751908431-fix-lite-agent-llm-isinstance-check 2025-07-11 17:10:54 -07:00
Lucas Gomide
f388890971 Merge branch 'main' into devin/1751908431-fix-lite-agent-llm-isinstance-check 2025-07-08 11:00:59 -03:00
Devin AI
3220575d29 Fix test_lite_agent_with_invalid_llm using proper mocking
- Mock create_llm to return None to properly test isinstance validation
- Addresses lucasgomide's comment about tests still failing
- All lite_agent tests now pass locally (13 passed, 0 failed)

Co-Authored-By: João <joao@crewai.com>
2025-07-07 18:46:26 +00:00
Devin AI
6be376f804 Fix test failures: improve CustomLLM and error handling
- Fix CustomLLM to handle structured output for guardrails with JSON response
- Add proper method implementations (supports_function_calling, etc.)
- Handle 'Thought:' pattern like working CustomLLM implementation
- Change invalid LLM test to use LiteAgent instead of Agent
- Improve error messages to use type() instead of __class__
- Address GitHub review feedback for better error handling

Co-Authored-By: João <joao@crewai.com>
2025-07-07 18:37:03 +00:00
Devin AI
5b548d618d Improve CustomLLM test implementation and error handling
- Fix CustomLLM.call method to avoid modifying messages parameter
- Add better error messages for isinstance checks as suggested in review
- Replace assert with proper exception handling in guardrail validation
- Add type hints to CustomLLM test class
- Add edge case test for invalid LLM type

Co-Authored-By: João <joao@crewai.com>
2025-07-07 17:27:13 +00:00
Devin AI
2ffed3ccf0 Fix lint issues: remove unused imports from guardrail classes
- Remove unused LLM import from hallucination_guardrail.py
- Remove unused LLM, Optional, and Task imports from llm_guardrail.py
- Fixes ruff lint errors: F401 imported but unused

Co-Authored-By: João <joao@crewai.com>
2025-07-07 17:23:28 +00:00
Devin AI
1ea3fc44fa Fix type annotations in guardrail classes to accept BaseLLM
- Update LLMGuardrail to accept BaseLLM instead of LLM in constructor
- Update HallucinationGuardrail to accept BaseLLM instead of LLM in constructor
- Add BaseLLM imports to both guardrail classes
- Fixes type-checker CI failure: 'Argument llm to LLMGuardrail has incompatible type BaseLLM; expected LLM'

Co-Authored-By: João <joao@crewai.com>
2025-07-07 17:20:08 +00:00
Devin AI
6e91a26785 Fix LiteAgent isinstance checks to accept BaseLLM instances
- Fix setup_llm method to check isinstance(self.llm, BaseLLM) instead of LLM
- Fix ensure_guardrail_is_callable method to check isinstance(self.llm, BaseLLM)
- Add comprehensive test for CustomLLM with guardrails functionality
- Resolves issue #3112 where CustomLLM classes couldn't use guardrails

Co-Authored-By: João <joao@crewai.com>
2025-07-07 17:16:08 +00:00
4 changed files with 154 additions and 36 deletions
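
For context on the core fix (6e91a26785): crewai's concrete LLM class and the BaseLLM abstract base it inherits from are distinct types, so an isinstance check against LLM rejects every custom implementation that subclasses BaseLLM directly. A minimal self-contained sketch of the distinction — the classes below are simplified stand-ins for illustration, not crewai's actual definitions:

    # Simplified stand-ins for crewai.llm.LLM and crewai.llms.base_llm.BaseLLM,
    # for illustration only -- not the library's actual definitions.
    from abc import ABC, abstractmethod

    class BaseLLM(ABC):
        """Abstract base that all LLM implementations subclass."""

        def __init__(self, model: str):
            self.model = model

        @abstractmethod
        def call(self, messages) -> str: ...

    class LLM(BaseLLM):
        """Stand-in for the built-in implementation."""

        def call(self, messages) -> str:
            return "response from the built-in model"

    class CustomLLM(BaseLLM):
        """A user-defined model, as in issue #3112."""

        def call(self, messages) -> str:
            return "response from a custom model"

    custom = CustomLLM(model="my-model")
    assert isinstance(custom, BaseLLM)   # the new check: passes
    assert not isinstance(custom, LLM)   # the old check: rejected every custom LLM

The diffs below apply exactly this substitution in LiteAgent's setup_llm and in both guardrail constructors.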

View File: crewai/lite_agent.py

@@ -41,6 +41,7 @@ from crewai.agents.parser import (
 )
 from crewai.flow.flow_trackable import FlowTrackable
 from crewai.llm import LLM
+from crewai.llms.base_llm import BaseLLM
 from crewai.tools.base_tool import BaseTool
 from crewai.tools.structured_tool import CrewStructuredTool
 from crewai.utilities import I18N
@@ -209,8 +210,8 @@ class LiteAgent(FlowTrackable, BaseModel):
     def setup_llm(self):
         """Set up the LLM and other components after initialization."""
         self.llm = create_llm(self.llm)
-        if not isinstance(self.llm, LLM):
-            raise ValueError("Unable to create LLM instance")
+        if not isinstance(self.llm, BaseLLM):
+            raise ValueError(f"Expected LLM instance of type BaseLLM, got {type(self.llm).__name__}")

         # Initialize callbacks
         token_callback = TokenCalcHandler(token_cost_process=self._token_process)
@@ -232,7 +233,8 @@ class LiteAgent(FlowTrackable, BaseModel):
         elif isinstance(self.guardrail, str):
             from crewai.tasks.llm_guardrail import LLMGuardrail

-            assert isinstance(self.llm, LLM)
+            if not isinstance(self.llm, BaseLLM):
+                raise TypeError(f"Guardrail requires LLM instance of type BaseLLM, got {type(self.llm).__name__}")
             self._guardrail = LLMGuardrail(description=self.guardrail, llm=self.llm)
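
As a hypothetical illustration of the new error path (mirroring the hunk above, not an excerpt from the codebase), the widened check now names the offending type instead of the generic "Unable to create LLM instance":

    # Hypothetical illustration of the new validation in setup_llm.
    from crewai.llms.base_llm import BaseLLM

    llm = None  # what create_llm returns for an unresolvable model string

    if not isinstance(llm, BaseLLM):
        raise ValueError(
            f"Expected LLM instance of type BaseLLM, got {type(llm).__name__}"
        )
    # -> ValueError: Expected LLM instance of type BaseLLM, got NoneType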

View File: crewai/tasks/hallucination_guardrail.py

@@ -8,7 +8,7 @@ Classes:
 from typing import Any, Optional, Tuple

-from crewai.llm import LLM
+from crewai.llms.base_llm import BaseLLM
 from crewai.tasks.task_output import TaskOutput
 from crewai.utilities.logger import Logger
@@ -47,7 +47,7 @@ class HallucinationGuardrail:
     def __init__(
         self,
         context: str,
-        llm: LLM,
+        llm: BaseLLM,
         threshold: Optional[float] = None,
         tool_response: str = "",
     ):
@@ -60,7 +60,7 @@
             tool_response: Optional tool response information that would be used in evaluation.
         """
         self.context = context
-        self.llm: LLM = llm
+        self.llm: BaseLLM = llm
         self.threshold = threshold
         self.tool_response = tool_response
         self._logger = Logger(verbose=True)

View File: crewai/tasks/llm_guardrail.py

@@ -1,10 +1,9 @@
-from typing import Any, Optional, Tuple
+from typing import Any, Tuple

 from pydantic import BaseModel, Field

 from crewai.agent import Agent, LiteAgentOutput
-from crewai.llm import LLM
-from crewai.task import Task
+from crewai.llms.base_llm import BaseLLM
 from crewai.tasks.task_output import TaskOutput
@@ -32,11 +31,11 @@ class LLMGuardrail:
     def __init__(
         self,
         description: str,
-        llm: LLM,
+        llm: BaseLLM,
     ):
         self.description = description
-        self.llm: LLM = llm
+        self.llm: BaseLLM = llm

     def _validate_output(self, task_output: TaskOutput) -> LiteAgentOutput:
         agent = Agent(
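
With both constructors widened, either guardrail can be built around any BaseLLM subclass. A rough sketch, assuming the module paths match the imports shown above and that a subclass only needs to override call — the EchoLLM stub is hypothetical:

    from crewai.llms.base_llm import BaseLLM
    from crewai.tasks.hallucination_guardrail import HallucinationGuardrail
    from crewai.tasks.llm_guardrail import LLMGuardrail

    class EchoLLM(BaseLLM):
        """Hypothetical minimal BaseLLM subclass, used only for this sketch."""

        def call(self, messages, tools=None, callbacks=None, available_functions=None) -> str:
            return "the output is consistent with the context"

    llm = EchoLLM(model="echo")

    # Both constructors now type-check: the annotations accept BaseLLM, not just LLM.
    hallucination_guard = HallucinationGuardrail(
        context="Tokyo has roughly 37 million residents.",
        llm=llm,
    )
    string_guard = LLMGuardrail(description="Only include Brazilian players", llm=llm)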

View File: lite agent tests

@@ -146,12 +146,12 @@ def test_lite_agent_with_tools():
         "What is the population of Tokyo and how many people would that be per square kilometer if Tokyo's area is 2,194 square kilometers?"
     )

-    assert (
-        "21 million" in result.raw or "37 million" in result.raw
-    ), "Agent should find Tokyo's population"
-    assert (
-        "per square kilometer" in result.raw
-    ), "Agent should calculate population density"
+    assert "21 million" in result.raw or "37 million" in result.raw, (
+        "Agent should find Tokyo's population"
+    )
+    assert "per square kilometer" in result.raw, (
+        "Agent should calculate population density"
+    )

     received_events = []
@@ -316,11 +316,17 @@ def test_sets_parent_flow_when_inside_flow():
     flow.kickoff()

     assert captured_agent.parent_flow is flow


 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_guardrail_is_called_using_string():
     guardrail_events = defaultdict(list)
-    from crewai.utilities.events import LLMGuardrailCompletedEvent, LLMGuardrailStartedEvent
+    from crewai.utilities.events import (
+        LLMGuardrailCompletedEvent,
+        LLMGuardrailStartedEvent,
+    )

     with crewai_event_bus.scoped_handlers():

         @crewai_event_bus.on(LLMGuardrailStartedEvent)
         def capture_guardrail_started(source, event):
             guardrail_events["started"].append(event)
@@ -338,17 +344,26 @@ def test_guardrail_is_called_using_string():
     result = agent.kickoff(messages="Top 10 best players in the world?")

-    assert len(guardrail_events['started']) == 2
-    assert len(guardrail_events['completed']) == 2
-    assert not guardrail_events['completed'][0].success
-    assert guardrail_events['completed'][1].success
-    assert "Here are the top 10 best soccer players in the world, focusing exclusively on Brazilian players" in result.raw
+    assert len(guardrail_events["started"]) == 2
+    assert len(guardrail_events["completed"]) == 2
+    assert not guardrail_events["completed"][0].success
+    assert guardrail_events["completed"][1].success
+    assert (
+        "Here are the top 10 best soccer players in the world, focusing exclusively on Brazilian players"
+        in result.raw
+    )


 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_guardrail_is_called_using_callable():
     guardrail_events = defaultdict(list)
-    from crewai.utilities.events import LLMGuardrailCompletedEvent, LLMGuardrailStartedEvent
+    from crewai.utilities.events import (
+        LLMGuardrailCompletedEvent,
+        LLMGuardrailStartedEvent,
+    )

     with crewai_event_bus.scoped_handlers():

         @crewai_event_bus.on(LLMGuardrailStartedEvent)
         def capture_guardrail_started(source, event):
             guardrail_events["started"].append(event)
@@ -366,16 +381,22 @@ def test_guardrail_is_called_using_callable():
     result = agent.kickoff(messages="Top 1 best players in the world?")

-    assert len(guardrail_events['started']) == 1
-    assert len(guardrail_events['completed']) == 1
-    assert guardrail_events['completed'][0].success
+    assert len(guardrail_events["started"]) == 1
+    assert len(guardrail_events["completed"]) == 1
+    assert guardrail_events["completed"][0].success
     assert "Pelé - Santos, 1958" in result.raw


 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_guardrail_reached_attempt_limit():
     guardrail_events = defaultdict(list)
-    from crewai.utilities.events import LLMGuardrailCompletedEvent, LLMGuardrailStartedEvent
+    from crewai.utilities.events import (
+        LLMGuardrailCompletedEvent,
+        LLMGuardrailStartedEvent,
+    )

     with crewai_event_bus.scoped_handlers():

         @crewai_event_bus.on(LLMGuardrailStartedEvent)
         def capture_guardrail_started(source, event):
             guardrail_events["started"].append(event)
@@ -388,18 +409,23 @@ def test_guardrail_reached_attempt_limit():
         role="Sports Analyst",
         goal="Gather information about the best soccer players",
         backstory="""You are an expert at gathering and organizing information. You carefully collect details and present them in a structured way.""",
-        guardrail=lambda output: (False, "You are not allowed to include Brazilian players"),
+        guardrail=lambda output: (
+            False,
+            "You are not allowed to include Brazilian players",
+        ),
         guardrail_max_retries=2,
     )

-    with pytest.raises(Exception, match="Agent's guardrail failed validation after 2 retries"):
+    with pytest.raises(
+        Exception, match="Agent's guardrail failed validation after 2 retries"
+    ):
         agent.kickoff(messages="Top 10 best players in the world?")

-    assert len(guardrail_events['started']) == 3  # 2 retries + 1 initial call
-    assert len(guardrail_events['completed']) == 3  # 2 retries + 1 initial call
-    assert not guardrail_events['completed'][0].success
-    assert not guardrail_events['completed'][1].success
-    assert not guardrail_events['completed'][2].success
+    assert len(guardrail_events["started"]) == 3  # 2 retries + 1 initial call
+    assert len(guardrail_events["completed"]) == 3  # 2 retries + 1 initial call
+    assert not guardrail_events["completed"][0].success
+    assert not guardrail_events["completed"][1].success
+    assert not guardrail_events["completed"][2].success


 @pytest.mark.vcr(filter_headers=["authorization"])
@@ -412,9 +438,100 @@ def test_agent_output_when_guardrail_returns_base_model():
         role="Sports Analyst",
         goal="Gather information about the best soccer players",
         backstory="""You are an expert at gathering and organizing information. You carefully collect details and present them in a structured way.""",
-        guardrail=lambda output: (True, Player(name="Lionel Messi", country="Argentina")),
+        guardrail=lambda output: (
+            True,
+            Player(name="Lionel Messi", country="Argentina"),
+        ),
     )

     result = agent.kickoff(messages="Top 10 best players in the world?")
     assert result.pydantic == Player(name="Lionel Messi", country="Argentina")
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_lite_agent_with_custom_llm_and_guardrails():
+    """Test that CustomLLM (inheriting from BaseLLM) works with guardrails."""
+    from crewai.llms.base_llm import BaseLLM
+
+    class CustomLLM(BaseLLM):
+        def __init__(self, response: str = "Custom response"):
+            super().__init__(model="custom-model")
+            self.response = response
+            self.call_count = 0
+
+        def call(
+            self,
+            messages,
+            tools=None,
+            callbacks=None,
+            available_functions=None,
+            from_task=None,
+            from_agent=None,
+        ) -> str:
+            self.call_count += 1
+            if "valid" in str(messages) and "feedback" in str(messages):
+                return '{"valid": true, "feedback": null}'
+            if "Thought:" in str(messages):
+                return f"Thought: I will analyze soccer players\nFinal Answer: {self.response}"
+            return self.response
+
+        def supports_function_calling(self) -> bool:
+            return False
+
+        def supports_stop_words(self) -> bool:
+            return False
+
+        def get_context_window_size(self) -> int:
+            return 4096
+
+    custom_llm = CustomLLM(response="Brazilian soccer players are the best!")
+
+    agent = Agent(
+        role="Sports Analyst",
+        goal="Analyze soccer players",
+        backstory="You analyze soccer players and their performance.",
+        llm=custom_llm,
+        guardrail="Only include Brazilian players",
+    )
+
+    result = agent.kickoff("Tell me about the best soccer players")
+
+    assert custom_llm.call_count > 0
+    assert "Brazilian" in result.raw
+
+    custom_llm2 = CustomLLM(response="Original response")
+
+    def test_guardrail(output):
+        return (True, "Modified by guardrail")
+
+    agent2 = Agent(
+        role="Test Agent",
+        goal="Test goal",
+        backstory="Test backstory",
+        llm=custom_llm2,
+        guardrail=test_guardrail,
+    )
+
+    result2 = agent2.kickoff("Test message")
+    assert result2.raw == "Modified by guardrail"
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_lite_agent_with_invalid_llm():
+    """Test that LiteAgent raises proper error when create_llm returns None."""
+    from unittest.mock import patch
+
+    with patch("crewai.lite_agent.create_llm", return_value=None):
+        agent = Agent(
+            role="Test Agent",
+            goal="Test goal",
+            backstory="Test backstory",
+            llm="invalid-model",
+        )
+
+        with pytest.raises(ValueError) as exc_info:
+            agent.kickoff("Test message")
+
+        assert "Expected LLM instance of type BaseLLM" in str(exc_info.value)