mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-24 15:48:23 +00:00
Compare commits
1 Commits
devin/1767
...
devin/1767
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
83b07b9d23 |
@@ -55,7 +55,7 @@ Each webhook sends a list of events:
|
||||
}
|
||||
```
|
||||
|
||||
The `data` object structure varies by event type. Refer to the [event list](https://github.com/crewAIInc/crewAI/tree/main/lib/crewai/src/crewai/events/types) on GitHub.
|
||||
The `data` object structure varies by event type. Refer to the [event list](https://github.com/crewAIInc/crewAI/tree/main/src/crewai/utilities/events) on GitHub.
|
||||
|
||||
As requests are sent over HTTP, the order of events can't be guaranteed. If you need ordering, use the `timestamp` field.
|
||||
|
||||
|
||||
@@ -26,9 +26,3 @@ ACTION_REGEX: Final[re.Pattern[str]] = re.compile(
|
||||
# Matches an "Action Input:" label (each word optionally followed by a number)
# and captures everything after the colon; DOTALL lets the capture span lines.
ACTION_INPUT_ONLY_REGEX: Final[re.Pattern[str]] = re.compile(
    r"\s*Action\s*\d*\s*Input\s*\d*\s*:\s*(.*)",
    flags=re.DOTALL,
)
|
||||
# Regex to match "Action: None" or similar non-action values (none, n/a, na,
# "no action", "no_action"), case-insensitively.
# Group 1 is the non-action keyword; group 2 is any text that follows it after
# an optional "-", ":" or "(" separator (DOTALL lets it span newlines).
# The \b after the keyword requires it to be a whole word, so tool names that
# merely start with a keyword (e.g. "Action: Navigate", "Action: none_tool")
# are not mistaken for a non-action.
ACTION_NONE_REGEX: Final[re.Pattern[str]] = re.compile(
    r"Action\s*\d*\s*:\s*(none|n/a|na|no action|no_action)\b(?:\s*[-:(]?\s*(.*))?",
    re.IGNORECASE | re.DOTALL,
)
|
||||
|
||||
@@ -278,7 +278,20 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
)
|
||||
|
||||
self._invoke_step_callback(formatted_answer) # type: ignore[arg-type]
|
||||
self._append_message(formatted_answer.text) # type: ignore[union-attr,attr-defined]
|
||||
|
||||
# Properly attribute messages to avoid LLM hallucination of observations:
|
||||
# - LLM's response goes as assistant message
|
||||
# - Tool observation goes as user message (not assistant)
|
||||
if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
|
||||
# For tool use: append LLM response as assistant, observation as user
|
||||
self._append_message(formatted_answer.llm_response)
|
||||
if formatted_answer.result:
|
||||
self._append_message(
|
||||
f"Observation: {formatted_answer.result}", role="user"
|
||||
)
|
||||
else:
|
||||
# For final answer or other cases: append text as assistant
|
||||
self._append_message(formatted_answer.text) # type: ignore[union-attr,attr-defined]
|
||||
|
||||
except OutputParserError as e:
|
||||
formatted_answer = handle_output_parser_exception( # type: ignore[assignment]
|
||||
@@ -431,7 +444,20 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
)
|
||||
|
||||
self._invoke_step_callback(formatted_answer) # type: ignore[arg-type]
|
||||
self._append_message(formatted_answer.text) # type: ignore[union-attr,attr-defined]
|
||||
|
||||
# Properly attribute messages to avoid LLM hallucination of observations:
|
||||
# - LLM's response goes as assistant message
|
||||
# - Tool observation goes as user message (not assistant)
|
||||
if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
|
||||
# For tool use: append LLM response as assistant, observation as user
|
||||
self._append_message(formatted_answer.llm_response)
|
||||
if formatted_answer.result:
|
||||
self._append_message(
|
||||
f"Observation: {formatted_answer.result}", role="user"
|
||||
)
|
||||
else:
|
||||
# For final answer or other cases: append text as assistant
|
||||
self._append_message(formatted_answer.text) # type: ignore[union-attr,attr-defined]
|
||||
|
||||
except OutputParserError as e:
|
||||
formatted_answer = handle_output_parser_exception( # type: ignore[assignment]
|
||||
@@ -481,13 +507,18 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
Updated action or final answer.
|
||||
"""
|
||||
# Special case for add_image_tool
|
||||
# Note: Even for add_image_tool, we should not attribute tool output to assistant
|
||||
# to avoid LLM hallucination. The LLM's action is stored as assistant message,
|
||||
# and the tool result (image) is stored as user message.
|
||||
add_image_tool = self._i18n.tools("add_image")
|
||||
if (
|
||||
isinstance(add_image_tool, dict)
|
||||
and formatted_answer.tool.casefold().strip()
|
||||
== add_image_tool.get("name", "").casefold().strip()
|
||||
):
|
||||
self.messages.append({"role": "assistant", "content": tool_result.result})
|
||||
# Store original LLM response for proper message attribution
|
||||
formatted_answer.llm_response = formatted_answer.text
|
||||
formatted_answer.result = tool_result.result
|
||||
return formatted_answer
|
||||
|
||||
return handle_agent_action_core(
|
||||
|
||||
@@ -12,7 +12,6 @@ from json_repair import repair_json # type: ignore[import-untyped]
|
||||
from crewai.agents.constants import (
|
||||
ACTION_INPUT_ONLY_REGEX,
|
||||
ACTION_INPUT_REGEX,
|
||||
ACTION_NONE_REGEX,
|
||||
ACTION_REGEX,
|
||||
FINAL_ANSWER_ACTION,
|
||||
MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE,
|
||||
@@ -34,6 +33,7 @@ class AgentAction:
|
||||
tool_input: str
|
||||
text: str
|
||||
result: str | None = None
|
||||
llm_response: str | None = None # Original LLM response before observation appended
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -119,34 +119,6 @@ def parse(text: str) -> AgentAction | AgentFinish:
|
||||
thought=thought, tool=clean_action, tool_input=safe_tool_input, text=text
|
||||
)
|
||||
|
||||
# Check for "Action: None" or similar non-action values
|
||||
# This handles cases where the LLM indicates it cannot/should not use a tool
|
||||
action_none_match = ACTION_NONE_REGEX.search(text)
|
||||
if action_none_match:
|
||||
# Extract any additional content after "Action: None"
|
||||
additional_content = action_none_match.group(2)
|
||||
if additional_content:
|
||||
additional_content = additional_content.strip()
|
||||
# Remove trailing parenthesis if present (from patterns like "Action: None (reason)")
|
||||
if additional_content.startswith("(") and ")" in additional_content:
|
||||
additional_content = additional_content.split(")", 1)[-1].strip()
|
||||
elif additional_content.startswith(")"):
|
||||
additional_content = additional_content[1:].strip()
|
||||
|
||||
# Build the final answer from thought and any additional content
|
||||
final_answer = thought
|
||||
if additional_content:
|
||||
if final_answer:
|
||||
final_answer = f"{final_answer}\n\n{additional_content}"
|
||||
else:
|
||||
final_answer = additional_content
|
||||
|
||||
# If we still have no content, use a generic message
|
||||
if not final_answer:
|
||||
final_answer = "I cannot perform this action with the available tools."
|
||||
|
||||
return AgentFinish(thought=thought, output=final_answer, text=text)
|
||||
|
||||
if not ACTION_REGEX.search(text):
|
||||
raise OutputParserError(
|
||||
f"{MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE}\n{_I18N.slice('final_answer_format')}",
|
||||
|
||||
@@ -349,8 +349,18 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin):
|
||||
# Invoke step callback if configured
|
||||
self._invoke_step_callback(result)
|
||||
|
||||
# Append result message to conversation state
|
||||
if hasattr(result, "text"):
|
||||
# Properly attribute messages to avoid LLM hallucination of observations:
|
||||
# - LLM's response goes as assistant message
|
||||
# - Tool observation goes as user message (not assistant)
|
||||
if isinstance(result, AgentAction) and result.llm_response:
|
||||
# For tool use: append LLM response as assistant, observation as user
|
||||
self._append_message_to_state(result.llm_response)
|
||||
if result.result:
|
||||
self._append_message_to_state(
|
||||
f"Observation: {result.result}", role="user"
|
||||
)
|
||||
elif hasattr(result, "text"):
|
||||
# For final answer or other cases: append text as assistant
|
||||
self._append_message_to_state(result.text)
|
||||
|
||||
# Check if tool result became a final answer (result_as_answer flag)
|
||||
@@ -537,15 +547,19 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin):
|
||||
Returns:
|
||||
Updated action or final answer.
|
||||
"""
|
||||
# Special case for add_image_tool
|
||||
# Note: Even for add_image_tool, we should not attribute tool output to assistant
|
||||
# to avoid LLM hallucination. The LLM's action is stored as assistant message,
|
||||
# and the tool result (image) is stored as user message.
|
||||
add_image_tool = self._i18n.tools("add_image")
|
||||
if (
|
||||
isinstance(add_image_tool, dict)
|
||||
and formatted_answer.tool.casefold().strip()
|
||||
== add_image_tool.get("name", "").casefold().strip()
|
||||
):
|
||||
self.state.messages.append(
|
||||
{"role": "assistant", "content": tool_result.result}
|
||||
)
|
||||
# Store original LLM response for proper message attribution
|
||||
formatted_answer.llm_response = formatted_answer.text
|
||||
formatted_answer.result = tool_result.result
|
||||
return formatted_answer
|
||||
|
||||
return handle_agent_action_core(
|
||||
|
||||
@@ -582,7 +582,19 @@ class LiteAgent(FlowTrackable, BaseModel):
|
||||
show_logs=self._show_logs,
|
||||
)
|
||||
|
||||
self._append_message(formatted_answer.text, role="assistant")
|
||||
# Properly attribute messages to avoid LLM hallucination of observations:
|
||||
# - LLM's response goes as assistant message
|
||||
# - Tool observation goes as user message (not assistant)
|
||||
if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
|
||||
# For tool use: append LLM response as assistant, observation as user
|
||||
self._append_message(formatted_answer.llm_response, role="assistant")
|
||||
if formatted_answer.result:
|
||||
self._append_message(
|
||||
f"Observation: {formatted_answer.result}", role="user"
|
||||
)
|
||||
else:
|
||||
# For final answer or other cases: append text as assistant
|
||||
self._append_message(formatted_answer.text, role="assistant")
|
||||
except OutputParserError as e: # noqa: PERF203
|
||||
self._printer.print(
|
||||
content="Failed to parse LLM output. Retrying...",
|
||||
|
||||
@@ -382,10 +382,21 @@ def handle_agent_action_core(
|
||||
|
||||
Notes:
|
||||
- TODO: Remove messages parameter and its usage.
|
||||
- The observation is appended to formatted_answer.text for logging/trace
|
||||
purposes, but callers should use formatted_answer.llm_response for the
|
||||
assistant message (without observation) and append the observation
|
||||
separately as a user message to avoid LLM hallucination of observations.
|
||||
"""
|
||||
if step_callback:
|
||||
step_callback(tool_result)
|
||||
|
||||
# Store the original LLM response before appending observation
|
||||
# This is used by executors to correctly attribute messages:
|
||||
# - llm_response goes as assistant message
|
||||
# - observation goes as user message (to prevent LLM hallucination)
|
||||
formatted_answer.llm_response = formatted_answer.text
|
||||
|
||||
# Append observation to text for logging/trace purposes
|
||||
formatted_answer.text += f"\nObservation: {tool_result.result}"
|
||||
formatted_answer.result = tool_result.result
|
||||
|
||||
|
||||
@@ -360,92 +360,3 @@ def test_integration_valid_and_invalid():
|
||||
|
||||
|
||||
# TODO: ADD TEST TO MAKE SURE ** REMOVAL DOESN'T MESS UP ANYTHING
|
||||
|
||||
|
||||
# Tests for Action: None handling (Issue #4186)
|
||||
def test_action_none_basic():
    """'Action: None' yields an AgentFinish whose output carries the thought."""
    parsed = parser.parse("Thought: I cannot use any tool for this.\nAction: None")

    assert isinstance(parsed, AgentFinish)
    assert "I cannot use any tool for this." in parsed.output
|
||||
|
||||
|
||||
def test_action_none_with_reason_in_parentheses():
    """'Action: None (reason)' is parsed as an AgentFinish."""
    llm_output = (
        "Thought: The tool is not available.\nAction: None (direct response required)"
    )
    parsed = parser.parse(llm_output)

    assert isinstance(parsed, AgentFinish)
    assert "The tool is not available." in parsed.output
|
||||
|
||||
|
||||
def test_action_none_lowercase():
    """Lowercase 'Action: none' is parsed as an AgentFinish."""
    parsed = parser.parse("Thought: I should respond directly.\nAction: none")

    assert isinstance(parsed, AgentFinish)
    assert "I should respond directly." in parsed.output
|
||||
|
||||
|
||||
def test_action_na():
    """'Action: N/A' is parsed as an AgentFinish."""
    parsed = parser.parse("Thought: No action needed here.\nAction: N/A")

    assert isinstance(parsed, AgentFinish)
    assert "No action needed here." in parsed.output
|
||||
|
||||
|
||||
def test_action_na_lowercase():
    """Lowercase 'Action: n/a' is parsed as an AgentFinish."""
    parsed = parser.parse("Thought: This requires a direct answer.\nAction: n/a")

    assert isinstance(parsed, AgentFinish)
    assert "This requires a direct answer." in parsed.output
|
||||
|
||||
|
||||
def test_action_none_with_dash_separator():
    """'Action: None - reason' is parsed as an AgentFinish."""
    llm_output = (
        "Thought: I need to provide a direct response.\nAction: None - direct response"
    )
    parsed = parser.parse(llm_output)

    assert isinstance(parsed, AgentFinish)
    assert "I need to provide a direct response." in parsed.output
|
||||
|
||||
|
||||
def test_action_none_with_additional_content():
    """Text following 'Action: None' is kept in the final output.

    The parser builds the output from the thought plus whatever follows the
    non-action marker, so both parts must be present in ``result.output``.
    """
    text = "Thought: I analyzed the request.\nAction: None\nHere is my direct response to your question."
    result = parser.parse(text)

    assert isinstance(result, AgentFinish)
    assert "I analyzed the request." in result.output
    # Without this check the test would pass even if the trailing content
    # were silently dropped, which is the behaviour the test is named for.
    assert "Here is my direct response to your question." in result.output
|
||||
|
||||
|
||||
def test_action_no_action():
    """'Action: no action' is parsed as an AgentFinish."""
    parsed = parser.parse(
        "Thought: I will respond without using tools.\nAction: no action"
    )

    assert isinstance(parsed, AgentFinish)
    assert "I will respond without using tools." in parsed.output
|
||||
|
||||
|
||||
def test_action_none_without_thought():
    """With no thought and no trailing text, the generic fallback message is used."""
    expected_output = "I cannot perform this action with the available tools."
    parsed = parser.parse("Action: None")

    assert isinstance(parsed, AgentFinish)
    assert parsed.output == expected_output
|
||||
|
||||
|
||||
def test_action_none_preserves_original_text():
    """The AgentFinish keeps the unmodified input in its ``text`` attribute."""
    llm_output = "Thought: I cannot delegate this task.\nAction: None"
    parsed = parser.parse(llm_output)

    assert isinstance(parsed, AgentFinish)
    assert parsed.text == llm_output
|
||||
|
||||
|
||||
def test_action_none_with_colon_separator():
    """'Action: None: reason' is parsed as an AgentFinish."""
    llm_output = "Thought: Direct response needed.\nAction: None: providing direct answer"
    parsed = parser.parse(llm_output)

    assert isinstance(parsed, AgentFinish)
    assert "Direct response needed." in parsed.output
|
||||
|
||||
320
lib/crewai/tests/agents/test_observation_message_attribution.py
Normal file
320
lib/crewai/tests/agents/test_observation_message_attribution.py
Normal file
@@ -0,0 +1,320 @@
|
||||
"""Tests for proper message attribution to prevent LLM observation hallucination.
|
||||
|
||||
This module tests that tool observations are correctly attributed to user messages
|
||||
rather than assistant messages, which prevents the LLM from learning to hallucinate
|
||||
fake observations during tool calls.
|
||||
|
||||
Related to GitHub issue #4181.
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.agents.crew_agent_executor import CrewAgentExecutor
|
||||
from crewai.agents.parser import AgentAction, AgentFinish
|
||||
from crewai.tools.tool_types import ToolResult
|
||||
from crewai.utilities.agent_utils import handle_agent_action_core
|
||||
|
||||
|
||||
@pytest.fixture
def mock_llm() -> MagicMock:
    """Provide a mock LLM that reports stop-word support and has an empty stop list."""
    return MagicMock(
        **{
            "supports_stop_words.return_value": True,
            "stop": [],
        }
    )
|
||||
|
||||
|
||||
@pytest.fixture
def mock_agent() -> MagicMock:
    """Provide a mock agent with fixed role, key, id and verbosity disabled."""
    return MagicMock(
        role="Test Agent",
        key="test_agent_key",
        verbose=False,
        id="test_agent_id",
    )
|
||||
|
||||
|
||||
@pytest.fixture
def mock_task() -> MagicMock:
    """Provide a mock task carrying only a description."""
    return MagicMock(description="Test task description")
|
||||
|
||||
|
||||
@pytest.fixture
def mock_crew() -> MagicMock:
    """Provide a mock crew with ``verbose`` and ``_train`` both set to False."""
    return MagicMock(verbose=False, _train=False)
|
||||
|
||||
|
||||
@pytest.fixture
def mock_tools_handler() -> MagicMock:
    """Provide an unconfigured MagicMock standing in for the tools handler."""
    handler = MagicMock()
    return handler
|
||||
|
||||
|
||||
@pytest.fixture
def executor(
    mock_llm: MagicMock,
    mock_agent: MagicMock,
    mock_task: MagicMock,
    mock_crew: MagicMock,
    mock_tools_handler: MagicMock,
) -> CrewAgentExecutor:
    """Build a CrewAgentExecutor wired to the mock collaborators, with no tools."""
    # Collaborators supplied by the other fixtures in this module.
    collaborators = {
        "llm": mock_llm,
        "task": mock_task,
        "crew": mock_crew,
        "agent": mock_agent,
        "tools_handler": mock_tools_handler,
    }
    # Static configuration: empty tool set, "Observation:" as the stop word.
    settings = {
        "prompt": {"prompt": "Test prompt {input} {tool_names} {tools}"},
        "max_iter": 5,
        "tools": [],
        "tools_names": "",
        "stop_words": ["Observation:"],
        "tools_description": "",
    }
    return CrewAgentExecutor(**collaborators, **settings)
|
||||
|
||||
|
||||
class TestHandleAgentActionCore:
    """Unit tests for ``handle_agent_action_core``."""

    def test_stores_llm_response_before_observation(self) -> None:
        """The pre-observation text is kept in ``llm_response``."""
        llm_text = "Thought: I need to search\nAction: search\nAction Input: query"
        pending_action = AgentAction(
            thought="I need to search",
            tool="search",
            tool_input="query",
            text=llm_text,
        )

        outcome = handle_agent_action_core(
            formatted_answer=pending_action,
            tool_result=ToolResult(
                result="Search result: found data", result_as_answer=False
            ),
        )

        assert isinstance(outcome, AgentAction)
        assert outcome.llm_response == llm_text
        assert "Observation:" in outcome.text
        assert outcome.result == "Search result: found data"

    def test_text_contains_observation_for_logging(self) -> None:
        """The observation is appended to ``text``."""
        pending_action = AgentAction(
            thought="Testing",
            tool="test_tool",
            tool_input="{}",
            text="Thought: Testing\nAction: test_tool\nAction Input: {}",
        )

        outcome = handle_agent_action_core(
            formatted_answer=pending_action,
            tool_result=ToolResult(result="Tool output", result_as_answer=False),
        )

        assert isinstance(outcome, AgentAction)
        assert "Observation: Tool output" in outcome.text

    def test_result_as_answer_returns_agent_finish(self) -> None:
        """A tool result flagged ``result_as_answer`` produces an AgentFinish."""
        pending_action = AgentAction(
            thought="Using tool",
            tool="final_tool",
            tool_input="{}",
            text="Thought: Using tool\nAction: final_tool\nAction Input: {}",
        )

        outcome = handle_agent_action_core(
            formatted_answer=pending_action,
            tool_result=ToolResult(
                result="Final answer from tool", result_as_answer=True
            ),
        )

        assert isinstance(outcome, AgentFinish)
        assert outcome.output == "Final answer from tool"
|
||||
|
||||
|
||||
class TestCrewAgentExecutorMessageAttribution:
    """Tests for proper message attribution in CrewAgentExecutor.

    Every test drives ``_invoke_loop`` through the same two-turn script: the
    LLM first requests ``test_tool`` and then returns a final answer. The
    resulting ``executor.messages`` list is then inspected.
    """

    # First scripted LLM turn: a tool call.
    _ACTION_RESPONSE = (
        "Thought: I need to use a tool\n"
        "Action: test_tool\n"
        'Action Input: {"arg": "value"}'
    )
    # Every later scripted LLM turn: the final answer.
    _FINAL_RESPONSE = "Thought: I have the answer\nFinal Answer: Done"

    @classmethod
    def _run_tool_then_finish(cls, executor: CrewAgentExecutor) -> Any:
        """Run ``executor._invoke_loop()`` with the LLM and tool layer patched.

        Args:
            executor: The executor under test.

        Returns:
            Whatever ``_invoke_loop`` returns for the scripted conversation.
        """
        first_turns = iter([cls._ACTION_RESPONSE])

        def fake_llm_response(*args: Any, **kwargs: Any) -> str:
            # The first call yields the tool action; all later calls fall
            # back to the final answer.
            return next(first_turns, cls._FINAL_RESPONSE)

        llm_patch = patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            side_effect=fake_llm_response,
        )
        tool_patch = patch(
            "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
            return_value=ToolResult(
                result="Tool executed successfully", result_as_answer=False
            ),
        )
        logs_patch = patch.object(executor, "_show_logs")

        with llm_patch, tool_patch, logs_patch:
            return executor._invoke_loop()

    def test_tool_observation_not_in_assistant_message(
        self, executor: CrewAgentExecutor
    ) -> None:
        """Test that tool observations are not attributed to assistant messages.

        This is the core fix for GitHub issue #4181 - observations should be
        in user messages, not assistant messages, to prevent LLM hallucination.
        """
        result = self._run_tool_then_finish(executor)

        assert isinstance(result, AgentFinish)

        assistant_messages = [
            msg for msg in executor.messages if msg.get("role") == "assistant"
        ]
        user_messages = [msg for msg in executor.messages if msg.get("role") == "user"]

        for msg in assistant_messages:
            content = msg.get("content", "")
            assert "Observation:" not in content, (
                "Assistant message should not contain 'Observation:'. "
                f"Found: {content[:100]}..."
            )

        observation_in_user = any(
            "Observation:" in msg.get("content", "") for msg in user_messages
        )
        assert observation_in_user, (
            "Tool observation should be in a user message, not assistant message"
        )

    def test_llm_response_in_assistant_message(
        self, executor: CrewAgentExecutor
    ) -> None:
        """Test that the LLM's actual response is in assistant messages."""
        self._run_tool_then_finish(executor)

        assistant_messages = [
            msg for msg in executor.messages if msg.get("role") == "assistant"
        ]

        llm_response_found = any(
            "Action: test_tool" in msg.get("content", "") for msg in assistant_messages
        )
        assert llm_response_found, (
            "LLM's action response should be in an assistant message"
        )

    def test_message_order_after_tool_use(
        self, executor: CrewAgentExecutor
    ) -> None:
        """Test that messages are in correct order: assistant (action) then user (observation)."""
        self._run_tool_then_finish(executor)

        action_msg_idx = None
        observation_msg_idx = None

        # Record the index of the last matching message of each kind.
        for i, msg in enumerate(executor.messages):
            content = msg.get("content", "")
            if "Action: test_tool" in content and msg.get("role") == "assistant":
                action_msg_idx = i
            if "Observation:" in content and msg.get("role") == "user":
                observation_msg_idx = i

        assert action_msg_idx is not None, "Action message not found"
        assert observation_msg_idx is not None, "Observation message not found"
        assert observation_msg_idx == action_msg_idx + 1, (
            "Observation (user) should immediately follow action (assistant). "
            f"Action at {action_msg_idx}, Observation at {observation_msg_idx}"
        )
|
||||
|
||||
|
||||
class TestAgentActionLlmResponseField:
    """Checks the ``llm_response`` attribute exposed by AgentAction."""

    def test_agent_action_has_llm_response_field(self) -> None:
        """A freshly built AgentAction has ``llm_response`` defaulting to None."""
        fresh_action = AgentAction(
            thought="Test", tool="test", tool_input="{}", text="Test text"
        )

        assert hasattr(fresh_action, "llm_response")
        assert fresh_action.llm_response is None

    def test_agent_action_llm_response_can_be_set(self) -> None:
        """Assigning ``llm_response`` after construction is retained."""
        fresh_action = AgentAction(
            thought="Test", tool="test", tool_input="{}", text="Test text"
        )

        fresh_action.llm_response = "Original LLM response"

        assert fresh_action.llm_response == "Original LLM response"
|
||||
Reference in New Issue
Block a user