Feat/individual react agent (#2483)

* WIP * WIP * wip * wip * WIP * More WIP * It's working but needs a massive clean up * output type works now * Usage metrics fixed * more testing * WIP * cleaning up * Update logger * 99% done. Need to make docs match new example * cleanup * drop hard-coded examples * docs * Clean up * Fix errors * Trying to fix CI issues * more type checker fixes * More type checking fixes * Update LiteAgent documentation for clarity and consistency; replace WebsiteSearchTool with SerperDevTool, and improve formatting in examples. * fix fingerprinting issues * fix type-checker * Fix type-checker issue by adding type ignore comment for cache read in ToolUsage class * Add optional agent parameter to CrewAgentParser and enhance action handling logic * Remove unused parameters from ToolUsage instantiation in tests and clean up debug print statement in CrewAgentParser. * Remove deprecated test files and examples for LiteAgent; add comprehensive tests for LiteAgent functionality, including tool usage and structured output handling. * Remove unused variable 'result' from ToolUsage class to clean up code. * Add initialization for 'result' variable in ToolUsage class to resolve type-checker warnings * Refactor agent_utils.py by removing unused event imports and adding missing commas in function definitions. Update test_events.py to reflect changes in expected event counts and adjust assertions accordingly. Modify test_tools_emits_error_events.yaml to include new headers and update response content for consistency with recent API changes. * Enhance tests in crew_test.py by verifying cache behavior in test_tools_with_custom_caching and ensuring proper agent initialization with added commas in test_crew_kickoff_for_each_works_with_manager_agent_copy. * Update agent tests to reflect changes in expected call counts and improve response formatting in YAML cassette. Adjusted mock call count from 2 to 3 and refined interaction formats for clarity and consistency. 
* Refactor agent tests to update model versions and improve response formatting in YAML cassettes. Changed model references from 'o1-preview' to 'o3-mini' and adjusted interaction formats for consistency. Enhanced error handling in context length tests and refined mock setups for better clarity. * Update tool usage logging to ensure tool arguments are consistently formatted as strings. Adjust agent test cases to reflect changes in maximum iterations and expected outputs, enhancing clarity in assertions. Update YAML cassettes to align with new response formats and improve overall consistency across tests. * Update YAML cassette for LLM tests to reflect changes in response structure and model version. Adjusted request and response headers, including updated content length and user agent. Enhanced token limits and request counts for improved testing accuracy. * Update tool usage logging to store tool arguments as native types instead of strings, enhancing data integrity and usability. * Refactor agent tests by removing outdated test cases and updating YAML cassettes to reflect changes in tool usage and response formats. Adjusted request and response headers, including user agent and content length, for improved accuracy in testing. Enhanced interaction formats for consistency across tests. * Add Excalidraw diagram file for visual representation of input-output flow. Created a new Excalidraw file that includes a diagram illustrating the input box, database, and output box with connecting arrows. This visual aid enhances understanding of the data flow within the application. * Remove redundant error handling for action and final answer in CrewAgentParser. Update tests to reflect this change by deleting the corresponding test case. --------- Co-authored-by: Lorenze Jay <63378463+lorenzejay@users.noreply.github.com> Co-authored-by: Lorenze Jay <lorenzejaytech@gmail.com>
2026-05-06 01:32:36 +00:00 · 2025-04-02 11:54:46 -04:00
parent 9b51e1174c
commit efe27bd570
42 changed files with 21642 additions and 12531 deletions
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -9,7 +9,7 @@ import pytest
 from crewai import Agent, Crew, Task
 from crewai.agents.cache import CacheHandler
 from crewai.agents.crew_agent_executor import AgentFinish, CrewAgentExecutor
-from crewai.agents.parser import AgentAction, CrewAgentParser, OutputParserException
+from crewai.agents.parser import CrewAgentParser, OutputParserException
 from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
 from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
 from crewai.llm import LLM
@@ -18,7 +18,6 @@ from crewai.tools.tool_calling import InstructorToolCalling
 from crewai.tools.tool_usage import ToolUsage
 from crewai.utilities import RPMController
 from crewai.utilities.events import crewai_event_bus
-from crewai.utilities.events.llm_events import LLMStreamChunkEvent
 from crewai.utilities.events.tool_usage_events import ToolUsageFinishedEvent


@@ -375,7 +374,7 @@ def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
        role="test role",
        goal="test goal",
        backstory="test backstory",
-        llm="o1-preview",
+        llm=LLM(model="o3-mini"),
        max_iter=3,
        use_system_prompt=False,
        allow_delegation=False,
@@ -401,7 +400,7 @@ def test_agent_powered_by_new_o_model_family_that_uses_tool():
        role="test role",
        goal="test goal",
        backstory="test backstory",
-        llm="o1-preview",
+        llm="o3-mini",
        max_iter=3,
        use_system_prompt=False,
        allow_delegation=False,
@@ -443,7 +442,7 @@ def test_agent_custom_max_iterations():
            task=task,
            tools=[get_final_answer],
        )
-        assert private_mock.call_count == 2
+        assert private_mock.call_count == 3


@pytest.mark.vcr(filter_headers=["authorization"])
@@ -531,7 +530,7 @@ def test_agent_moved_on_after_max_iterations():
        role="test role",
        goal="test goal",
        backstory="test backstory",
-        max_iter=3,
+        max_iter=5,
        allow_delegation=False,
    )

@@ -552,6 +551,7 @@ def test_agent_respect_the_max_rpm_set(capsys):
    def get_final_answer() -> float:
        """Get the final answer but don't give it yet, just re-use this
        tool non-stop."""
+        return 42

    agent = Agent(
        role="test role",
@@ -573,7 +573,7 @@ def test_agent_respect_the_max_rpm_set(capsys):
            task=task,
            tools=[get_final_answer],
        )
-        assert output == "The final answer is 42."
+        assert output == "42"
        captured = capsys.readouterr()
        assert "Max RPM reached, waiting for next minute to start." in captured.out
        moveon.assert_called()
@@ -863,25 +863,6 @@ def test_agent_function_calling_llm():
        mock_original_tool_calling.assert_called()


-def test_agent_count_formatting_error():
-    from unittest.mock import patch
-
-    agent1 = Agent(
-        role="test role",
-        goal="test goal",
-        backstory="test backstory",
-        verbose=True,
-    )
-
-    parser = CrewAgentParser(agent=agent1)
-
-    with patch.object(Agent, "increment_formatting_errors") as mock_count_errors:
-        test_text = "This text does not match expected formats."
-        with pytest.raises(OutputParserException):
-            parser.parse(test_text)
-        mock_count_errors.assert_called_once()
-
-
@pytest.mark.vcr(filter_headers=["authorization"])
 def test_tool_result_as_answer_is_the_final_answer_for_the_agent():
    from crewai.tools import BaseTool
@@ -1305,46 +1286,55 @@ def test_llm_call_with_error():

@pytest.mark.vcr(filter_headers=["authorization"])
 def test_handle_context_length_exceeds_limit():
+    # Import necessary modules
+    from crewai.utilities.agent_utils import handle_context_length
+    from crewai.utilities.i18n import I18N
+    from crewai.utilities.printer import Printer
+
+    # Create real Printer and I18N instances to pass as dependencies
+    printer = Printer()
+    i18n = I18N()
+
+    # Create an agent just for its LLM
    agent = Agent(
        role="test role",
        goal="test goal",
        backstory="test backstory",
-    )
-    original_action = AgentAction(
-        tool="test_tool",
-        tool_input="test_input",
-        text="test_log",
-        thought="test_thought",
+        respect_context_window=True,
    )

-    with patch.object(
-        CrewAgentExecutor, "invoke", wraps=agent.agent_executor.invoke
-    ) as private_mock:
-        task = Task(
-            description="The final answer is 42. But don't give it yet, instead keep using the `get_final_answer` tool.",
-            expected_output="The final answer",
-        )
-        agent.execute_task(
-            task=task,
-        )
-        private_mock.assert_called_once()
-        with patch.object(
-            CrewAgentExecutor, "_handle_context_length"
-        ) as mock_handle_context:
-            mock_handle_context.side_effect = ValueError(
-                "Context length limit exceeded"
+    llm = agent.llm
+
+    # Create test messages
+    messages = [
+        {
+            "role": "user",
+            "content": "This is a test message that would exceed context length",
+        }
+    ]
+
+    # Set up test parameters
+    respect_context_window = True
+    callbacks = []
+
+    # Apply our patch to summarize_messages to force an error
+    with patch("crewai.utilities.agent_utils.summarize_messages") as mock_summarize:
+        mock_summarize.side_effect = ValueError("Context length limit exceeded")
+
+        # Directly call handle_context_length with our parameters
+        with pytest.raises(ValueError) as excinfo:
+            handle_context_length(
+                respect_context_window=respect_context_window,
+                printer=printer,
+                messages=messages,
+                llm=llm,
+                callbacks=callbacks,
+                i18n=i18n,
            )

-            long_input = "This is a very long input. " * 10000
-
-            # Attempt to handle context length, expecting the mocked error
-            with pytest.raises(ValueError) as excinfo:
-                agent.agent_executor._handle_context_length(
-                    [(original_action, long_input)]
-                )
-
-            assert "Context length limit exceeded" in str(excinfo.value)
-            mock_handle_context.assert_called_once()
+        # Verify our patch was called and raised the correct error
+        assert "Context length limit exceeded" in str(excinfo.value)
+        mock_summarize.assert_called_once()


@pytest.mark.vcr(filter_headers=["authorization"])
@@ -1353,7 +1343,7 @@ def test_handle_context_length_exceeds_limit_cli_no():
        role="test role",
        goal="test goal",
        backstory="test backstory",
-        sliding_context_window=False,
+        respect_context_window=False,
    )
    task = Task(description="test task", agent=agent, expected_output="test output")

@@ -1369,8 +1359,8 @@ def test_handle_context_length_exceeds_limit_cli_no():
        )
        private_mock.assert_called_once()
        pytest.raises(SystemExit)
-        with patch.object(
-            CrewAgentExecutor, "_handle_context_length"
+        with patch(
+            "crewai.utilities.agent_utils.handle_context_length"
        ) as mock_handle_context:
            mock_handle_context.assert_not_called()