Adding ability to track tools_errors and delegations

2026-01-09 16:18:30 +00:00 · 2024-02-28 02:28:19 -03:00
parent 3df3bba756
commit 340bea3271
8 changed files with 5888 additions and 4 deletions
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -221,7 +221,7 @@ class Crew(BaseModel):
                agents_for_delegation = [
                    agent for agent in self.agents if agent != task.agent
                ]
-                if len(agents_for_delegation) > 0:
+                if len(self.agents) > 1 and len(agents_for_delegation) > 0:
                    task.tools += AgentTools(agents=agents_for_delegation).tools()

            role = task.agent.role if task.agent is not None else "None"
--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -20,6 +20,8 @@ class Task(BaseModel):

    __hash__ = object.__hash__  # type: ignore
    used_tools: int = 0
+    tools_errors: int = 0
+    delegations: int = 0
    i18n: I18N = I18N()
    thread: threading.Thread = None
    description: str = Field(description="Description of the actual task.")
@@ -171,6 +173,14 @@ class Task(BaseModel):
            tasks_slices = [self.description, output]
        return "\n".join(tasks_slices)

+    def increment_tools_errors(self) -> None:
+        """Increment the tools errors counter by one.
+
+        Adds 1 to ``self.tools_errors`` in place and returns nothing.
+        """
+        self.tools_errors += 1
+
+    def increment_delegations(self) -> None:
+        """Increment the delegations counter by one.
+
+        Adds 1 to ``self.delegations`` in place and returns nothing.
+        """
+        self.delegations += 1
+
    def _export_output(self, result: str) -> Any:
        exported_result = result
        instructions = "I'm gonna convert this raw text into valid JSON."
--- a/src/crewai/tools/tool_usage.py
+++ b/src/crewai/tools/tool_usage.py
@@ -73,11 +73,13 @@ class ToolUsage:
        if isinstance(calling, ToolUsageErrorException):
            error = calling.message
            self._printer.print(content=f"\n\n{error}\n", color="red")
+            self.task.increment_tools_errors()
            return error
        try:
            tool = self._select_tool(calling.tool_name)
        except Exception as e:
            error = getattr(e, "message", str(e))
+            self.task.increment_tools_errors()
            self._printer.print(content=f"\n\n{error}\n", color="red")
            return error
        return f"{self._use(tool_string=tool_string, tool=tool, calling=calling)}\n\n{self._i18n.slice('final_answer_format')}"
@@ -103,7 +105,7 @@ class ToolUsage:
                result = self._format_result(result=result)
                return result
            except Exception:
-                pass
+                self.task.increment_tools_errors()

        result = self.tools_handler.cache.read(
            tool=calling.tool_name, input=calling.arguments
@@ -111,8 +113,17 @@ class ToolUsage:

        if not result:
            try:
+                print(f"Calling tool: {calling.tool_name}")
+                if calling.tool_name in [
+                    "Delegate work to co-worker",
+                    "Ask question to co-worker",
+                ]:
+                    self.task.increment_delegations()
+
                if calling.arguments:
+                    print(f"Calling tool NOW: {calling.tool_name}")
                    result = tool._run(**calling.arguments)
+                    print("Got result back from tool")
                else:
                    result = tool._run()
            except Exception as e:
@@ -125,8 +136,10 @@ class ToolUsage:
                    error = ToolUsageErrorException(
                        f'\n{error_message}.\nMoving one then. {self._i18n.slice("format").format(tool_names=self.tools_names)}'
                    ).message
+                    self.task.increment_tools_errors()
                    self._printer.print(content=f"\n\n{error_message}\n", color="red")
                    return error
+                self.task.increment_tools_errors()
                return self.use(calling=calling, tool_string=tool_string)

            self.tools_handler.on_tool_use(calling=calling, output=result)
@@ -166,6 +179,7 @@ class ToolUsage:
        for tool in self.tools:
            if tool.name.lower().strip() == tool_name.lower().strip():
                return tool
+        self.task.increment_tools_errors()
        raise Exception(f"Tool '{tool_name}' not found.")

    def _render(self) -> str:
@@ -210,7 +224,9 @@ class ToolUsage:
                ),
                max_attemps=1,
            )
+            print(f"Converter: {converter}")
            calling = converter.to_pydantic()
+            print(f"Calling: {calling}")

            if isinstance(calling, ConverterError):
                raise calling
@@ -218,6 +234,7 @@ class ToolUsage:
            self._run_attempts += 1
            if self._run_attempts > self._max_parsing_attempts:
                self._telemetry.tool_usage_error(llm=self.llm)
+                self.task.increment_tools_errors()
                self._printer.print(content=f"\n\n{e}\n", color="red")
                return ToolUsageErrorException(
                    f'{self._i18n.errors("tool_usage_error")}\n{self._i18n.slice("format").format(tool_names=self.tools_names)}'
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -593,7 +593,7 @@ def test_agent_count_formatting_error():
    parser = CrewAgentParser()
    parser.agent = agent1

-    with patch.object(Agent, "count_formatting_errors") as mock_count_errors:
+    with patch.object(Agent, "increment_formatting_errors") as mock_count_errors:
        test_text = "This text does not match expected formats."
        with pytest.raises(OutputParserException):
            parser.parse(test_text)
--- a/tests/cassettes/test_increment_delegations_for_hierarchical_process.yaml
+++ b/tests/cassettes/test_increment_delegations_for_hierarchical_process.yaml
--- a/tests/cassettes/test_increment_delegations_for_sequential_process.yaml
+++ b/tests/cassettes/test_increment_delegations_for_sequential_process.yaml
@@ -0,0 +1,153 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "user", "content": "You are Manager.\nYou''re great
+      at delegating work about scoring.\n\nYour personal goal is: Coordinate scoring
+      processesTo give my final answer use the exact following format:\n\n```\nFinal
+      Answer: [my expected final answer, entire content of my most complete final
+      answer goes here]\n```\nI MUST use these formats, my jobs depends on it!\n\nCurrent
+      Task: Give me an integer score between 1-5 for the following title: ''The impact
+      of AI in the future of work''\nYour final answer must be: The score of the title.\n\n
+      Begin! This is VERY important to you, your job depends on it!\n\n\n"}], "model":
+      "gpt-4", "n": 1, "stop": ["\nResult"], "stream": true, "temperature": 0.7}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, br
+      connection:
+      - keep-alive
+      content-length:
+      - '712'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.12.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.12.0
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.7
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: 'data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Final"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        Answer"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        The"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        score"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        of"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        the"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        title"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        is"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]}
+
+
+        data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
+
+
+        data: [DONE]
+
+
+        '
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 85c63ba94ea60110-GRU
+      Cache-Control:
+      - no-cache, must-revalidate
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream
+      Date:
+      - Wed, 28 Feb 2024 05:23:01 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=AaCQFIZM8yehA4h1745UTRRtL0FczZJtdLfNQ6_8NzA-1709097781-1.0-AUIh6/dxRTiveEa2WnhkSYSTau7hn7cRLNnlSfeiJp2fgTieIadq3fkeBHjqHSnQ7k/pE4WZgIZ9SAAmacifrgc=;
+        path=/; expires=Wed, 28-Feb-24 05:53:01 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=o.lLAcb8kPLRizp5FDtYBR4rjdIgMyVXhQ_NLWlcuj8-1709097781239-0.0-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      access-control-allow-origin:
+      - '*'
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-model:
+      - gpt-4-0613
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '224'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=15724800; includeSubDomains
+      x-ratelimit-limit-requests:
+      - '10000'
+      x-ratelimit-limit-tokens:
+      - '300000'
+      x-ratelimit-remaining-requests:
+      - '9999'
+      x-ratelimit-remaining-tokens:
+      - '299840'
+      x-ratelimit-reset-requests:
+      - 6ms
+      x-ratelimit-reset-tokens:
+      - 31ms
+      x-request-id:
+      - req_3129f92f1bc422dba1aa396cc072a30e
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/tests/cassettes/test_increment_tool_errors.yaml
+++ b/tests/cassettes/test_increment_tool_errors.yaml
--- a/tests/task_test.py
+++ b/tests/task_test.py
@@ -6,7 +6,7 @@ import pytest
 from pydantic import BaseModel
 from pydantic_core import ValidationError

-from crewai import Agent, Crew, Task
+from crewai import Agent, Crew, Process, Task


 def test_task_tool_reflect_agent_tools():
@@ -347,3 +347,103 @@ def test_save_task_pydantic_output():
        save_file.return_value = None
        crew.kickoff()
        save_file.assert_called_once_with('{"score":4}')
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_increment_delegations_for_hierarchical_process():
+    """Kick off a hierarchical crew with ``Task.increment_delegations`` patched."""
+    from langchain_openai import ChatOpenAI
+
+    scorer = Agent(
+        role="Scorer",
+        goal="Score the title",
+        backstory="You're an expert scorer, specialized in scoring titles.",
+        allow_delegation=False,
+    )
+
+    # The task is created without an agent; the crew below runs it under
+    # Process.hierarchical with an explicit manager LLM.
+    task = Task(
+        description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'",
+        expected_output="The score of the title.",
+    )
+
+    crew = Crew(
+        agents=[scorer],
+        tasks=[task],
+        process=Process.hierarchical,
+        manager_llm=ChatOpenAI(model="gpt-4-0125-preview"),
+    )
+
+    # Patch the method on the Task class so calls to it can be observed.
+    with patch.object(Task, "increment_delegations") as increment_delegations:
+        increment_delegations.return_value = None
+        crew.kickoff()
+        # NOTE(review): `assert_called_once` is only referenced here, never
+        # called, so this line asserts nothing. Presumably
+        # `assert_called_once()` (or `assert_called()`) was intended — confirm
+        # the expected call count against the recorded cassette first.
+        increment_delegations.assert_called_once
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_increment_delegations_for_sequential_process():
+    """Kick off a sequential two-agent crew with ``Task.increment_delegations`` patched."""
+    # NOTE(review): this `pass` is a no-op; it looks like a placeholder left
+    # behind and can probably be removed — confirm.
+    pass
+
+    manager = Agent(
+        role="Manager",
+        goal="Coordinate scoring processes",
+        backstory="You're great at delegating work about scoring.",
+        allow_delegation=False,
+    )
+
+    scorer = Agent(
+        role="Scorer",
+        goal="Score the title",
+        backstory="You're an expert scorer, specialized in scoring titles.",
+        allow_delegation=False,
+    )
+
+    # The task is assigned to the manager; the scorer is the only other agent
+    # in the crew.
+    task = Task(
+        description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'",
+        expected_output="The score of the title.",
+        agent=manager,
+    )
+
+    crew = Crew(
+        agents=[manager, scorer],
+        tasks=[task],
+        process=Process.sequential,
+    )
+
+    # Patch the method on the Task class so calls to it can be observed.
+    with patch.object(Task, "increment_delegations") as increment_delegations:
+        increment_delegations.return_value = None
+        crew.kickoff()
+        # NOTE(review): `assert_called_once` is only referenced here, never
+        # called, so this line asserts nothing. Both agents above are created
+        # with allow_delegation=False, so it is unclear whether a delegation
+        # is expected at all — confirm whether `assert_called_once()` or
+        # `assert_not_called()` is the intended check.
+        increment_delegations.assert_called_once
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_increment_tool_errors():
+    """Kick off a crew whose only tool always raises, with ``Task.increment_tools_errors`` patched."""
+    from crewai_tools import tool
+    from langchain_openai import ChatOpenAI
+
+    # A tool that unconditionally raises, used to drive the error path.
+    @tool
+    def scoring_examples() -> None:
+        "Useful examples for scoring titles."
+        raise Exception("Error")
+
+    scorer = Agent(
+        role="Scorer",
+        goal="Score the title",
+        backstory="You're an expert scorer, specialized in scoring titles.",
+        tools=[scoring_examples],
+    )
+
+    task = Task(
+        description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work', check examples to based your evaluation.",
+        expected_output="The score of the title.",
+    )
+
+    crew = Crew(
+        agents=[scorer],
+        tasks=[task],
+        process=Process.hierarchical,
+        manager_llm=ChatOpenAI(model="gpt-4-0125-preview"),
+    )
+
+    # Patch the method on the Task class so calls to it can be observed.
+    with patch.object(Task, "increment_tools_errors") as increment_tools_errors:
+        increment_tools_errors.return_value = None
+        crew.kickoff()
+        # NOTE(review): `assert_called_once` is only referenced here, never
+        # called, so this line asserts nothing. If the failing tool is retried
+        # the counter may be bumped more than once, in which case
+        # `assert_called()` rather than `assert_called_once()` would be the
+        # right check — confirm against the recorded cassette.
+        increment_tools_errors.assert_called_once