Adding ability to track tools_errors and delegations

This commit is contained in:
João Moura
2024-02-28 02:28:19 -03:00
parent 3df3bba756
commit 340bea3271
8 changed files with 5888 additions and 4 deletions

View File

@@ -593,7 +593,7 @@ def test_agent_count_formatting_error():
parser = CrewAgentParser()
parser.agent = agent1
with patch.object(Agent, "count_formatting_errors") as mock_count_errors:
with patch.object(Agent, "increment_formatting_errors") as mock_count_errors:
test_text = "This text does not match expected formats."
with pytest.raises(OutputParserException):
parser.parse(test_text)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,153 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "You are Manager.\nYou''re great
at delegating work about scoring.\n\nYour personal goal is: Coordinate scoring
processesTo give my final answer use the exact following format:\n\n```\nFinal
Answer: [my expected final answer, entire content of my most complete final
answer goes here]\n```\nI MUST use these formats, my jobs depends on it!\n\nCurrent
Task: Give me an integer score between 1-5 for the following title: ''The impact
of AI in the future of work''\nYour final answer must be: The score of the title.\n\n
Begin! This is VERY important to you, your job depends on it!\n\n\n"}], "model":
"gpt-4", "n": 1, "stop": ["\nResult"], "stream": true, "temperature": 0.7}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, br
connection:
- keep-alive
content-length:
- '712'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.12.0
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.12.0
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Final"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
Answer"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
The"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
score"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
of"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
the"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
title"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
is"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-8x6hAnJdNQ95CMhSACL5TNL0lG6Ws","object":"chat.completion.chunk","created":1709097780,"model":"gpt-4-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
data: [DONE]
'
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 85c63ba94ea60110-GRU
Cache-Control:
- no-cache, must-revalidate
Connection:
- keep-alive
Content-Type:
- text/event-stream
Date:
- Wed, 28 Feb 2024 05:23:01 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=AaCQFIZM8yehA4h1745UTRRtL0FczZJtdLfNQ6_8NzA-1709097781-1.0-AUIh6/dxRTiveEa2WnhkSYSTau7hn7cRLNnlSfeiJp2fgTieIadq3fkeBHjqHSnQ7k/pE4WZgIZ9SAAmacifrgc=;
path=/; expires=Wed, 28-Feb-24 05:53:01 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=o.lLAcb8kPLRizp5FDtYBR4rjdIgMyVXhQ_NLWlcuj8-1709097781239-0.0-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
access-control-allow-origin:
- '*'
alt-svc:
- h3=":443"; ma=86400
openai-model:
- gpt-4-0613
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '224'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=15724800; includeSubDomains
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '300000'
x-ratelimit-remaining-requests:
- '9999'
x-ratelimit-remaining-tokens:
- '299840'
x-ratelimit-reset-requests:
- 6ms
x-ratelimit-reset-tokens:
- 31ms
x-request-id:
- req_3129f92f1bc422dba1aa396cc072a30e
status:
code: 200
message: OK
version: 1

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@ import pytest
from pydantic import BaseModel
from pydantic_core import ValidationError
from crewai import Agent, Crew, Task
from crewai import Agent, Crew, Process, Task
def test_task_tool_reflect_agent_tools():
@@ -347,3 +347,103 @@ def test_save_task_pydantic_output():
save_file.return_value = None
crew.kickoff()
save_file.assert_called_once_with('{"score":4}')
@pytest.mark.vcr(filter_headers=["authorization"])
def test_increment_delegations_for_hierarchical_process():
    """Verify that a hierarchical crew run calls ``Task.increment_delegations``.

    ``Task.increment_delegations`` is replaced by a mock while the crew runs,
    and the mock is then checked for exactly one invocation.
    """
    from langchain_openai import ChatOpenAI

    scorer = Agent(
        role="Scorer",
        goal="Score the title",
        backstory="You're an expert scorer, specialized in scoring titles.",
        allow_delegation=False,
    )

    task = Task(
        description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'",
        expected_output="The score of the title.",
    )

    crew = Crew(
        agents=[scorer],
        tasks=[task],
        process=Process.hierarchical,
        manager_llm=ChatOpenAI(model="gpt-4-0125-preview"),
    )

    with patch.object(Task, "increment_delegations") as increment_delegations:
        increment_delegations.return_value = None
        crew.kickoff()
        # The assertion has to be *called*: a bare ``assert_called_once``
        # attribute access just evaluates to a bound method and never fails.
        # NOTE(review): "exactly once" depends on the recorded cassette --
        # relax to ``assert_called()`` if the manager delegates repeatedly.
        increment_delegations.assert_called_once()
@pytest.mark.vcr(filter_headers=["authorization"])
def test_increment_delegations_for_sequential_process():
    """Run a two-agent sequential crew with ``Task.increment_delegations`` mocked.

    NOTE(review): the final ``assert_called_once`` is an attribute access, not
    a call, so this test currently verifies nothing beyond ``kickoff()`` not
    raising. Both agents are created with ``allow_delegation=False``, so simply
    adding the parentheses would presumably make the test fail; a meaningful
    check likely needs delegation enabled on the manager and a re-recorded
    cassette -- confirm before tightening.
    """
    title_prompt = "Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'"

    title_scorer = Agent(
        role="Scorer",
        goal="Score the title",
        backstory="You're an expert scorer, specialized in scoring titles.",
        allow_delegation=False,
    )
    coordinator = Agent(
        role="Manager",
        goal="Coordinate scoring processes",
        backstory="You're great at delegating work about scoring.",
        allow_delegation=False,
    )

    scoring_task = Task(
        description=title_prompt,
        expected_output="The score of the title.",
        agent=coordinator,
    )

    scoring_crew = Crew(
        agents=[coordinator, title_scorer],
        tasks=[scoring_task],
        process=Process.sequential,
    )

    with patch.object(
        Task, "increment_delegations", return_value=None
    ) as delegation_spy:
        scoring_crew.kickoff()
        # Intentionally left as in the original (no call) -- see NOTE above.
        delegation_spy.assert_called_once
@pytest.mark.vcr(filter_headers=["authorization"])
def test_increment_tool_errors():
    """Verify that a failing tool causes ``Task.increment_tools_errors`` to run.

    The scorer agent is given a tool that always raises. While the crew runs,
    ``Task.increment_tools_errors`` is replaced by a mock, which is then
    checked for at least one invocation.
    """
    from crewai_tools import tool
    from langchain_openai import ChatOpenAI

    @tool
    def scoring_examples() -> None:
        "Useful examples for scoring titles."
        raise Exception("Error")

    scorer = Agent(
        role="Scorer",
        goal="Score the title",
        backstory="You're an expert scorer, specialized in scoring titles.",
        tools=[scoring_examples],
    )

    task = Task(
        description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work', check examples to based your evaluation.",
        expected_output="The score of the title.",
    )

    crew = Crew(
        agents=[scorer],
        tasks=[task],
        process=Process.hierarchical,
        manager_llm=ChatOpenAI(model="gpt-4-0125-preview"),
    )

    with patch.object(Task, "increment_tools_errors") as increment_tools_errors:
        increment_tools_errors.return_value = None
        crew.kickoff()
        # The original ``assert_called_once`` (no parentheses) was a no-op
        # attribute access. Assert "at least once" rather than "exactly once":
        # presumably the failing tool can be retried and counted several
        # times -- TODO confirm against the recorded cassette.
        increment_tools_errors.assert_called()