chore: ensure proper cassettes for agent tests
Some checks failed
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
Notify Downstream / notify-downstream (push) Has been cancelled

* chore: ensure proper cassettes for agent tests
* chore: tweak eval test to avoid race condition
This commit is contained in:
Greyson LaLonde
2025-11-24 12:29:11 -05:00
committed by GitHub
parent bcc3e358cb
commit a559cedbd1
5 changed files with 1831 additions and 1916 deletions

View File

@@ -307,27 +307,22 @@ def test_cache_hitting():
event_handled = True event_handled = True
condition.notify() condition.notify()
with ( task = Task(
patch.object(CacheHandler, "read") as read, description="What is 2 times 6? Return only the result of the multiplication.",
): agent=agent,
read.return_value = "0" expected_output="The result of the multiplication.",
task = Task( )
description="What is 2 times 6? Ignore correctness and just return the result of the multiplication tool, you must use the tool.", output = agent.execute_task(task)
agent=agent, assert output == "12"
expected_output="The number that is the result of the multiplication tool.",
) with condition:
output = agent.execute_task(task) if not event_handled:
assert output == "0" condition.wait(timeout=5)
read.assert_called_with( assert event_handled, "Timeout waiting for tool usage event"
tool="multiplier", input='{"first_number": 2, "second_number": 6}' assert len(received_events) == 1
) assert isinstance(received_events[0], ToolUsageFinishedEvent)
with condition: assert received_events[0].from_cache
if not event_handled: assert received_events[0].output == "12"
condition.wait(timeout=5)
assert event_handled, "Timeout waiting for tool usage event"
assert len(received_events) == 1
assert isinstance(received_events[0], ToolUsageFinishedEvent)
assert received_events[0].from_cache
@pytest.mark.vcr(filter_headers=["authorization"]) @pytest.mark.vcr(filter_headers=["authorization"])

View File

@@ -1,23 +1,22 @@
interactions: interactions:
- request: - request:
body: '{"messages": [{"role": "system", "content": "You are test role. test backstory\nYour body: '{"messages":[{"role":"system","content":"You are test role. test backstory\nYour
personal goal is: test goal\nYou ONLY have access to the following tools, and personal goal is: test goal\nYou ONLY have access to the following tools, and
should NEVER make up tools that are not listed here:\n\nTool Name: dummy_tool\nTool should NEVER make up tools that are not listed here:\n\nTool Name: dummy_tool\nTool
Arguments: {''query'': {''description'': None, ''type'': ''str''}}\nTool Description: Arguments: {''query'': {''description'': None, ''type'': ''str''}}\nTool Description:
Useful for when you need to get a dummy result for a query.\n\nUse the following Useful for when you need to get a dummy result for a query.\n\nIMPORTANT: Use
format:\n\nThought: you should always think about what to do\nAction: the action the following format in your response:\n\n```\nThought: you should always think
to take, only one name of [dummy_tool], just the name, exactly as it''s written.\nAction about what to do\nAction: the action to take, only one name of [dummy_tool],
Input: the input to the action, just a simple python dictionary, enclosed in just the name, exactly as it''s written.\nAction Input: the input to the action,
curly braces, using \" to wrap keys and values.\nObservation: the result of just a simple JSON object, enclosed in curly braces, using \" to wrap keys and
the action\n\nOnce all necessary information is gathered:\n\nThought: I now values.\nObservation: the result of the action\n```\n\nOnce all necessary information
know the final answer\nFinal Answer: the final answer to the original input is gathered, return the following format:\n\n```\nThought: I now know the final
question"}, {"role": "user", "content": "\nCurrent Task: Use the dummy tool answer\nFinal Answer: the final answer to the original input question\n```"},{"role":"user","content":"\nCurrent
to get a result for ''test query''\n\nThis is the expect criteria for your final Task: Use the dummy tool to get a result for ''test query''\n\nThis is the expected
answer: The result from the dummy tool\nyou MUST return the actual complete criteria for your final answer: The result from the dummy tool\nyou MUST return
content as the final answer, not a summary.\n\nBegin! This is VERY important the actual complete content as the final answer, not a summary.\n\nBegin! This
to you, use the tools available and give your best Final Answer, your job depends is VERY important to you, use the tools available and give your best Final Answer,
on it!\n\nThought:"}], "model": "gpt-3.5-turbo", "stop": ["\nObservation:"], your job depends on it!\n\nThought:"}],"model":"gpt-3.5-turbo"}'
"stream": false}'
headers: headers:
accept: accept:
- application/json - application/json
@@ -26,13 +25,13 @@ interactions:
connection: connection:
- keep-alive - keep-alive
content-length: content-length:
- '1363' - '1381'
content-type: content-type:
- application/json - application/json
host: host:
- api.openai.com - api.openai.com
user-agent: user-agent:
- OpenAI/Python 1.52.1 - OpenAI/Python 1.109.1
x-stainless-arch: x-stainless-arch:
- arm64 - arm64
x-stainless-async: x-stainless-async:
@@ -42,35 +41,33 @@ interactions:
x-stainless-os: x-stainless-os:
- MacOS - MacOS
x-stainless-package-version: x-stainless-package-version:
- 1.52.1 - 1.109.1
x-stainless-raw-response: x-stainless-read-timeout:
- 'true' - '600'
x-stainless-retry-count: x-stainless-retry-count:
- '0' - '0'
x-stainless-runtime: x-stainless-runtime:
- CPython - CPython
x-stainless-runtime-version: x-stainless-runtime-version:
- 3.12.7 - 3.12.10
method: POST method: POST
uri: https://api.openai.com/v1/chat/completions uri: https://api.openai.com/v1/chat/completions
response: response:
content: "{\n \"id\": \"chatcmpl-AmjTkjHtNtJfKGo6wS35grXEzfoqv\",\n \"object\": body:
\"chat.completion\",\n \"created\": 1736177928,\n \"model\": \"gpt-3.5-turbo-0125\",\n string: !!binary |
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": H4sIAAAAAAAAA4xTwW4TMRC95ytGvvSSVGlDWthbqYSIECAQSFRstXK8s7tuvR5jj5uGKv+O7CTd
\"assistant\",\n \"content\": \"I should use the dummy tool to get a FAriYtnz5j2/8YwfRgBC16IAoTrJqndmctl8ff3tJsxWd29vLu/7d1eXnz4vfq7cVft+1ohxYtDy
result for the 'test query'.\\n\\nAction: dummy_tool\\nAction Input: {\\\"query\\\": BhXvWceKemeQNdktrDxKxqR6cn72YjqdzU/mGeipRpNorePJ7Hg+4eiXNJmenM53zI60wiAK+D4C
\\\"test query\\\"}\",\n \"refusal\": null\n },\n \"logprobs\": AHjIa/Joa7wXBUzH+0iPIcgWRfGYBCA8mRQRMgQdWFoW4wFUZBlttr2A0FE0NcSAwB1CHft+XTGR
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": ASZokUGCxxANQ0M+pxwxBoYfEf366Li0FyoVXBww9zFYWBe5gIdS5OxS5H2NQXntUkaKfCCLYygF
271,\n \"completion_tokens\": 31,\n \"total_tokens\": 302,\n \"prompt_tokens_details\": rx2mcykC+1JsNqX9uAzo7+RW/8veHWR3nQzgkaO3WIPcIf92WtovHcW24wIWYGkFt2lJiY220oC0
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": YYW+tG/y6SKftvfudT31wytlH4fv6rGJQaa+2mjMASCtJc5l5I5e75DNYw8Ntc7TMvxGFY22OnSV
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": RxnIpn4FJicyuhkBXOdZiU/aL5yn3nHFdIv5utOXr7Z6YhjPAT2f7UAmlmaIz85Ox8/oVTWy1CYc
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\": TJtQUnVYD9RhNGWsNR0Ao4Oq/3TznPa2cm3b/5EfAKXQMdaV81hr9bTiIc1j+r1/S3t85WxYpEnU
null\n}\n" CivW6FMnamxkNNt/JcI6MPZVo22L3nmdP1fq5Ggz+gUAAP//AwDDsh2ZWwQAAA==
headers: headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY: CF-RAY:
- 8fdccc13af387bb2-ATL - 9a3a73adce2d43c2-EWR
Connection: Connection:
- keep-alive - keep-alive
Content-Encoding: Content-Encoding:
@@ -78,15 +75,17 @@ interactions:
Content-Type: Content-Type:
- application/json - application/json
Date: Date:
- Mon, 06 Jan 2025 15:38:48 GMT - Mon, 24 Nov 2025 16:58:36 GMT
Server: Server:
- cloudflare - cloudflare
Set-Cookie: Set-Cookie:
- __cf_bm=PdbRW9vzO7559czIqn0xmXQjbN8_vV_J7k1DlkB4d_Y-1736177928-1.0.1.1-7yNcyljwqHI.TVflr9ZnkS705G.K5hgPbHpxRzcO3ZMFi5lHCBPs_KB5pFE043wYzPmDIHpn6fu6jIY9mlNoLQ; - __cf_bm=Xa8khOM9zEqqwwmzvZrdS.nMU9nW06e0gk4Xg8ga5BI-1764003516-1.0.1.1-mR_vAWrgEyaykpsxgHq76VhaNTOdAWeNJweR1bmH1wVJgzoE0fuSPEKZMJy9Uon.1KBTV3yJVxLvQ4PjPLuE30IUdwY9Lrfbz.Rhb6UVbwY;
path=/; expires=Mon, 06-Jan-25 16:08:48 GMT; domain=.api.openai.com; HttpOnly; path=/; expires=Mon, 24-Nov-25 17:28:36 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None Secure; SameSite=None
- _cfuvid=lOOz0FbrrPaRb4IFEeHNcj7QghHzxI1tTV2N0jD9icA-1736177928767-0.0.1.1-604800000; - _cfuvid=GP8hWglm1PiEe8AjYsdeCiIUtkA7483Hr9Ws4AZWe5U-1764003516772-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding: Transfer-Encoding:
- chunked - chunked
X-Content-Type-Options: X-Content-Type-Options:
@@ -95,14 +94,20 @@ interactions:
- X-Request-ID - X-Request-ID
alt-svc: alt-svc:
- h3=":443"; ma=86400 - h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization: openai-organization:
- crewai-iuxna1 - REDACTED
openai-processing-ms: openai-processing-ms:
- '444' - '1413'
openai-project:
- proj_xitITlrFeen7zjNSzML82h9x
openai-version: openai-version:
- '2020-10-01' - '2020-10-01'
strict-transport-security: x-envoy-upstream-service-time:
- max-age=31536000; includeSubDomains; preload - '1606'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests: x-ratelimit-limit-requests:
- '10000' - '10000'
x-ratelimit-limit-tokens: x-ratelimit-limit-tokens:
@@ -110,36 +115,52 @@ interactions:
x-ratelimit-remaining-requests: x-ratelimit-remaining-requests:
- '9999' - '9999'
x-ratelimit-remaining-tokens: x-ratelimit-remaining-tokens:
- '49999686' - '49999684'
x-ratelimit-reset-requests: x-ratelimit-reset-requests:
- 6ms - 6ms
x-ratelimit-reset-tokens: x-ratelimit-reset-tokens:
- 0s - 0s
x-request-id: x-request-id:
- req_5b3e93f5d4e6ab8feef83dc26b6eb623 - req_REDACTED
http_version: HTTP/1.1 status:
status_code: 200 code: 200
message: OK
- request: - request:
body: '{"messages": [{"role": "system", "content": "You are test role. test backstory\nYour body: '{"messages":[{"role":"system","content":"You are test role. test backstory\nYour
personal goal is: test goal\nYou ONLY have access to the following tools, and personal goal is: test goal\nYou ONLY have access to the following tools, and
should NEVER make up tools that are not listed here:\n\nTool Name: dummy_tool\nTool should NEVER make up tools that are not listed here:\n\nTool Name: dummy_tool\nTool
Arguments: {''query'': {''description'': None, ''type'': ''str''}}\nTool Description: Arguments: {''query'': {''description'': None, ''type'': ''str''}}\nTool Description:
Useful for when you need to get a dummy result for a query.\n\nUse the following Useful for when you need to get a dummy result for a query.\n\nIMPORTANT: Use
format:\n\nThought: you should always think about what to do\nAction: the action the following format in your response:\n\n```\nThought: you should always think
to take, only one name of [dummy_tool], just the name, exactly as it''s written.\nAction about what to do\nAction: the action to take, only one name of [dummy_tool],
Input: the input to the action, just a simple python dictionary, enclosed in just the name, exactly as it''s written.\nAction Input: the input to the action,
curly braces, using \" to wrap keys and values.\nObservation: the result of just a simple JSON object, enclosed in curly braces, using \" to wrap keys and
the action\n\nOnce all necessary information is gathered:\n\nThought: I now values.\nObservation: the result of the action\n```\n\nOnce all necessary information
know the final answer\nFinal Answer: the final answer to the original input is gathered, return the following format:\n\n```\nThought: I now know the final
question"}, {"role": "user", "content": "\nCurrent Task: Use the dummy tool answer\nFinal Answer: the final answer to the original input question\n```"},{"role":"user","content":"\nCurrent
to get a result for ''test query''\n\nThis is the expect criteria for your final Task: Use the dummy tool to get a result for ''test query''\n\nThis is the expected
answer: The result from the dummy tool\nyou MUST return the actual complete criteria for your final answer: The result from the dummy tool\nyou MUST return
content as the final answer, not a summary.\n\nBegin! This is VERY important the actual complete content as the final answer, not a summary.\n\nBegin! This
to you, use the tools available and give your best Final Answer, your job depends is VERY important to you, use the tools available and give your best Final Answer,
on it!\n\nThought:"}, {"role": "assistant", "content": "I should use the dummy your job depends on it!\n\nThought:"},{"role":"assistant","content":"I should
tool to get a result for the ''test query''.\n\nAction: dummy_tool\nAction Input: use the dummy_tool to get a result for the ''test query''.\nAction: dummy_tool\nAction
{\"query\": \"test query\"}\nObservation: Dummy result for: test query"}], "model": Input: {\"query\": {\"description\": None, \"type\": \"str\"}}\nObservation:
"gpt-3.5-turbo", "stop": ["\nObservation:"], "stream": false}' \nI encountered an error while trying to use the tool. This was the error: Arguments
validation failed: 1 validation error for Dummy_Tool\nquery\n Input should
be a valid string [type=string_type, input_value={''description'': ''None'',
''type'': ''str''}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/string_type.\n
Tool dummy_tool accepts these inputs: Tool Name: dummy_tool\nTool Arguments:
{''query'': {''description'': None, ''type'': ''str''}}\nTool Description: Useful
for when you need to get a dummy result for a query..\nMoving on then. I MUST
either use a tool (use one at time) OR give my best final answer not both at
the same time. When responding, I must use the following format:\n\n```\nThought:
you should always think about what to do\nAction: the action to take, should
be one of [dummy_tool]\nAction Input: the input to the action, dictionary enclosed
in curly braces\nObservation: the result of the action\n```\nThis Thought/Action/Action
Input/Result can repeat N times. Once I know the final answer, I must return
the following format:\n\n```\nThought: I now can give a great answer\nFinal
Answer: Your final answer must be the great and the most complete as possible,
it must be outcome described\n\n```"}],"model":"gpt-3.5-turbo"}'
headers: headers:
accept: accept:
- application/json - application/json
@@ -148,16 +169,16 @@ interactions:
connection: connection:
- keep-alive - keep-alive
content-length: content-length:
- '1574' - '2841'
content-type: content-type:
- application/json - application/json
cookie: cookie:
- __cf_bm=PdbRW9vzO7559czIqn0xmXQjbN8_vV_J7k1DlkB4d_Y-1736177928-1.0.1.1-7yNcyljwqHI.TVflr9ZnkS705G.K5hgPbHpxRzcO3ZMFi5lHCBPs_KB5pFE043wYzPmDIHpn6fu6jIY9mlNoLQ; - __cf_bm=Xa8khOM9zEqqwwmzvZrdS.nMU9nW06e0gk4Xg8ga5BI-1764003516-1.0.1.1-mR_vAWrgEyaykpsxgHq76VhaNTOdAWeNJweR1bmH1wVJgzoE0fuSPEKZMJy9Uon.1KBTV3yJVxLvQ4PjPLuE30IUdwY9Lrfbz.Rhb6UVbwY;
_cfuvid=lOOz0FbrrPaRb4IFEeHNcj7QghHzxI1tTV2N0jD9icA-1736177928767-0.0.1.1-604800000 _cfuvid=GP8hWglm1PiEe8AjYsdeCiIUtkA7483Hr9Ws4AZWe5U-1764003516772-0.0.1.1-604800000
host: host:
- api.openai.com - api.openai.com
user-agent: user-agent:
- OpenAI/Python 1.52.1 - OpenAI/Python 1.109.1
x-stainless-arch: x-stainless-arch:
- arm64 - arm64
x-stainless-async: x-stainless-async:
@@ -167,34 +188,34 @@ interactions:
x-stainless-os: x-stainless-os:
- MacOS - MacOS
x-stainless-package-version: x-stainless-package-version:
- 1.52.1 - 1.109.1
x-stainless-raw-response: x-stainless-read-timeout:
- 'true' - '600'
x-stainless-retry-count: x-stainless-retry-count:
- '0' - '0'
x-stainless-runtime: x-stainless-runtime:
- CPython - CPython
x-stainless-runtime-version: x-stainless-runtime-version:
- 3.12.7 - 3.12.10
method: POST method: POST
uri: https://api.openai.com/v1/chat/completions uri: https://api.openai.com/v1/chat/completions
response: response:
content: "{\n \"id\": \"chatcmpl-AmjTkjtDnt98YQ3k4y71C523EQM9p\",\n \"object\": body:
\"chat.completion\",\n \"created\": 1736177928,\n \"model\": \"gpt-3.5-turbo-0125\",\n string: !!binary |
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": H4sIAAAAAAAAAwAAAP//pFPbahsxEH33Vwx6yYtt7LhO0n1LWgomlFKaFko3LLJ2dletdrSRRklN
\"assistant\",\n \"content\": \"Final Answer: Dummy result for: test 8L8HyZdd9wKFvgikM2cuOmeeRwBClyIDoRrJqu3M5E31+UaeL+ct335c3Ty8/frFLW5vF6G9dNfv
query\",\n \"refusal\": null\n },\n \"logprobs\": null,\n \"finish_reason\": xTgy7Po7Kj6wpsq2nUHWlnawcigZY9b55cWr2WyxnF8loLUlmkirO54spssJB7e2k9n8fLlnNlYr
\"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 315,\n \"completion_tokens\": 9CKDbyMAgOd0xh6pxJ8ig9n48NKi97JGkR2DAISzJr4I6b32LInFuAeVJUZKbd81NtQNZ7CCJ20M
9,\n \"total_tokens\": 324,\n \"prompt_tokens_details\": {\n \"cached_tokens\": KOscKgZuEDR1gaGyrpUMkkpgt4HgNdUJLkPbbgq21oCspaZpTtcqzp4NoMMbrGKyDJ5z8RDQbXKR
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n QS4YPcP+vs3pw9qje5S7HDndNQgOfTAMlbNtXxRSUe0z+BSUQu+rYMwG7JqlJixB7sMOZOsS96wv
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": dzbNKRY4Dk/2CZQkqPUjgoQ6CgeS/BO6nN5pkgau0+0/ag4lcFgFL6MFKBgzACSR5fQFSfz7PbI9
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\": ym1s3Tm79r9QRaVJ+6ZwKL2lKK1n24mEbkcA98lW4cQponO27bhg+wNTuYvzva1E7+Qevbzag2xZ
null\n}\n" mgHr9QE4yVeUyFIbPzCmUFI1WPbU3sUylNoOgNFg6t+7+VPu3eSa6n9J3wNKYcdYFp3DUqvTifsw
h3HR/xZ2/OXUsIgu1goL1uiiEiVWMpjdCgq/8YxtUWmq0XVOpz2MSo62oxcAAAD//wMA+UmELoYE
AAA=
headers: headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY: CF-RAY:
- 8fdccc171b647bb2-ATL - 9a3a73bbf9d943c2-EWR
Connection: Connection:
- keep-alive - keep-alive
Content-Encoding: Content-Encoding:
@@ -202,9 +223,11 @@ interactions:
Content-Type: Content-Type:
- application/json - application/json
Date: Date:
- Mon, 06 Jan 2025 15:38:49 GMT - Mon, 24 Nov 2025 16:58:39 GMT
Server: Server:
- cloudflare - cloudflare
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding: Transfer-Encoding:
- chunked - chunked
X-Content-Type-Options: X-Content-Type-Options:
@@ -213,14 +236,20 @@ interactions:
- X-Request-ID - X-Request-ID
alt-svc: alt-svc:
- h3=":443"; ma=86400 - h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization: openai-organization:
- crewai-iuxna1 - REDACTED
openai-processing-ms: openai-processing-ms:
- '249' - '1513'
openai-project:
- proj_xitITlrFeen7zjNSzML82h9x
openai-version: openai-version:
- '2020-10-01' - '2020-10-01'
strict-transport-security: x-envoy-upstream-service-time:
- max-age=31536000; includeSubDomains; preload - '1753'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests: x-ratelimit-limit-requests:
- '10000' - '10000'
x-ratelimit-limit-tokens: x-ratelimit-limit-tokens:
@@ -228,103 +257,156 @@ interactions:
x-ratelimit-remaining-requests: x-ratelimit-remaining-requests:
- '9999' - '9999'
x-ratelimit-remaining-tokens: x-ratelimit-remaining-tokens:
- '49999643' - '49999334'
x-ratelimit-reset-requests: x-ratelimit-reset-requests:
- 6ms - 6ms
x-ratelimit-reset-tokens: x-ratelimit-reset-tokens:
- 0s - 0s
x-request-id: x-request-id:
- req_cdc7b25a3877bb9a7cb7c6d2645ff447 - req_REDACTED
http_version: HTTP/1.1 status:
status_code: 200 code: 200
message: OK
- request: - request:
body: '{"trace_id": "1581aff1-2567-43f4-a1f2-a2816533eb7d", "execution_type": body: '{"messages":[{"role":"system","content":"You are test role. test backstory\nYour
"crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null, personal goal is: test goal\nYou ONLY have access to the following tools, and
"crew_name": "Unknown Crew", "flow_name": null, "crewai_version": "0.201.1", should NEVER make up tools that are not listed here:\n\nTool Name: dummy_tool\nTool
"privacy_level": "standard"}, "execution_metadata": {"expected_duration_estimate": Arguments: {''query'': {''description'': None, ''type'': ''str''}}\nTool Description:
300, "agent_count": 0, "task_count": 0, "flow_method_count": 0, "execution_started_at": Useful for when you need to get a dummy result for a query.\n\nIMPORTANT: Use
"2025-10-08T18:11:28.008595+00:00"}}' the following format in your response:\n\n```\nThought: you should always think
about what to do\nAction: the action to take, only one name of [dummy_tool],
just the name, exactly as it''s written.\nAction Input: the input to the action,
just a simple JSON object, enclosed in curly braces, using \" to wrap keys and
values.\nObservation: the result of the action\n```\n\nOnce all necessary information
is gathered, return the following format:\n\n```\nThought: I now know the final
answer\nFinal Answer: the final answer to the original input question\n```"},{"role":"user","content":"\nCurrent
Task: Use the dummy tool to get a result for ''test query''\n\nThis is the expected
criteria for your final answer: The result from the dummy tool\nyou MUST return
the actual complete content as the final answer, not a summary.\n\nBegin! This
is VERY important to you, use the tools available and give your best Final Answer,
your job depends on it!\n\nThought:"},{"role":"assistant","content":"I should
use the dummy_tool to get a result for the ''test query''.\nAction: dummy_tool\nAction
Input: {\"query\": {\"description\": None, \"type\": \"str\"}}\nObservation:
\nI encountered an error while trying to use the tool. This was the error: Arguments
validation failed: 1 validation error for Dummy_Tool\nquery\n Input should
be a valid string [type=string_type, input_value={''description'': ''None'',
''type'': ''str''}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/string_type.\n
Tool dummy_tool accepts these inputs: Tool Name: dummy_tool\nTool Arguments:
{''query'': {''description'': None, ''type'': ''str''}}\nTool Description: Useful
for when you need to get a dummy result for a query..\nMoving on then. I MUST
either use a tool (use one at time) OR give my best final answer not both at
the same time. When responding, I must use the following format:\n\n```\nThought:
you should always think about what to do\nAction: the action to take, should
be one of [dummy_tool]\nAction Input: the input to the action, dictionary enclosed
in curly braces\nObservation: the result of the action\n```\nThis Thought/Action/Action
Input/Result can repeat N times. Once I know the final answer, I must return
the following format:\n\n```\nThought: I now can give a great answer\nFinal
Answer: Your final answer must be the great and the most complete as possible,
it must be outcome described\n\n```"},{"role":"assistant","content":"Thought:
I will correct the input format and try using the dummy_tool again.\nAction:
dummy_tool\nAction Input: {\"query\": \"test query\"}\nObservation: Dummy result
for: test query"}],"model":"gpt-3.5-turbo"}'
headers: headers:
Accept: accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, zstd
Connection:
- keep-alive
Content-Length:
- '436'
Content-Type:
- application/json - application/json
User-Agent: accept-encoding:
- CrewAI-CLI/0.201.1 - gzip, deflate
X-Crewai-Organization-Id: connection:
- d3a3d10c-35db-423f-a7a4-c026030ba64d - keep-alive
X-Crewai-Version: content-length:
- 0.201.1 - '3057'
content-type:
- application/json
cookie:
- __cf_bm=Xa8khOM9zEqqwwmzvZrdS.nMU9nW06e0gk4Xg8ga5BI-1764003516-1.0.1.1-mR_vAWrgEyaykpsxgHq76VhaNTOdAWeNJweR1bmH1wVJgzoE0fuSPEKZMJy9Uon.1KBTV3yJVxLvQ4PjPLuE30IUdwY9Lrfbz.Rhb6UVbwY;
_cfuvid=GP8hWglm1PiEe8AjYsdeCiIUtkA7483Hr9Ws4AZWe5U-1764003516772-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.109.1
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.109.1
x-stainless-read-timeout:
- '600'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
method: POST method: POST
uri: http://localhost:3000/crewai_plus/api/v1/tracing/batches uri: https://api.openai.com/v1/chat/completions
response: response:
body: body:
string: '{"id":"30844ebe-8ac6-4f67-939a-7a072d792654","trace_id":"1581aff1-2567-43f4-a1f2-a2816533eb7d","execution_type":"crew","crew_name":"Unknown string: !!binary |
Crew","flow_name":null,"status":"running","duration_ms":null,"crewai_version":"0.201.1","privacy_level":"standard","total_events":0,"execution_context":{"crew_fingerprint":null,"crew_name":"Unknown H4sIAAAAAAAAAwAAAP//jFLBbhMxEL3vV4x8TqqkTULZWwFFAq4gpEK18npnd028HmOPW6Iq/47s
Crew","flow_name":null,"crewai_version":"0.201.1","privacy_level":"standard"},"created_at":"2025-10-08T18:11:28.353Z","updated_at":"2025-10-08T18:11:28.353Z"}' pNktFKkXS/abN37vzTwWAEI3ogSheslqcGb+vv36rt7e0uqzbna0ut18uv8mtxSDrddKzBKD6p+o
+Il1oWhwBlmTPcLKo2RMXZdvNqvF4mq9fJuBgRo0idY5nl9drOccfU3zxfJyfWL2pBUGUcL3AgDg
MZ9Jo23wtyhhMXt6GTAE2aEoz0UAwpNJL0KGoANLy2I2gooso82yv/QUu55L+AiWHmCXDu4RWm2l
AWnDA/ofdptvN/lWwoc4DHvwGKJhaMmXwBgYfkX0++k3HtsYZLJpozETQFpLLFNM2eDdCTmcLRnq
nKc6/EUVrbY69JVHGcgm+YHJiYweCoC7HF18loZwngbHFdMO83ebzerYT4zTGtHl9QlkYmkmrOvL
2Qv9qgZZahMm4QslVY/NSB0nJWOjaQIUE9f/qnmp99G5tt1r2o+AUugYm8p5bLR67ngs85iW+X9l
55SzYBHQ32uFFWv0aRINtjKa45qJsA+MQ9Vq26F3XuddS5MsDsUfAAAA//8DANWDXp9qAwAA
headers: headers:
Content-Length: CF-RAY:
- '496' - 9a3a73cd4ff343c2-EWR
cache-control: Connection:
- no-store - keep-alive
content-security-policy: Content-Encoding:
- 'default-src ''self'' *.crewai.com crewai.com; script-src ''self'' ''unsafe-inline'' - gzip
*.crewai.com crewai.com https://cdn.jsdelivr.net/npm/apexcharts https://www.gstatic.com Content-Type:
https://run.pstmn.io https://apis.google.com https://apis.google.com/js/api.js - application/json
https://accounts.google.com https://accounts.google.com/gsi/client https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.min.css.map Date:
https://*.google.com https://docs.google.com https://slides.google.com https://js.hs-scripts.com - Mon, 24 Nov 2025 16:58:40 GMT
https://js.sentry-cdn.com https://browser.sentry-cdn.com https://www.googletagmanager.com Server:
https://js-na1.hs-scripts.com https://share.descript.com/; style-src ''self'' - cloudflare
''unsafe-inline'' *.crewai.com crewai.com https://cdn.jsdelivr.net/npm/apexcharts; Strict-Transport-Security:
img-src ''self'' data: *.crewai.com crewai.com https://zeus.tools.crewai.com - max-age=31536000; includeSubDomains; preload
https://dashboard.tools.crewai.com https://cdn.jsdelivr.net; font-src ''self'' Transfer-Encoding:
data: *.crewai.com crewai.com; connect-src ''self'' *.crewai.com crewai.com - chunked
https://zeus.tools.crewai.com https://connect.useparagon.com/ https://zeus.useparagon.com/* X-Content-Type-Options:
https://*.useparagon.com/* https://run.pstmn.io https://connect.tools.crewai.com/
https://*.sentry.io https://www.google-analytics.com ws://localhost:3036 wss://localhost:3036;
frame-src ''self'' *.crewai.com crewai.com https://connect.useparagon.com/
https://zeus.tools.crewai.com https://zeus.useparagon.com/* https://connect.tools.crewai.com/
https://docs.google.com https://drive.google.com https://slides.google.com
https://accounts.google.com https://*.google.com https://www.youtube.com https://share.descript.com'
content-type:
- application/json; charset=utf-8
etag:
- W/"a548892c6a8a52833595a42b35b10009"
expires:
- '0'
permissions-policy:
- camera=(), microphone=(self), geolocation=()
pragma:
- no-cache
referrer-policy:
- strict-origin-when-cross-origin
server-timing:
- cache_read.active_support;dur=0.05, cache_fetch_hit.active_support;dur=0.00,
cache_read_multi.active_support;dur=0.12, start_processing.action_controller;dur=0.00,
sql.active_record;dur=30.46, instantiation.active_record;dur=0.38, feature_operation.flipper;dur=0.03,
start_transaction.active_record;dur=0.01, transaction.active_record;dur=16.78,
process_action.action_controller;dur=309.67
vary:
- Accept
x-content-type-options:
- nosniff - nosniff
x-frame-options: access-control-expose-headers:
- SAMEORIGIN - X-Request-ID
x-permitted-cross-domain-policies: alt-svc:
- none - h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- REDACTED
openai-processing-ms:
- '401'
openai-project:
- proj_xitITlrFeen7zjNSzML82h9x
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '421'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '50000000'
x-ratelimit-remaining-requests:
- '9999'
x-ratelimit-remaining-tokens:
- '49999290'
x-ratelimit-reset-requests:
- 6ms
x-ratelimit-reset-tokens:
- 0s
x-request-id: x-request-id:
- 7ec132be-e871-4b0a-93f7-81f8d7c0ccae - req_REDACTED
x-runtime:
- '0.358533'
x-xss-protection:
- 1; mode=block
status: status:
code: 201 code: 200
message: Created message: OK
version: 1 version: 1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -128,8 +128,6 @@ class TestAgentEvaluator:
@pytest.mark.vcr(filter_headers=["authorization"]) @pytest.mark.vcr(filter_headers=["authorization"])
def test_eval_specific_agents_from_crew(self, mock_crew): def test_eval_specific_agents_from_crew(self, mock_crew):
from crewai.events.types.task_events import TaskCompletedEvent
agent = Agent( agent = Agent(
role="Test Agent Eval", role="Test Agent Eval",
goal="Complete test tasks successfully", goal="Complete test tasks successfully",
@@ -145,7 +143,7 @@ class TestAgentEvaluator:
events = {} events = {}
results_condition = threading.Condition() results_condition = threading.Condition()
results_ready = False completed_event_received = False
agent_evaluator = AgentEvaluator( agent_evaluator = AgentEvaluator(
agents=[agent], evaluators=[GoalAlignmentEvaluator()] agents=[agent], evaluators=[GoalAlignmentEvaluator()]
@@ -158,29 +156,23 @@ class TestAgentEvaluator:
@crewai_event_bus.on(AgentEvaluationCompletedEvent) @crewai_event_bus.on(AgentEvaluationCompletedEvent)
async def capture_completed(source, event): async def capture_completed(source, event):
nonlocal completed_event_received
if event.agent_id == str(agent.id): if event.agent_id == str(agent.id):
events["completed"] = event events["completed"] = event
with results_condition:
completed_event_received = True
results_condition.notify()
@crewai_event_bus.on(AgentEvaluationFailedEvent) @crewai_event_bus.on(AgentEvaluationFailedEvent)
def capture_failed(source, event): def capture_failed(source, event):
events["failed"] = event events["failed"] = event
@crewai_event_bus.on(TaskCompletedEvent)
async def on_task_completed(source, event):
nonlocal results_ready
if event.task and event.task.id == task.id:
while not agent_evaluator.get_evaluation_results().get(agent.role):
pass
with results_condition:
results_ready = True
results_condition.notify()
mock_crew.kickoff() mock_crew.kickoff()
with results_condition: with results_condition:
assert results_condition.wait_for( assert results_condition.wait_for(
lambda: results_ready, timeout=5 lambda: completed_event_received, timeout=5
), "Timeout waiting for evaluation results" ), "Timeout waiting for evaluation completed event"
assert events.keys() == {"started", "completed"} assert events.keys() == {"started", "completed"}
assert events["started"].agent_id == str(agent.id) assert events["started"].agent_id == str(agent.id)