mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 00:28:31 +00:00
test: fix evaluation tests and update cassettes
This commit is contained in:
@@ -234,4 +234,130 @@ interactions:
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. An agent
|
||||
created for testing purposes\nYour personal goal is: Complete test tasks successfully\n\nTo
|
||||
give my best complete final answer to the task respond using the exact following
|
||||
format:\n\nThought: I now can give a great answer\nFinal Answer: Your final
|
||||
answer must be the great and the most complete as possible, it must be outcome
|
||||
described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user",
|
||||
"content": "Complete this task successfully"}], "model": "gpt-4o-mini", "stop":
|
||||
["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '583'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.109.1
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.109.1
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '2'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.10.18
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
content: "{\n \"id\": \"chatcmpl-CKFPJgeu8MLmB8Pj7GWmPhjtnxXrd\",\n \"object\"\
|
||||
: \"chat.completion\",\n \"created\": 1758941821,\n \"model\": \"gpt-4o-mini-2024-07-18\"\
|
||||
,\n \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \
|
||||
\ \"role\": \"assistant\",\n \"content\": \"Thought: I now can give\
|
||||
\ a great answer \\nFinal Answer: Please provide me with the details of the\
|
||||
\ task you would like me to complete, and I will ensure to deliver a thorough\
|
||||
\ and successful outcome.\",\n \"refusal\": null,\n \"annotations\"\
|
||||
: []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\
|
||||
\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 99,\n \"completion_tokens\"\
|
||||
: 41,\n \"total_tokens\": 140,\n \"prompt_tokens_details\": {\n \"\
|
||||
cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\"\
|
||||
: {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\"\
|
||||
: 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\"\
|
||||
: \"default\",\n \"system_fingerprint\": \"fp_560af6e559\"\n}\n"
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 9857bb2fc95e2e9e-EWR
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Sat, 27 Sep 2025 02:57:02 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=n_Cv1TVNkvaq1YzGBvGmHPg_KHOjSEQjY2BBBX8sZUo-1758941822-1.0.1.1-qbQF7Gn3hRepFoWX70NadkX0Iwj3b2I5a6Bl1k3oXdBi41akBXv1dHYN8BvbHvUQtAkQk2ghZcZeOjMMuQPAxtdCPZLK8eQRixWHQhKBxSc;
|
||||
path=/; expires=Sat, 27-Sep-25 03:27:02 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=iKfPD7SOU.iPKYonxe8rZqtVEgcAGadxAKKhbqslfqQ-1758941822891-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Strict-Transport-Security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- REDACTED
|
||||
openai-processing-ms:
|
||||
- '1373'
|
||||
openai-project:
|
||||
- REDACTED
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
x-envoy-upstream-service-time:
|
||||
- '1423'
|
||||
x-openai-proxy-wasm:
|
||||
- v0.1
|
||||
x-ratelimit-limit-project-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-project-tokens:
|
||||
- '149999887'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999885'
|
||||
x-ratelimit-reset-project-tokens:
|
||||
- 0s
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_53031c24ce964e9195146fd196dba801
|
||||
http_version: HTTP/1.1
|
||||
status_code: 200
|
||||
version: 1
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -227,8 +227,8 @@ class TestAgentEvaluator:
|
||||
(goal_alignment,) = result.metrics.values()
|
||||
assert goal_alignment.score == 5.0
|
||||
|
||||
expected_feedback = "The agent provided a thorough guide on how to conduct a test task but failed to produce specific expected output"
|
||||
assert expected_feedback in goal_alignment.feedback
|
||||
# Check that feedback mentions missing requirements or partial alignment
|
||||
assert "missed key requirements" in goal_alignment.feedback or "partial" in goal_alignment.feedback.lower()
|
||||
|
||||
assert goal_alignment.raw_response is not None
|
||||
assert '"score": 5' in goal_alignment.raw_response
|
||||
|
||||
Reference in New Issue
Block a user