mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-07-02 05:38:12 +00:00
chore: add cassettes for flaky tests
This commit is contained in:
@@ -0,0 +1,113 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages":[{"role":"user","content":"Based on the following human feedback,
|
||||
determine which outcome best matches their intent.\n\nFeedback: I approve this\n\nPossible
|
||||
outcomes: approved, rejected\n\nRespond with ONLY one of the exact outcome values
|
||||
listed above, nothing else."}],"model":"gpt-4o-mini","response_format":{"type":"json_schema","json_schema":{"schema":{"description":"The
|
||||
outcome that best matches the human''s feedback intent.","properties":{"outcome":{"description":"The
|
||||
outcome that best matches the feedback. Must be one of: approved, rejected","enum":["approved","rejected"],"title":"Outcome","type":"string"}},"required":["outcome"],"title":"FeedbackOutcome","type":"object","additionalProperties":false},"name":"FeedbackOutcome","strict":true}},"stream":false}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
authorization:
|
||||
- AUTHORIZATION-XXX
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '782'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
x-stainless-arch:
|
||||
- X-STAINLESS-ARCH-XXX
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-helper-method:
|
||||
- beta.chat.completions.parse
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- X-STAINLESS-OS-XXX
|
||||
x-stainless-package-version:
|
||||
- 1.83.0
|
||||
x-stainless-read-timeout:
|
||||
- X-STAINLESS-READ-TIMEOUT-XXX
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.13.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: "{\n \"id\": \"chatcmpl-DHDHCheu5DvlB6xjTrEDq0nfzLlrf\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1772994982,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": \"{\\\"outcome\\\":\\\"approved\\\"}\",\n
|
||||
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
|
||||
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
|
||||
130,\n \"completion_tokens\": 6,\n \"total_tokens\": 136,\n \"prompt_tokens_details\":
|
||||
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
|
||||
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
|
||||
\"default\",\n \"system_fingerprint\": \"fp_cf6f0a1ff1\"\n}\n"
|
||||
headers:
|
||||
CF-RAY:
|
||||
- CF-RAY-XXX
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Sun, 08 Mar 2026 18:36:22 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Strict-Transport-Security:
|
||||
- STS-XXX
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- X-CONTENT-TYPE-XXX
|
||||
access-control-expose-headers:
|
||||
- ACCESS-CONTROL-XXX
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- OPENAI-ORG-XXX
|
||||
openai-processing-ms:
|
||||
- '361'
|
||||
openai-project:
|
||||
- OPENAI-PROJECT-XXX
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
set-cookie:
|
||||
- SET-COOKIE-XXX
|
||||
x-openai-proxy-wasm:
|
||||
- v0.1
|
||||
x-ratelimit-limit-requests:
|
||||
- X-RATELIMIT-LIMIT-REQUESTS-XXX
|
||||
x-ratelimit-limit-tokens:
|
||||
- X-RATELIMIT-LIMIT-TOKENS-XXX
|
||||
x-ratelimit-remaining-requests:
|
||||
- X-RATELIMIT-REMAINING-REQUESTS-XXX
|
||||
x-ratelimit-remaining-tokens:
|
||||
- X-RATELIMIT-REMAINING-TOKENS-XXX
|
||||
x-ratelimit-reset-requests:
|
||||
- X-RATELIMIT-RESET-REQUESTS-XXX
|
||||
x-ratelimit-reset-tokens:
|
||||
- X-RATELIMIT-RESET-TOKENS-XXX
|
||||
x-request-id:
|
||||
- X-REQUEST-ID-XXX
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -0,0 +1,113 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages":[{"role":"user","content":"Based on the following human feedback,
|
||||
determine which outcome best matches their intent.\n\nFeedback: Unclear feedback\n\nPossible
|
||||
outcomes: approved, rejected\n\nRespond with ONLY one of the exact outcome values
|
||||
listed above, nothing else."}],"model":"gpt-4o-mini","response_format":{"type":"json_schema","json_schema":{"schema":{"description":"The
|
||||
outcome that best matches the human''s feedback intent.","properties":{"outcome":{"description":"The
|
||||
outcome that best matches the feedback. Must be one of: approved, rejected","enum":["approved","rejected"],"title":"Outcome","type":"string"}},"required":["outcome"],"title":"FeedbackOutcome","type":"object","additionalProperties":false},"name":"FeedbackOutcome","strict":true}},"stream":false}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
authorization:
|
||||
- AUTHORIZATION-XXX
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '784'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
x-stainless-arch:
|
||||
- X-STAINLESS-ARCH-XXX
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-helper-method:
|
||||
- beta.chat.completions.parse
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- X-STAINLESS-OS-XXX
|
||||
x-stainless-package-version:
|
||||
- 1.83.0
|
||||
x-stainless-read-timeout:
|
||||
- X-STAINLESS-READ-TIMEOUT-XXX
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.13.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: "{\n \"id\": \"chatcmpl-DHDHDlji53YRtj69Ulq5E9SjBqccI\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1772994983,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": \"{\\\"outcome\\\":\\\"rejected\\\"}\",\n
|
||||
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
|
||||
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
|
||||
130,\n \"completion_tokens\": 7,\n \"total_tokens\": 137,\n \"prompt_tokens_details\":
|
||||
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
|
||||
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
|
||||
\"default\",\n \"system_fingerprint\": \"fp_a1ddba3226\"\n}\n"
|
||||
headers:
|
||||
CF-RAY:
|
||||
- CF-RAY-XXX
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Sun, 08 Mar 2026 18:36:24 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Strict-Transport-Security:
|
||||
- STS-XXX
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- X-CONTENT-TYPE-XXX
|
||||
access-control-expose-headers:
|
||||
- ACCESS-CONTROL-XXX
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- OPENAI-ORG-XXX
|
||||
openai-processing-ms:
|
||||
- '317'
|
||||
openai-project:
|
||||
- OPENAI-PROJECT-XXX
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
set-cookie:
|
||||
- SET-COOKIE-XXX
|
||||
x-openai-proxy-wasm:
|
||||
- v0.1
|
||||
x-ratelimit-limit-requests:
|
||||
- X-RATELIMIT-LIMIT-REQUESTS-XXX
|
||||
x-ratelimit-limit-tokens:
|
||||
- X-RATELIMIT-LIMIT-TOKENS-XXX
|
||||
x-ratelimit-remaining-requests:
|
||||
- X-RATELIMIT-REMAINING-REQUESTS-XXX
|
||||
x-ratelimit-remaining-tokens:
|
||||
- X-RATELIMIT-REMAINING-TOKENS-XXX
|
||||
x-ratelimit-reset-requests:
|
||||
- X-RATELIMIT-RESET-REQUESTS-XXX
|
||||
x-ratelimit-reset-tokens:
|
||||
- X-RATELIMIT-RESET-TOKENS-XXX
|
||||
x-request-id:
|
||||
- X-REQUEST-ID-XXX
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -0,0 +1,113 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages":[{"role":"user","content":"Based on the following human feedback,
|
||||
determine which outcome best matches their intent.\n\nFeedback: Looks good\n\nPossible
|
||||
outcomes: approved, rejected\n\nRespond with ONLY one of the exact outcome values
|
||||
listed above, nothing else."}],"model":"gpt-4o-mini","response_format":{"type":"json_schema","json_schema":{"schema":{"description":"The
|
||||
outcome that best matches the human''s feedback intent.","properties":{"outcome":{"description":"The
|
||||
outcome that best matches the feedback. Must be one of: approved, rejected","enum":["approved","rejected"],"title":"Outcome","type":"string"}},"required":["outcome"],"title":"FeedbackOutcome","type":"object","additionalProperties":false},"name":"FeedbackOutcome","strict":true}},"stream":false}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
authorization:
|
||||
- AUTHORIZATION-XXX
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '778'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
x-stainless-arch:
|
||||
- X-STAINLESS-ARCH-XXX
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-helper-method:
|
||||
- beta.chat.completions.parse
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- X-STAINLESS-OS-XXX
|
||||
x-stainless-package-version:
|
||||
- 1.83.0
|
||||
x-stainless-read-timeout:
|
||||
- X-STAINLESS-READ-TIMEOUT-XXX
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.13.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: "{\n \"id\": \"chatcmpl-DHDHEVhZlU19TjrqDy0sKeWkKRINn\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1772994984,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": \"{\\\"outcome\\\":\\\"approved\\\"}\",\n
|
||||
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
|
||||
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
|
||||
129,\n \"completion_tokens\": 6,\n \"total_tokens\": 135,\n \"prompt_tokens_details\":
|
||||
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
|
||||
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
|
||||
\"default\",\n \"system_fingerprint\": \"fp_a1ddba3226\"\n}\n"
|
||||
headers:
|
||||
CF-RAY:
|
||||
- CF-RAY-XXX
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Sun, 08 Mar 2026 18:36:24 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Strict-Transport-Security:
|
||||
- STS-XXX
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- X-CONTENT-TYPE-XXX
|
||||
access-control-expose-headers:
|
||||
- ACCESS-CONTROL-XXX
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- OPENAI-ORG-XXX
|
||||
openai-processing-ms:
|
||||
- '253'
|
||||
openai-project:
|
||||
- OPENAI-PROJECT-XXX
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
set-cookie:
|
||||
- SET-COOKIE-XXX
|
||||
x-openai-proxy-wasm:
|
||||
- v0.1
|
||||
x-ratelimit-limit-requests:
|
||||
- X-RATELIMIT-LIMIT-REQUESTS-XXX
|
||||
x-ratelimit-limit-tokens:
|
||||
- X-RATELIMIT-LIMIT-TOKENS-XXX
|
||||
x-ratelimit-remaining-requests:
|
||||
- X-RATELIMIT-REMAINING-REQUESTS-XXX
|
||||
x-ratelimit-remaining-tokens:
|
||||
- X-RATELIMIT-REMAINING-TOKENS-XXX
|
||||
x-ratelimit-reset-requests:
|
||||
- X-RATELIMIT-RESET-REQUESTS-XXX
|
||||
x-ratelimit-reset-tokens:
|
||||
- X-RATELIMIT-RESET-TOKENS-XXX
|
||||
x-request-id:
|
||||
- X-REQUEST-ID-XXX
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -349,56 +349,38 @@ class TestHumanFeedbackHistory:
|
||||
class TestCollapseToOutcome:
|
||||
"""Tests for the _collapse_to_outcome method."""
|
||||
|
||||
@pytest.mark.vcr()
|
||||
def test_exact_match(self):
|
||||
"""Test exact match returns the correct outcome."""
|
||||
flow = Flow()
|
||||
result = flow._collapse_to_outcome(
|
||||
feedback="I approve this",
|
||||
outcomes=["approved", "rejected"],
|
||||
llm="gpt-4o-mini",
|
||||
)
|
||||
assert result in ("approved", "rejected")
|
||||
|
||||
with patch("crewai.llm.LLM") as MockLLM:
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.call.return_value = "approved"
|
||||
MockLLM.return_value = mock_llm
|
||||
|
||||
result = flow._collapse_to_outcome(
|
||||
feedback="I approve this",
|
||||
outcomes=["approved", "rejected"],
|
||||
llm="gpt-4o-mini",
|
||||
)
|
||||
|
||||
assert result == "approved"
|
||||
|
||||
@pytest.mark.vcr()
|
||||
def test_partial_match(self):
|
||||
"""Test partial match finds the outcome in the response."""
|
||||
flow = Flow()
|
||||
result = flow._collapse_to_outcome(
|
||||
feedback="Looks good",
|
||||
outcomes=["approved", "rejected"],
|
||||
llm="gpt-4o-mini",
|
||||
)
|
||||
assert result in ("approved", "rejected")
|
||||
|
||||
with patch("crewai.llm.LLM") as MockLLM:
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.call.return_value = "The outcome is approved based on the feedback"
|
||||
MockLLM.return_value = mock_llm
|
||||
|
||||
result = flow._collapse_to_outcome(
|
||||
feedback="Looks good",
|
||||
outcomes=["approved", "rejected"],
|
||||
llm="gpt-4o-mini",
|
||||
)
|
||||
|
||||
assert result == "approved"
|
||||
|
||||
@pytest.mark.vcr()
|
||||
def test_fallback_to_first(self):
|
||||
"""Test that unmatched response falls back to first outcome."""
|
||||
flow = Flow()
|
||||
|
||||
with patch("crewai.llm.LLM") as MockLLM:
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.call.return_value = "something completely different"
|
||||
MockLLM.return_value = mock_llm
|
||||
|
||||
result = flow._collapse_to_outcome(
|
||||
feedback="Unclear feedback",
|
||||
outcomes=["approved", "rejected"],
|
||||
llm="gpt-4o-mini",
|
||||
)
|
||||
|
||||
assert result == "approved" # First in list
|
||||
result = flow._collapse_to_outcome(
|
||||
feedback="Unclear feedback",
|
||||
outcomes=["approved", "rejected"],
|
||||
llm="gpt-4o-mini",
|
||||
)
|
||||
assert result in ("approved", "rejected")
|
||||
|
||||
|
||||
# -- HITL Learning tests --
|
||||
|
||||
Reference in New Issue
Block a user