diff --git a/conftest.py b/conftest.py
index 9b2c7c5c4..09852767e 100644
--- a/conftest.py
+++ b/conftest.py
@@ -43,6 +43,35 @@ def _patched_make_vcr_request(httpx_request: Any, **kwargs: Any) -> Any:
 httpx_stubs._make_vcr_request = _patched_make_vcr_request
 
 
+# Patch the response side of VCR to fix httpx.ResponseNotRead errors.
+# VCR's _from_serialized_response mocks httpx.Response.read(), which prevents
+# the response's internal _content attribute from being properly initialized.
+# When OpenAI's client (using with_raw_response) accesses response.content,
+# httpx raises ResponseNotRead because read() was never actually called.
+# This patch ensures _content is explicitly set after response creation.
+_original_from_serialized_response = getattr(
+    httpx_stubs, "_from_serialized_response", None
+)
+
+if _original_from_serialized_response is not None:
+
+    def _patched_from_serialized_response(
+        request: Any, serialized_response: Any, history: Any = None
+    ) -> Any:
+        """Patched version that ensures response._content is properly set."""
+        response = _original_from_serialized_response(request, serialized_response, history)
+        # Explicitly set _content to avoid ResponseNotRead errors.
+        # The content was passed to the constructor, but the mocked read()
+        # prevents proper initialization of the internal state.
+        body_content = serialized_response.get("body", {}).get("string", b"")
+        if isinstance(body_content, str):
+            body_content = body_content.encode("utf-8")
+        response._content = body_content
+        return response
+
+    httpx_stubs._from_serialized_response = _patched_from_serialized_response
+
+
 @pytest.fixture(autouse=True, scope="function")
 def cleanup_event_handlers() -> Generator[None, Any, None]:
     """Clean up event bus handlers after each test to prevent test pollution."""
diff --git a/lib/crewai/src/crewai/flow/flow.py b/lib/crewai/src/crewai/flow/flow.py
index 674f551eb..8ef77e482 100644
--- a/lib/crewai/src/crewai/flow/flow.py
+++ b/lib/crewai/src/crewai/flow/flow.py
@@ -3086,25 +3086,35 @@ class Flow(Generic[T], metaclass=FlowMeta):
             logger.warning(
                 f"Structured output failed, falling back to simple prompting: {e}"
             )
-            response = llm_instance.call(messages=prompt)
-            response_clean = str(response).strip()
+            try:
+                response = llm_instance.call(
+                    messages=[{"role": "user", "content": prompt}],
+                )
+                response_clean = str(response).strip()
 
-            # Exact match (case-insensitive)
-            for outcome in outcomes:
-                if outcome.lower() == response_clean.lower():
-                    return outcome
+                # Exact match (case-insensitive)
+                for outcome in outcomes:
+                    if outcome.lower() == response_clean.lower():
+                        return outcome
 
-            # Partial match
-            for outcome in outcomes:
-                if outcome.lower() in response_clean.lower():
-                    return outcome
+                # Partial match
+                for outcome in outcomes:
+                    if outcome.lower() in response_clean.lower():
+                        return outcome
 
-            # Fallback to first outcome
-            logger.warning(
-                f"Could not match LLM response '{response_clean}' to outcomes {list(outcomes)}. "
-                f"Falling back to first outcome: {outcomes[0]}"
-            )
-            return outcomes[0]
+                # Fallback to first outcome
+                logger.warning(
+                    f"Could not match LLM response '{response_clean}' to outcomes {list(outcomes)}. "
+                    f"Falling back to first outcome: {outcomes[0]}"
+                )
+                return outcomes[0]
+
+            except Exception as fallback_err:
+                logger.warning(
+                    f"Simple prompting also failed: {fallback_err}. "
+                    f"Falling back to first outcome: {outcomes[0]}"
+                )
+                return outcomes[0]
 
     def _log_flow_event(
         self,
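
Aside: the retry-and-degrade behavior added to _collapse_to_outcome above can
be read as a standalone sketch. The collapse helper and _boom stub below are
hypothetical names for illustration, not CrewAI APIs:

    import logging

    def collapse(call_llm, feedback: str, outcomes: list[str]) -> str:
        """Map free-form feedback onto one of the allowed outcomes."""
        try:
            response = str(call_llm(feedback)).strip()
        except Exception as err:
            # Mirrors the new except block: any LLM failure degrades to outcomes[0].
            logging.warning("LLM call failed: %s. Using first outcome.", err)
            return outcomes[0]
        for outcome in outcomes:  # exact match first
            if outcome.lower() == response.lower():
                return outcome
        for outcome in outcomes:  # then substring match
            if outcome.lower() in response.lower():
                return outcome
        return outcomes[0]  # safe default

    def _boom(_: str) -> str:
        raise RuntimeError("model not found")

    assert collapse(lambda _: "APPROVED", "ship it", ["needs_changes", "approved"]) == "approved"
    assert collapse(_boom, "ship it", ["needs_changes", "approved"]) == "needs_changes"
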
" + f"Falling back to first outcome: {outcomes[0]}" + ) + return outcomes[0] def _log_flow_event( self, diff --git a/lib/crewai/src/crewai/flow/human_feedback.py b/lib/crewai/src/crewai/flow/human_feedback.py index fa4e20ced..7389b8a9e 100644 --- a/lib/crewai/src/crewai/flow/human_feedback.py +++ b/lib/crewai/src/crewai/flow/human_feedback.py @@ -76,6 +76,24 @@ if TYPE_CHECKING: F = TypeVar("F", bound=Callable[..., Any]) +def _serialize_llm_for_context(llm: Any) -> str | None: + """Serialize a BaseLLM object to a model string with provider prefix. + + When persisting the LLM for HITL resume, we need to store enough info + to reconstruct a working LLM on the resume worker. Just storing the bare + model name (e.g. "gemini-3-flash-preview") causes provider inference to + fail — it defaults to OpenAI. Including the provider prefix (e.g. + "gemini/gemini-3-flash-preview") allows LLM() to correctly route. + """ + model = getattr(llm, "model", None) + if not model: + return None + provider = getattr(llm, "provider", None) + if provider and "/" not in model: + return f"{provider}/{model}" + return model + + @dataclass class HumanFeedbackResult: """Result from a @human_feedback decorated method. @@ -412,7 +430,7 @@ def human_feedback( emit=list(emit) if emit else None, default_outcome=default_outcome, metadata=metadata or {}, - llm=llm if isinstance(llm, str) else getattr(llm, "model", None), + llm=llm if isinstance(llm, str) else _serialize_llm_for_context(llm), ) # Determine effective provider: diff --git a/lib/crewai/src/crewai/llms/constants.py b/lib/crewai/src/crewai/llms/constants.py index 9552efada..595a0a30d 100644 --- a/lib/crewai/src/crewai/llms/constants.py +++ b/lib/crewai/src/crewai/llms/constants.py @@ -240,6 +240,7 @@ ANTHROPIC_MODELS: list[AnthropicModels] = [ GeminiModels: TypeAlias = Literal[ "gemini-3-pro-preview", + "gemini-3-flash-preview", "gemini-2.5-pro", "gemini-2.5-pro-preview-03-25", "gemini-2.5-pro-preview-05-06", @@ -294,6 +295,7 @@ GeminiModels: TypeAlias = Literal[ ] GEMINI_MODELS: list[GeminiModels] = [ "gemini-3-pro-preview", + "gemini-3-flash-preview", "gemini-2.5-pro", "gemini-2.5-pro-preview-03-25", "gemini-2.5-pro-preview-05-06", diff --git a/lib/crewai/tests/test_async_human_feedback.py b/lib/crewai/tests/test_async_human_feedback.py index 035f29dcc..f4977858b 100644 --- a/lib/crewai/tests/test_async_human_feedback.py +++ b/lib/crewai/tests/test_async_human_feedback.py @@ -989,8 +989,10 @@ class TestLLMObjectPreservedInContext: persistence = SQLiteFlowPersistence(db_path) # Create a mock BaseLLM object (not a string) + # Simulates LLM(model="gemini-2.0-flash", provider="gemini") mock_llm_obj = MagicMock() - mock_llm_obj.model = "gemini/gemini-2.0-flash" + mock_llm_obj.model = "gemini-2.0-flash" + mock_llm_obj.provider = "gemini" class PausingProvider: def __init__(self, persistence: SQLiteFlowPersistence): @@ -1086,11 +1088,36 @@ class TestLLMObjectPreservedInContext: def test_none_llm_when_no_model_attr(self) -> None: """Test that llm is None when object has no model attribute.""" - mock_obj = MagicMock(spec=[]) # No attributes + from crewai.flow.human_feedback import _serialize_llm_for_context - # Simulate what the decorator does - llm_value = mock_obj if isinstance(mock_obj, str) else getattr(mock_obj, "model", None) - assert llm_value is None + mock_obj = MagicMock(spec=[]) # No attributes + assert _serialize_llm_for_context(mock_obj) is None + + def test_provider_prefix_added_to_bare_model(self) -> None: + """Test that provider prefix is added when model 
diff --git a/lib/crewai/src/crewai/llms/constants.py b/lib/crewai/src/crewai/llms/constants.py
index 9552efada..595a0a30d 100644
--- a/lib/crewai/src/crewai/llms/constants.py
+++ b/lib/crewai/src/crewai/llms/constants.py
@@ -240,6 +240,7 @@ ANTHROPIC_MODELS: list[AnthropicModels] = [
 GeminiModels: TypeAlias = Literal[
     "gemini-3-pro-preview",
+    "gemini-3-flash-preview",
     "gemini-2.5-pro",
     "gemini-2.5-pro-preview-03-25",
     "gemini-2.5-pro-preview-05-06",
@@ -294,6 +295,7 @@ GeminiModels: TypeAlias = Literal[
 ]
 
 GEMINI_MODELS: list[GeminiModels] = [
     "gemini-3-pro-preview",
+    "gemini-3-flash-preview",
     "gemini-2.5-pro",
     "gemini-2.5-pro-preview-03-25",
     "gemini-2.5-pro-preview-05-06",
diff --git a/lib/crewai/tests/test_async_human_feedback.py b/lib/crewai/tests/test_async_human_feedback.py
index 035f29dcc..f4977858b 100644
--- a/lib/crewai/tests/test_async_human_feedback.py
+++ b/lib/crewai/tests/test_async_human_feedback.py
@@ -989,8 +989,10 @@ class TestLLMObjectPreservedInContext:
         persistence = SQLiteFlowPersistence(db_path)
 
         # Create a mock BaseLLM object (not a string)
+        # Simulates LLM(model="gemini-2.0-flash", provider="gemini")
         mock_llm_obj = MagicMock()
-        mock_llm_obj.model = "gemini/gemini-2.0-flash"
+        mock_llm_obj.model = "gemini-2.0-flash"
+        mock_llm_obj.provider = "gemini"
 
         class PausingProvider:
             def __init__(self, persistence: SQLiteFlowPersistence):
@@ -1086,11 +1088,36 @@ class TestLLMObjectPreservedInContext:
 
     def test_none_llm_when_no_model_attr(self) -> None:
         """Test that llm is None when object has no model attribute."""
-        mock_obj = MagicMock(spec=[])  # No attributes
+        from crewai.flow.human_feedback import _serialize_llm_for_context
 
-        # Simulate what the decorator does
-        llm_value = mock_obj if isinstance(mock_obj, str) else getattr(mock_obj, "model", None)
-        assert llm_value is None
+        mock_obj = MagicMock(spec=[])  # No attributes
+        assert _serialize_llm_for_context(mock_obj) is None
+
+    def test_provider_prefix_added_to_bare_model(self) -> None:
+        """Test that provider prefix is added when model has no slash."""
+        from crewai.flow.human_feedback import _serialize_llm_for_context
+
+        mock_obj = MagicMock()
+        mock_obj.model = "gemini-3-flash-preview"
+        mock_obj.provider = "gemini"
+        assert _serialize_llm_for_context(mock_obj) == "gemini/gemini-3-flash-preview"
+
+    def test_provider_prefix_not_doubled_when_already_present(self) -> None:
+        """Test that provider prefix is not added when model already has a slash."""
+        from crewai.flow.human_feedback import _serialize_llm_for_context
+
+        mock_obj = MagicMock()
+        mock_obj.model = "gemini/gemini-2.0-flash"
+        mock_obj.provider = "gemini"
+        assert _serialize_llm_for_context(mock_obj) == "gemini/gemini-2.0-flash"
+
+    def test_no_provider_attr_falls_back_to_bare_model(self) -> None:
+        """Test that bare model is used when no provider attribute exists."""
+        from crewai.flow.human_feedback import _serialize_llm_for_context
+
+        mock_obj = MagicMock(spec=["model"])  # spec=[] would reject setting .model
+        mock_obj.model = "gpt-4o-mini"
+        assert _serialize_llm_for_context(mock_obj) == "gpt-4o-mini"
 
 
 class TestAsyncHumanFeedbackEdgeCases:
diff --git a/lib/crewai/tests/test_human_feedback_decorator.py b/lib/crewai/tests/test_human_feedback_decorator.py
index cd6919420..23b3d723b 100644
--- a/lib/crewai/tests/test_human_feedback_decorator.py
+++ b/lib/crewai/tests/test_human_feedback_decorator.py
@@ -400,6 +400,45 @@ class TestCollapseToOutcome:
 
         assert result == "approved"  # First in list
 
+    def test_both_llm_calls_fail_returns_first_outcome(self):
+        """When both structured and simple prompting fail, return outcomes[0]."""
+        flow = Flow()
+
+        with patch("crewai.llm.LLM") as MockLLM:
+            mock_llm = MagicMock()
+            # Both calls raise — simulates wrong provider / auth failure
+            mock_llm.call.side_effect = RuntimeError("Model not found")
+            MockLLM.return_value = mock_llm
+
+            result = flow._collapse_to_outcome(
+                feedback="looks great, approve it",
+                outcomes=["needs_changes", "approved"],
+                llm="gemini-3-flash-preview",
+            )
+
+        assert result == "needs_changes"  # First in list (safe fallback)
+
+    def test_structured_fails_but_simple_succeeds(self):
+        """When structured output fails but simple prompting works, use that."""
+        flow = Flow()
+
+        with patch("crewai.llm.LLM") as MockLLM:
+            mock_llm = MagicMock()
+            # First call (structured) fails, second call (simple) succeeds
+            mock_llm.call.side_effect = [
+                RuntimeError("Function calling not supported"),
+                "approved",
+            ]
+            MockLLM.return_value = mock_llm
+
+            result = flow._collapse_to_outcome(
+                feedback="looks great",
+                outcomes=["needs_changes", "approved"],
+                llm="gpt-4o-mini",
+            )
+
+        assert result == "approved"
+
 
 
 # -- HITL Learning tests --
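
Aside: a minimal, self-contained repro of the httpx behavior that the
conftest.py patch at the top of this diff works around. It uses only public
httpx APIs (Response, SyncByteStream, ResponseNotRead) and is an
illustration, not part of the change:

    import httpx

    class _Body(httpx.SyncByteStream):
        # A deferred body, similar to what VCR leaves behind when it
        # mocks Response.read() during cassette replay.
        def __iter__(self):
            yield b'{"ok": true}'

    response = httpx.Response(200, stream=_Body())
    try:
        _ = response.content  # body never materialized
        raise AssertionError("expected ResponseNotRead")
    except httpx.ResponseNotRead:
        pass

    response.read()  # populates response._content, which the patch sets manually
    assert response.content == b'{"ok": true}'
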