Fix Gemini structured output when tools and a response model are used together (structured_output pseudo-tool)

This commit is contained in:
lorenzejay
2026-01-29 15:26:42 -08:00
parent 335696d0ee
commit 0e84dc1cbb

View File

@@ -34,6 +34,9 @@ except ImportError:
) from None
STRUCTURED_OUTPUT_TOOL_NAME = "structured_output"
class GeminiCompletion(BaseLLM):
"""Google Gemini native completion implementation.
@@ -447,6 +450,9 @@ class GeminiCompletion(BaseLLM):
Structured output support varies by model version:
- Gemini 1.5 and earlier: Uses response_schema (Pydantic model)
- Gemini 2.0+: Uses response_json_schema (JSON Schema) with propertyOrdering
When both tools AND response_model are present, we add a structured_output
pseudo-tool since Gemini doesn't support tools + response_schema together.
"""
self.tools = tools
config_params: dict[str, Any] = {}
@@ -472,7 +478,30 @@ class GeminiCompletion(BaseLLM):
config_params["stop_sequences"] = self.stop_sequences
if tools and self.supports_tools:
config_params["tools"] = self._convert_tools_for_interference(tools)
gemini_tools = self._convert_tools_for_interference(tools)
if response_model:
schema_output = generate_model_description(response_model)
schema = schema_output.get("json_schema", {}).get("schema", {})
if self.is_gemini_2_0:
schema = self._add_property_ordering(schema)
structured_output_tool = types.Tool(
function_declarations=[
types.FunctionDeclaration(
name=STRUCTURED_OUTPUT_TOOL_NAME,
description=(
"Use this tool to provide your final structured response. "
"Call this tool when you have gathered all necessary information "
"and are ready to provide the final answer in the required format."
),
parameters_json_schema=schema,
)
]
)
gemini_tools.append(structured_output_tool)
config_params["tools"] = gemini_tools
elif response_model:
config_params["response_mime_type"] = "application/json"
schema_output = generate_model_description(response_model)
@@ -719,6 +748,47 @@ class GeminiCompletion(BaseLLM):
messages_for_event, content, from_agent
)
def _handle_structured_output_tool_call(
    self,
    structured_data: dict[str, Any],
    response_model: type[BaseModel],
    contents: list[types.Content],
    from_task: Any | None = None,
    from_agent: Any | None = None,
) -> BaseModel:
    """Validate a structured_output pseudo-tool call and emit a completion event.

    Args:
        structured_data: The arguments the model passed to the
            structured_output pseudo-tool.
        response_model: Pydantic model to validate the arguments against.
        contents: Original request contents, converted for the event payload.
        from_task: Task that initiated the call, if any.
        from_agent: Agent that initiated the call, if any.

    Returns:
        Validated Pydantic model instance.

    Raises:
        ValueError: If ``structured_data`` fails validation against
            ``response_model``.
    """
    # Keep the try body minimal: only validation failures should be
    # translated into ValueError. If event emission below raises, that
    # error propagates unchanged instead of being misreported as a
    # validation failure of the structured_output tool response.
    try:
        validated_data = response_model.model_validate(structured_data)
    except Exception as e:
        error_msg = (
            f"Failed to validate {STRUCTURED_OUTPUT_TOOL_NAME} tool response "
            f"with model {response_model.__name__}: {e}"
        )
        logging.error(error_msg)
        raise ValueError(error_msg) from e
    self._emit_call_completed_event(
        response=validated_data.model_dump_json(),
        call_type=LLMCallType.LLM_CALL,
        from_task=from_task,
        from_agent=from_agent,
        messages=self._convert_contents_to_dict(contents),
    )
    return validated_data
def _process_response_with_tools(
self,
response: GenerateContentResponse,
@@ -749,17 +819,47 @@ class GeminiCompletion(BaseLLM):
part for part in candidate.content.parts if part.function_call
]
# Check for structured_output pseudo-tool call (used when tools + response_model)
if response_model and function_call_parts:
for part in function_call_parts:
if (
part.function_call
and part.function_call.name == STRUCTURED_OUTPUT_TOOL_NAME
):
structured_data = (
dict(part.function_call.args)
if part.function_call.args
else {}
)
return self._handle_structured_output_tool_call(
structured_data=structured_data,
response_model=response_model,
contents=contents,
from_task=from_task,
from_agent=from_agent,
)
# Filter out structured_output from function calls returned to executor
non_structured_output_parts = [
part
for part in function_call_parts
if not (
part.function_call
and part.function_call.name == STRUCTURED_OUTPUT_TOOL_NAME
)
]
# If there are function calls but no available_functions,
# return them for the executor to handle (like OpenAI/Anthropic)
if function_call_parts and not available_functions:
if non_structured_output_parts and not available_functions:
self._emit_call_completed_event(
response=function_call_parts,
response=non_structured_output_parts,
call_type=LLMCallType.TOOL_CALL,
from_task=from_task,
from_agent=from_agent,
messages=self._convert_contents_to_dict(contents),
)
return function_call_parts
return non_structured_output_parts
# Otherwise execute the tools internally
for part in candidate.content.parts:
@@ -767,6 +867,9 @@ class GeminiCompletion(BaseLLM):
function_name = part.function_call.name
if function_name is None:
continue
# Skip structured_output - it's handled above
if function_name == STRUCTURED_OUTPUT_TOOL_NAME:
continue
function_args = (
dict(part.function_call.args)
if part.function_call.args
@@ -899,9 +1002,27 @@ class GeminiCompletion(BaseLLM):
"""
self._track_token_usage_internal(usage_data)
if response_model and function_calls:
for call_data in function_calls.values():
if call_data.get("name") == STRUCTURED_OUTPUT_TOOL_NAME:
structured_data = call_data.get("args", {})
return self._handle_structured_output_tool_call(
structured_data=structured_data,
response_model=response_model,
contents=contents,
from_task=from_task,
from_agent=from_agent,
)
non_structured_output_calls = {
idx: call_data
for idx, call_data in function_calls.items()
if call_data.get("name") != STRUCTURED_OUTPUT_TOOL_NAME
}
# If there are function calls but no available_functions,
# return them for the executor to handle
if function_calls and not available_functions:
if non_structured_output_calls and not available_functions:
formatted_function_calls = [
{
"id": call_data["id"],
@@ -911,7 +1032,7 @@ class GeminiCompletion(BaseLLM):
},
"type": "function",
}
for call_data in function_calls.values()
for call_data in non_structured_output_calls.values()
]
self._emit_call_completed_event(
response=formatted_function_calls,
@@ -922,9 +1043,9 @@ class GeminiCompletion(BaseLLM):
)
return formatted_function_calls
# Handle completed function calls
if function_calls and available_functions:
for call_data in function_calls.values():
# Handle completed function calls (excluding structured_output)
if non_structured_output_calls and available_functions:
for call_data in non_structured_output_calls.values():
function_name = call_data["name"]
function_args = call_data["args"]
@@ -948,6 +1069,9 @@ class GeminiCompletion(BaseLLM):
if result is not None:
return result
# When tools are present, structured output should come via the structured_output
# pseudo-tool, not via direct text response. If we reach here with tools present,
# the LLM chose to return plain text instead of calling structured_output.
effective_response_model = None if self.tools else response_model
return self._finalize_completion_response(