fixes gemini

2026-05-01 07:13:00 +00:00 · 2026-01-29 15:26:42 -08:00
parent 335696d0ee
commit 0e84dc1cbb
1 changed files with 133 additions and 9 deletions
--- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
@@ -34,6 +34,9 @@ except ImportError:
    ) from None
 STRUCTURED_OUTPUT_TOOL_NAME = "structured_output"
 class GeminiCompletion(BaseLLM):
    """Google Gemini native completion implementation.
@@ -447,6 +450,9 @@ class GeminiCompletion(BaseLLM):
            Structured output support varies by model version:
            - Gemini 1.5 and earlier: Uses response_schema (Pydantic model)
            - Gemini 2.0+: Uses response_json_schema (JSON Schema) with propertyOrdering
            When both tools AND response_model are present, we add a structured_output
            pseudo-tool since Gemini doesn't support tools + response_schema together.
        """
        self.tools = tools
        config_params: dict[str, Any] = {}
@@ -472,7 +478,30 @@ class GeminiCompletion(BaseLLM):
            config_params["stop_sequences"] = self.stop_sequences
        if tools and self.supports_tools:
-            config_params["tools"] = self._convert_tools_for_interference(tools)
+            gemini_tools = self._convert_tools_for_interference(tools)
            if response_model:
                schema_output = generate_model_description(response_model)
                schema = schema_output.get("json_schema", {}).get("schema", {})
                if self.is_gemini_2_0:
                    schema = self._add_property_ordering(schema)
                structured_output_tool = types.Tool(
                    function_declarations=[
                        types.FunctionDeclaration(
                            name=STRUCTURED_OUTPUT_TOOL_NAME,
                            description=(
                                "Use this tool to provide your final structured response. "
                                "Call this tool when you have gathered all necessary information "
                                "and are ready to provide the final answer in the required format."
                            ),
                            parameters_json_schema=schema,
                        )
                    ]
                )
                gemini_tools.append(structured_output_tool)
            config_params["tools"] = gemini_tools
        elif response_model:
            config_params["response_mime_type"] = "application/json"
            schema_output = generate_model_description(response_model)
@@ -719,6 +748,47 @@ class GeminiCompletion(BaseLLM):
            messages_for_event, content, from_agent
        )
    def _handle_structured_output_tool_call(
        self,
        structured_data: dict[str, Any],
        response_model: type[BaseModel],
        contents: list[types.Content],
        from_task: Any | None = None,
        from_agent: Any | None = None,
    ) -> BaseModel:
        """Validate a structured_output tool call and emit the completion event.

        Args:
            structured_data: The arguments passed to the structured_output tool.
            response_model: Pydantic model to validate against.
            contents: Original contents, converted to dicts for the emitted event.
            from_task: Task that initiated the call.
            from_agent: Agent that initiated the call.

        Returns:
            Validated Pydantic model instance.

        Raises:
            ValueError: If ``structured_data`` fails validation against
                ``response_model``.
        """
        # Guard only the validation step. Errors raised while serialising the
        # model or emitting the event are not validation failures and must not
        # be re-labelled as such, so they propagate unchanged.
        try:
            validated_data = response_model.model_validate(structured_data)
        except Exception as e:
            error_msg = (
                f"Failed to validate {STRUCTURED_OUTPUT_TOOL_NAME} tool response "
                f"with model {response_model.__name__}: {e}"
            )
            logging.error(error_msg)
            raise ValueError(error_msg) from e

        # Validation succeeded: report the structured result as a regular LLM
        # call completion, using the JSON form of the validated model.
        self._emit_call_completed_event(
            response=validated_data.model_dump_json(),
            call_type=LLMCallType.LLM_CALL,
            from_task=from_task,
            from_agent=from_agent,
            messages=self._convert_contents_to_dict(contents),
        )
        return validated_data
    def _process_response_with_tools(
        self,
        response: GenerateContentResponse,
@@ -749,17 +819,47 @@ class GeminiCompletion(BaseLLM):
                    part for part in candidate.content.parts if part.function_call
                ]
                # Check for structured_output pseudo-tool call (used when tools + response_model)
                if response_model and function_call_parts:
                    for part in function_call_parts:
                        if (
                            part.function_call
                            and part.function_call.name == STRUCTURED_OUTPUT_TOOL_NAME
                        ):
                            structured_data = (
                                dict(part.function_call.args)
                                if part.function_call.args
                                else {}
                            )
                            return self._handle_structured_output_tool_call(
                                structured_data=structured_data,
                                response_model=response_model,
                                contents=contents,
                                from_task=from_task,
                                from_agent=from_agent,
                            )
                # Filter out structured_output from function calls returned to executor
                non_structured_output_parts = [
                    part
                    for part in function_call_parts
                    if not (
                        part.function_call
                        and part.function_call.name == STRUCTURED_OUTPUT_TOOL_NAME
                    )
                ]
                # If there are function calls but no available_functions,
                # return them for the executor to handle (like OpenAI/Anthropic)
-                if function_call_parts and not available_functions:
+                if non_structured_output_parts and not available_functions:
                    self._emit_call_completed_event(
-                        response=function_call_parts,
+                        response=non_structured_output_parts,
                        call_type=LLMCallType.TOOL_CALL,
                        from_task=from_task,
                        from_agent=from_agent,
                        messages=self._convert_contents_to_dict(contents),
                    )
-                    return function_call_parts
+                    return non_structured_output_parts
                # Otherwise execute the tools internally
                for part in candidate.content.parts:
@@ -767,6 +867,9 @@ class GeminiCompletion(BaseLLM):
                        function_name = part.function_call.name
                        if function_name is None:
                            continue
                        # Skip structured_output - it's handled above
                        if function_name == STRUCTURED_OUTPUT_TOOL_NAME:
                            continue
                        function_args = (
                            dict(part.function_call.args)
                            if part.function_call.args
@@ -899,9 +1002,27 @@ class GeminiCompletion(BaseLLM):
        """
        self._track_token_usage_internal(usage_data)
        if response_model and function_calls:
            for call_data in function_calls.values():
                if call_data.get("name") == STRUCTURED_OUTPUT_TOOL_NAME:
                    structured_data = call_data.get("args", {})
                    return self._handle_structured_output_tool_call(
                        structured_data=structured_data,
                        response_model=response_model,
                        contents=contents,
                        from_task=from_task,
                        from_agent=from_agent,
                    )
        non_structured_output_calls = {
            idx: call_data
            for idx, call_data in function_calls.items()
            if call_data.get("name") != STRUCTURED_OUTPUT_TOOL_NAME
        }
        # If there are function calls but no available_functions,
        # return them for the executor to handle
-        if function_calls and not available_functions:
+        if non_structured_output_calls and not available_functions:
            formatted_function_calls = [
                {
                    "id": call_data["id"],
@@ -911,7 +1032,7 @@ class GeminiCompletion(BaseLLM):
                    },
                    "type": "function",
                }
-                for call_data in function_calls.values()
+                for call_data in non_structured_output_calls.values()
            ]
            self._emit_call_completed_event(
                response=formatted_function_calls,
@@ -922,9 +1043,9 @@ class GeminiCompletion(BaseLLM):
            )
            return formatted_function_calls
-        # Handle completed function calls
+        # Handle completed function calls (excluding structured_output)
-        if function_calls and available_functions:
+        if non_structured_output_calls and available_functions:
-            for call_data in function_calls.values():
+            for call_data in non_structured_output_calls.values():
                function_name = call_data["name"]
                function_args = call_data["args"]
@@ -948,6 +1069,9 @@ class GeminiCompletion(BaseLLM):
                if result is not None:
                    return result
        # When tools are present, structured output should come via the structured_output
        # pseudo-tool, not via direct text response. If we reach here with tools present,
        # the LLM chose to return plain text instead of calling structured_output.
        effective_response_model = None if self.tools else response_model
        return self._finalize_completion_response(