chore: align json schemas with providers

Greyson Lalonde
2025-11-05 14:07:41 -05:00
parent 6111bb6c65
commit 4fd6863a02
3 changed files with 91 additions and 28 deletions

View File

@@ -1,7 +1,7 @@
 import json
 import logging
 import os
-from typing import Any, cast
+from typing import TYPE_CHECKING, Any, cast

 from pydantic import BaseModel
@@ -15,6 +15,12 @@ from crewai.utilities.exceptions.context_window_exceeding_exception import (
 from crewai.utilities.types import LLMMessage

+if TYPE_CHECKING:
+    from google.genai.types import (  # type: ignore[import-untyped]
+        GenerateContentResponse,
+    )
+
 try:
     from google import genai  # type: ignore[import-untyped]
     from google.genai import types  # type: ignore[import-untyped]
@@ -295,7 +301,7 @@ class GeminiCompletion(BaseLLM):
         if response_model:
             config_params["response_mime_type"] = "application/json"
-            config_params["response_schema"] = response_model.model_json_schema()
+            config_params["response_json_schema"] = response_model.model_json_schema()

         # Handle tools for supported models
         if tools and self.supports_tools:
@@ -600,7 +606,8 @@ class GeminiCompletion(BaseLLM):
         # Default context window size for Gemini models
         return int(1048576 * CONTEXT_WINDOW_USAGE_RATIO)  # 1M tokens

-    def _extract_token_usage(self, response: dict[str, Any]) -> dict[str, Any]:
+    @staticmethod
+    def _extract_token_usage(response: GenerateContentResponse) -> dict[str, Any]:  # type: ignore[no-any-unimported]
         """Extract token usage from Gemini response."""
         if hasattr(response, "usage_metadata"):
             usage = response.usage_metadata
@@ -612,10 +619,10 @@
             }
         return {"total_tokens": 0}

+    @staticmethod
     def _convert_contents_to_dict(  # type: ignore[no-any-unimported]
-        self,
         contents: list[types.Content],
-    ) -> list[dict[str, str]]:
+    ) -> list[dict[str, str | None]]:
         """Convert contents to dict format."""
         return [
             {
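
For context, a minimal sketch of how the reworked pieces fit together. This is an assumption-laden illustration, not part of the diff: the client setup, model name, and prompt are invented, and the response_json_schema config key assumes a recent google-genai release.

    from pydantic import BaseModel

    from google import genai
    from google.genai import types


    class Answer(BaseModel):
        text: str
        confidence: float


    client = genai.Client()  # assumes GOOGLE_API_KEY is set in the environment

    # Per this commit: pass the plain JSON schema under response_json_schema
    # instead of handing the Pydantic class to response_schema.
    config_params = {
        "response_mime_type": "application/json",
        "response_json_schema": Answer.model_json_schema(),
    }
    response = client.models.generate_content(
        model="gemini-2.0-flash",  # illustrative model name
        contents="Answer in one sentence and rate your confidence from 0 to 1.",
        config=types.GenerateContentConfig(**config_params),
    )

    # Mirrors the now-static _extract_token_usage: counts live on usage_metadata.
    if response.usage_metadata is not None:
        print(
            response.usage_metadata.prompt_token_count,
            response.usage_metadata.candidates_token_count,
            response.usage_metadata.total_token_count,
        )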

View File

@@ -4,7 +4,7 @@ from collections.abc import Callable
 from copy import deepcopy
 import json
 import re
-from typing import TYPE_CHECKING, Any, Final, TypedDict
+from typing import TYPE_CHECKING, Any, Final, Literal, TypedDict

 from pydantic import BaseModel, ValidationError
 from typing_extensions import Unpack
@@ -621,7 +621,10 @@ def ensure_all_properties_required(schema: dict[str, Any]) -> dict[str, Any]:
     return schema


-def generate_model_description(model: type[BaseModel]) -> dict[str, Any]:
+def generate_model_description(
+    model: type[BaseModel],
+    provider: Literal["openai", "gemini", "anthropic", "raw"] = "openai",
+) -> dict[str, Any]:
     """Generate JSON schema description of a Pydantic model.

     This function takes a Pydantic model class and returns its JSON schema,
@@ -630,9 +633,28 @@ def generate_model_description(model: type[BaseModel]) -> dict[str, Any]:
     Args:
         model: A Pydantic model class.
+        provider: The LLM provider format to use. Options:
+            - "openai": OpenAI's wrapped format with name and strict fields (default)
+            - "gemini": Direct JSON schema for Gemini API
+            - "anthropic": Tool input_schema format for Claude API
+            - "raw": Plain JSON schema without any provider-specific wrapper

     Returns:
-        A JSON schema dictionary representation of the model.
+        A JSON schema dictionary representation of the model in the requested format.
+
+    Examples:
+        >>> class User(BaseModel):
+        ...     name: str
+        ...     age: int
+        >>> # OpenAI format (default)
+        >>> generate_model_description(User)
+        {'type': 'json_schema', 'json_schema': {'name': 'User', 'strict': True, 'schema': {...}}}
+        >>> # Gemini format
+        >>> generate_model_description(User, provider="gemini")
+        {'type': 'object', 'properties': {...}, 'required': [...]}
+        >>> # Anthropic format (for tool use)
+        >>> generate_model_description(User, provider="anthropic")
+        {'name': 'User', 'description': '...', 'input_schema': {'type': 'object', 'properties': {...}, 'required': [...]}}
     """
     json_schema = model.model_json_schema(ref_template="#/$defs/{model}")
@@ -652,6 +674,25 @@ def generate_model_description(model: type[BaseModel]) -> dict[str, Any]:
     json_schema = convert_oneof_to_anyof(json_schema)
     json_schema = ensure_all_properties_required(json_schema)

+    if provider == "openai":
+        return {
+            "type": "json_schema",
+            "json_schema": {
+                "name": model.__name__,
+                "strict": True,
+                "schema": json_schema,
+            },
+        }
+    if provider == "gemini":
+        return json_schema
+    if provider == "anthropic":
+        return {
+            "name": model.__name__,
+            "description": model.__doc__ or f"Schema for {model.__name__}",
+            "input_schema": json_schema,
+        }
+    if provider == "raw":
+        return json_schema
     return {
         "type": "json_schema",
         "json_schema": {
View File

@@ -1,3 +1,4 @@
+import json
 from unittest import mock
 from unittest.mock import MagicMock, patch
@@ -44,26 +45,40 @@ def test_evaluate_training_data(converter_mock):
     )

     assert result == function_return_value
-    converter_mock.assert_has_calls(
-        [
-            mock.call(
-                llm=original_agent.llm,
-                text="Assess the quality of the training data based on the llm output, human feedback , and llm "
-                "output improved result.\n\nIteration: data1\nInitial Output:\nInitial output 1\n\nHuman Feedback:\nHuman feedback "
-                "1\n\nImproved Output:\nImproved output 1\n\n------------------------------------------------\n\nIteration: data2\nInitial Output:\nInitial output 2\n\nHuman "
-                "Feedback:\nHuman feedback 2\n\nImproved Output:\nImproved output 2\n\n------------------------------------------------\n\nPlease provide:\n- Provide "
-                "a list of clear, actionable instructions derived from the Human Feedbacks to enhance the Agent's "
-                "performance. Analyze the differences between Initial Outputs and Improved Outputs to generate specific "
-                "action items for future tasks. Ensure all key and specificpoints from the human feedback are "
-                "incorporated into these instructions.\n- A score from 0 to 10 evaluating on completion, quality, and "
-                "overall performance from the improved output to the initial output based on the human feedback\n",
-                model=TrainingTaskEvaluation,
-                instructions="I'm gonna convert this raw text into valid JSON.\n\nThe json should have the "
-                "following structure, with the following keys:\n{\n suggestions: List[str],\n quality: float,\n final_summary: str\n}",
-            ),
-            mock.call().to_pydantic(),
-        ]
-    )
+
+    # Verify converter was called once
+    assert converter_mock.call_count == 1
+
+    # Get the actual call arguments
+    call_args = converter_mock.call_args
+    assert call_args[1]["llm"] == original_agent.llm
+    assert call_args[1]["model"] == TrainingTaskEvaluation
+
+    # Verify text contains expected training data
+    text = call_args[1]["text"]
+    assert "Iteration: data1" in text
+    assert "Initial output 1" in text
+    assert "Human feedback 1" in text
+    assert "Improved output 1" in text
+    assert "Iteration: data2" in text
+    assert "Initial output 2" in text
+
+    # Verify instructions contain the OpenAPI schema format
+    instructions = call_args[1]["instructions"]
+    assert "I'm gonna convert this raw text into valid JSON" in instructions
+    assert "Ensure your final answer strictly adheres to the following OpenAPI schema" in instructions
+
+    # Parse and validate the schema structure in instructions
+    # The schema should be embedded in the instructions as JSON
+    assert '"type": "json_schema"' in instructions
+    assert '"name": "TrainingTaskEvaluation"' in instructions
+    assert '"strict": true' in instructions
+    assert '"suggestions"' in instructions
+    assert '"quality"' in instructions
+    assert '"final_summary"' in instructions
+
+    # Verify to_pydantic was called
+    converter_mock.return_value.to_pydantic.assert_called_once()


 @patch("crewai.utilities.converter.Converter.to_pydantic")
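
The new assertion style in miniature, with illustrative names: read the kwargs off call_args and check only the fields that matter, instead of pinning an entire prompt string with assert_has_calls:

    from unittest.mock import MagicMock

    converter_mock = MagicMock()
    converter_mock(llm="fake-llm", text="Iteration: data1\n...", instructions='{"type": "json_schema"}')

    kwargs = converter_mock.call_args.kwargs  # same mapping as call_args[1]
    assert converter_mock.call_count == 1
    assert kwargs["llm"] == "fake-llm"
    assert "Iteration: data1" in kwargs["text"]
    assert '"type": "json_schema"' in kwargs["instructions"]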