Fix nested Pydantic model issue (#1905)

* Fix nested Pydantic model issue * Fix failing tests * Add in VCR * Cleanup * Drop prints * Fix VCR issues * Add new recordings * Try to fix VCR * Add in fix from Lorenze.
2026-01-11 00:58:30 +00:00 · 2025-01-16 11:28:58 -05:00
parent 42311d9c7a
commit b5779dca12
10 changed files with 3624 additions and 61 deletions
--- a/src/crewai/agents/agent_builder/utilities/base_output_converter.py
+++ b/src/crewai/agents/agent_builder/utilities/base_output_converter.py
@@ -25,7 +25,7 @@ class OutputConverter(BaseModel, ABC):
    llm: Any = Field(description="The language model to be used to convert the text.")
    model: Any = Field(description="The model to be used to convert the text.")
    instructions: str = Field(description="Conversion instructions to the LLM.")
-    max_attempts: Optional[int] = Field(
+    max_attempts: int = Field(
        description="Max number of attempts to try to get the output formatted.",
        default=3,
    )
--- a/src/crewai/utilities/converter.py
+++ b/src/crewai/utilities/converter.py
@@ -26,17 +26,24 @@ class Converter(OutputConverter):
            if self.llm.supports_function_calling():
                return self._create_instructor().to_pydantic()
            else:
-                return self.llm.call(
+                response = self.llm.call(
                    [
                        {"role": "system", "content": self.instructions},
                        {"role": "user", "content": self.text},
                    ]
                )
+                return self.model.model_validate_json(response)
+        except ValidationError as e:
+            if current_attempt < self.max_attempts:
+                return self.to_pydantic(current_attempt + 1)
+            raise ConverterError(
+                f"Failed to convert text into a Pydantic model due to the following validation error: {e}"
+            )
        except Exception as e:
            if current_attempt < self.max_attempts:
                return self.to_pydantic(current_attempt + 1)
-            return ConverterError(
-                f"Failed to convert text into a pydantic model due to the following error: {e}"
+            raise ConverterError(
+                f"Failed to convert text into a Pydantic model due to the following error: {e}"
            )

    def to_json(self, current_attempt=1):
@@ -66,7 +73,6 @@ class Converter(OutputConverter):
            llm=self.llm,
            model=self.model,
            content=self.text,
-            instructions=self.instructions,
        )
        return inst

@@ -187,10 +193,15 @@ def convert_with_instructions(


 def get_conversion_instructions(model: Type[BaseModel], llm: Any) -> str:
-    instructions = "I'm gonna convert this raw text into valid JSON."
+    instructions = "Please convert the following text into valid JSON."
    if llm.supports_function_calling():
        model_schema = PydanticSchemaParser(model=model).get_schema()
-        instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
+        instructions += (
+            f"\n\nThe JSON should follow this schema:\n```json\n{model_schema}\n```"
+        )
+    else:
+        model_description = generate_model_description(model)
+        instructions += f"\n\nThe JSON should follow this format:\n{model_description}"
    return instructions


--- a/src/crewai/utilities/internal_instructor.py
+++ b/src/crewai/utilities/internal_instructor.py
@@ -11,12 +11,10 @@ class InternalInstructor:
        model: Type,
        agent: Optional[Any] = None,
        llm: Optional[str] = None,
-        instructions: Optional[str] = None,
    ):
        self.content = content
        self.agent = agent
        self.llm = llm
-        self.instructions = instructions
        self.model = model
        self._client = None
        self.set_instructor()
@@ -31,10 +29,7 @@ class InternalInstructor:
            import instructor
            from litellm import completion

-            self._client = instructor.from_litellm(
-                completion,
-                mode=instructor.Mode.TOOLS,
-            )
+            self._client = instructor.from_litellm(completion)

    def to_json(self):
        model = self.to_pydantic()
@@ -42,8 +37,6 @@ class InternalInstructor:

    def to_pydantic(self):
        messages = [{"role": "user", "content": self.content}]
-        if self.instructions:
-            messages.append({"role": "system", "content": self.instructions})
        model = self._client.chat.completions.create(
            model=self.llm.model, response_model=self.model, messages=messages
        )
--- a/src/crewai/utilities/pydantic_schema_parser.py
+++ b/src/crewai/utilities/pydantic_schema_parser.py
@@ -1,4 +1,4 @@
-from typing import Type, Union, get_args, get_origin
+from typing import Dict, List, Type, Union, get_args, get_origin

 from pydantic import BaseModel

@@ -10,40 +10,83 @@ class PydanticSchemaParser(BaseModel):
        """
        Public method to get the schema of a Pydantic model.

-        :param model: The Pydantic model class to generate schema for.
        :return: String representation of the model schema.
        """
-        return self._get_model_schema(self.model)
+        return "{\n" + self._get_model_schema(self.model) + "\n}"

-    def _get_model_schema(self, model, depth=0) -> str:
-        indent = "    " * depth
-        lines = [f"{indent}{{"]
-        for field_name, field in model.model_fields.items():
-            field_type_str = self._get_field_type(field, depth + 1)
-            lines.append(f"{indent}    {field_name}: {field_type_str},")
-        lines[-1] = lines[-1].rstrip(",")  # Remove trailing comma from last item
-        lines.append(f"{indent}}}")
-        return "\n".join(lines)
+    def _get_model_schema(self, model: Type[BaseModel], depth: int = 0) -> str:
+        indent = " " * 4 * depth
+        lines = [
+            f"{indent}    {field_name}: {self._get_field_type(field, depth + 1)}"
+            for field_name, field in model.model_fields.items()
+        ]
+        return ",\n".join(lines)

-    def _get_field_type(self, field, depth) -> str:
+    def _get_field_type(self, field, depth: int) -> str:
        field_type = field.annotation
-        if get_origin(field_type) is list:
+        origin = get_origin(field_type)
+
+        if origin in {list, List}:
            list_item_type = get_args(field_type)[0]
-            if isinstance(list_item_type, type) and issubclass(
-                list_item_type, BaseModel
-            ):
-                nested_schema = self._get_model_schema(list_item_type, depth + 1)
-                return f"List[\n{nested_schema}\n{' ' * 4 * depth}]"
+            return self._format_list_type(list_item_type, depth)
+
+        if origin in {dict, Dict}:
+            key_type, value_type = get_args(field_type)
+            return f"Dict[{key_type.__name__}, {value_type.__name__}]"
+
+        if origin is Union:
+            return self._format_union_type(field_type, depth)
+
+        if isinstance(field_type, type) and issubclass(field_type, BaseModel):
+            nested_schema = self._get_model_schema(field_type, depth)
+            nested_indent = " " * 4 * depth
+            return f"{field_type.__name__}\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}"
+
+        return field_type.__name__
+
+    def _format_list_type(self, list_item_type, depth: int) -> str:
+        if isinstance(list_item_type, type) and issubclass(list_item_type, BaseModel):
+            nested_schema = self._get_model_schema(list_item_type, depth + 1)
+            nested_indent = " " * 4 * (depth)
+            return f"List[\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}\n{nested_indent}]"
+        return f"List[{list_item_type.__name__}]"
+
+    def _format_union_type(self, field_type, depth: int) -> str:
+        args = get_args(field_type)
+        if type(None) in args:
+            # It's an Optional type
+            non_none_args = [arg for arg in args if arg is not type(None)]
+            if len(non_none_args) == 1:
+                inner_type = self._get_field_type_for_annotation(
+                    non_none_args[0], depth
+                )
+                return f"Optional[{inner_type}]"
            else:
-                return f"List[{list_item_type.__name__}]"
-        elif get_origin(field_type) is Union:
-            union_args = get_args(field_type)
-            if type(None) in union_args:
-                non_none_type = next(arg for arg in union_args if arg is not type(None))
-                return f"Optional[{self._get_field_type(field.__class__(annotation=non_none_type), depth)}]"
-            else:
-                return f"Union[{', '.join(arg.__name__ for arg in union_args)}]"
-        elif isinstance(field_type, type) and issubclass(field_type, BaseModel):
-            return self._get_model_schema(field_type, depth)
+                # Union with None and multiple other types
+                inner_types = ", ".join(
+                    self._get_field_type_for_annotation(arg, depth)
+                    for arg in non_none_args
+                )
+                return f"Optional[Union[{inner_types}]]"
        else:
-            return getattr(field_type, "__name__", str(field_type))
+            # General Union type
+            inner_types = ", ".join(
+                self._get_field_type_for_annotation(arg, depth) for arg in args
+            )
+            return f"Union[{inner_types}]"
+
+    def _get_field_type_for_annotation(self, annotation, depth: int) -> str:
+        origin = get_origin(annotation)
+        if origin in {list, List}:
+            list_item_type = get_args(annotation)[0]
+            return self._format_list_type(list_item_type, depth)
+        if origin in {dict, Dict}:
+            key_type, value_type = get_args(annotation)
+            return f"Dict[{key_type.__name__}, {value_type.__name__}]"
+        if origin is Union:
+            return self._format_union_type(annotation, depth)
+        if isinstance(annotation, type) and issubclass(annotation, BaseModel):
+            nested_schema = self._get_model_schema(annotation, depth)
+            nested_indent = " " * 4 * depth
+            return f"{annotation.__name__}\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}"
+        return annotation.__name__