mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 00:58:30 +00:00
Fix nested pydantic model issue (#1905)
* Fix nested pydantic model issue
* fix failing tests
* add in vcr
* cleanup
* drop prints
* Fix vcr issues
* added new recordings
* trying to fix vcr
* add in fix from lorenze.
This commit is contained in:
committed by
GitHub
parent
42311d9c7a
commit
b5779dca12
@@ -25,7 +25,7 @@ class OutputConverter(BaseModel, ABC):
|
||||
llm: Any = Field(description="The language model to be used to convert the text.")
|
||||
model: Any = Field(description="The model to be used to convert the text.")
|
||||
instructions: str = Field(description="Conversion instructions to the LLM.")
|
||||
max_attempts: Optional[int] = Field(
|
||||
max_attempts: int = Field(
|
||||
description="Max number of attempts to try to get the output formatted.",
|
||||
default=3,
|
||||
)
|
||||
|
||||
@@ -26,17 +26,24 @@ class Converter(OutputConverter):
|
||||
if self.llm.supports_function_calling():
|
||||
return self._create_instructor().to_pydantic()
|
||||
else:
|
||||
return self.llm.call(
|
||||
response = self.llm.call(
|
||||
[
|
||||
{"role": "system", "content": self.instructions},
|
||||
{"role": "user", "content": self.text},
|
||||
]
|
||||
)
|
||||
return self.model.model_validate_json(response)
|
||||
except ValidationError as e:
|
||||
if current_attempt < self.max_attempts:
|
||||
return self.to_pydantic(current_attempt + 1)
|
||||
raise ConverterError(
|
||||
f"Failed to convert text into a Pydantic model due to the following validation error: {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
if current_attempt < self.max_attempts:
|
||||
return self.to_pydantic(current_attempt + 1)
|
||||
return ConverterError(
|
||||
f"Failed to convert text into a pydantic model due to the following error: {e}"
|
||||
raise ConverterError(
|
||||
f"Failed to convert text into a Pydantic model due to the following error: {e}"
|
||||
)
|
||||
|
||||
def to_json(self, current_attempt=1):
|
||||
@@ -66,7 +73,6 @@ class Converter(OutputConverter):
|
||||
llm=self.llm,
|
||||
model=self.model,
|
||||
content=self.text,
|
||||
instructions=self.instructions,
|
||||
)
|
||||
return inst
|
||||
|
||||
@@ -187,10 +193,15 @@ def convert_with_instructions(
|
||||
|
||||
|
||||
def get_conversion_instructions(model: Type[BaseModel], llm: Any) -> str:
|
||||
instructions = "I'm gonna convert this raw text into valid JSON."
|
||||
instructions = "Please convert the following text into valid JSON."
|
||||
if llm.supports_function_calling():
|
||||
model_schema = PydanticSchemaParser(model=model).get_schema()
|
||||
instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
|
||||
instructions += (
|
||||
f"\n\nThe JSON should follow this schema:\n```json\n{model_schema}\n```"
|
||||
)
|
||||
else:
|
||||
model_description = generate_model_description(model)
|
||||
instructions += f"\n\nThe JSON should follow this format:\n{model_description}"
|
||||
return instructions
|
||||
|
||||
|
||||
|
||||
@@ -11,12 +11,10 @@ class InternalInstructor:
|
||||
model: Type,
|
||||
agent: Optional[Any] = None,
|
||||
llm: Optional[str] = None,
|
||||
instructions: Optional[str] = None,
|
||||
):
|
||||
self.content = content
|
||||
self.agent = agent
|
||||
self.llm = llm
|
||||
self.instructions = instructions
|
||||
self.model = model
|
||||
self._client = None
|
||||
self.set_instructor()
|
||||
@@ -31,10 +29,7 @@ class InternalInstructor:
|
||||
import instructor
|
||||
from litellm import completion
|
||||
|
||||
self._client = instructor.from_litellm(
|
||||
completion,
|
||||
mode=instructor.Mode.TOOLS,
|
||||
)
|
||||
self._client = instructor.from_litellm(completion)
|
||||
|
||||
def to_json(self):
|
||||
model = self.to_pydantic()
|
||||
@@ -42,8 +37,6 @@ class InternalInstructor:
|
||||
|
||||
def to_pydantic(self):
|
||||
messages = [{"role": "user", "content": self.content}]
|
||||
if self.instructions:
|
||||
messages.append({"role": "system", "content": self.instructions})
|
||||
model = self._client.chat.completions.create(
|
||||
model=self.llm.model, response_model=self.model, messages=messages
|
||||
)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from typing import Type, Union, get_args, get_origin
|
||||
from typing import Dict, List, Type, Union, get_args, get_origin
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -10,40 +10,83 @@ class PydanticSchemaParser(BaseModel):
|
||||
"""
|
||||
Public method to get the schema of a Pydantic model.
|
||||
|
||||
:param model: The Pydantic model class to generate schema for.
|
||||
:return: String representation of the model schema.
|
||||
"""
|
||||
return self._get_model_schema(self.model)
|
||||
return "{\n" + self._get_model_schema(self.model) + "\n}"
|
||||
|
||||
def _get_model_schema(self, model, depth=0) -> str:
|
||||
indent = " " * depth
|
||||
lines = [f"{indent}{{"]
|
||||
for field_name, field in model.model_fields.items():
|
||||
field_type_str = self._get_field_type(field, depth + 1)
|
||||
lines.append(f"{indent} {field_name}: {field_type_str},")
|
||||
lines[-1] = lines[-1].rstrip(",") # Remove trailing comma from last item
|
||||
lines.append(f"{indent}}}")
|
||||
return "\n".join(lines)
|
||||
def _get_model_schema(self, model: Type[BaseModel], depth: int = 0) -> str:
    """Render the fields of ``model`` as comma-separated ``name: type`` lines.

    Only the field lines are produced; no surrounding braces are emitted
    by this method.

    :param model: Pydantic model class whose ``model_fields`` are rendered.
    :param depth: Nesting level; each line is prefixed with ``4 * depth``
        spaces, and field types are resolved one level deeper.
    :return: The field lines joined with ``",\\n"`` (empty string when the
        model declares no fields).
    """
    prefix = " " * (4 * depth)
    rendered = []
    for name, info in model.model_fields.items():
        # Field types are formatted at depth + 1 so nested models indent
        # relative to the field that contains them.
        type_text = self._get_field_type(info, depth + 1)
        rendered.append(f"{prefix} {name}: {type_text}")
    return ",\n".join(rendered)
|
||||
|
||||
def _get_field_type(self, field, depth) -> str:
|
||||
def _get_field_type(self, field, depth: int) -> str:
|
||||
field_type = field.annotation
|
||||
if get_origin(field_type) is list:
|
||||
origin = get_origin(field_type)
|
||||
|
||||
if origin in {list, List}:
|
||||
list_item_type = get_args(field_type)[0]
|
||||
if isinstance(list_item_type, type) and issubclass(
|
||||
list_item_type, BaseModel
|
||||
):
|
||||
nested_schema = self._get_model_schema(list_item_type, depth + 1)
|
||||
return f"List[\n{nested_schema}\n{' ' * 4 * depth}]"
|
||||
return self._format_list_type(list_item_type, depth)
|
||||
|
||||
if origin in {dict, Dict}:
|
||||
key_type, value_type = get_args(field_type)
|
||||
return f"Dict[{key_type.__name__}, {value_type.__name__}]"
|
||||
|
||||
if origin is Union:
|
||||
return self._format_union_type(field_type, depth)
|
||||
|
||||
if isinstance(field_type, type) and issubclass(field_type, BaseModel):
|
||||
nested_schema = self._get_model_schema(field_type, depth)
|
||||
nested_indent = " " * 4 * depth
|
||||
return f"{field_type.__name__}\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}"
|
||||
|
||||
return field_type.__name__
|
||||
|
||||
def _format_list_type(self, list_item_type, depth: int) -> str:
    """Return the schema string for a list whose items are ``list_item_type``.

    :param list_item_type: The annotation of the list's items.
    :param depth: Current nesting level, used to indent a nested model.
    :return: For a ``BaseModel`` subclass, a multi-line ``List[ {...} ]``
        block containing the nested model's fields; otherwise
        ``List[<name>]``.
    """
    if isinstance(list_item_type, type) and issubclass(list_item_type, BaseModel):
        # Nested model: its fields are rendered one level deeper than the
        # braces that enclose them.
        nested_schema = self._get_model_schema(list_item_type, depth + 1)
        nested_indent = " " * 4 * (depth)
        return f"List[\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}\n{nested_indent}]"

    # Not every annotation exposes ``__name__`` (forward references and
    # PEP 604 unions such as ``int | None`` do not), so fall back to the
    # annotation's string form instead of raising AttributeError.
    item_name = getattr(list_item_type, "__name__", str(list_item_type))
    return f"List[{item_name}]"
|
||||
|
||||
def _format_union_type(self, field_type, depth: int) -> str:
|
||||
args = get_args(field_type)
|
||||
if type(None) in args:
|
||||
# It's an Optional type
|
||||
non_none_args = [arg for arg in args if arg is not type(None)]
|
||||
if len(non_none_args) == 1:
|
||||
inner_type = self._get_field_type_for_annotation(
|
||||
non_none_args[0], depth
|
||||
)
|
||||
return f"Optional[{inner_type}]"
|
||||
else:
|
||||
return f"List[{list_item_type.__name__}]"
|
||||
elif get_origin(field_type) is Union:
|
||||
union_args = get_args(field_type)
|
||||
if type(None) in union_args:
|
||||
non_none_type = next(arg for arg in union_args if arg is not type(None))
|
||||
return f"Optional[{self._get_field_type(field.__class__(annotation=non_none_type), depth)}]"
|
||||
else:
|
||||
return f"Union[{', '.join(arg.__name__ for arg in union_args)}]"
|
||||
elif isinstance(field_type, type) and issubclass(field_type, BaseModel):
|
||||
return self._get_model_schema(field_type, depth)
|
||||
# Union with None and multiple other types
|
||||
inner_types = ", ".join(
|
||||
self._get_field_type_for_annotation(arg, depth)
|
||||
for arg in non_none_args
|
||||
)
|
||||
return f"Optional[Union[{inner_types}]]"
|
||||
else:
|
||||
return getattr(field_type, "__name__", str(field_type))
|
||||
# General Union type
|
||||
inner_types = ", ".join(
|
||||
self._get_field_type_for_annotation(arg, depth) for arg in args
|
||||
)
|
||||
return f"Union[{inner_types}]"
|
||||
|
||||
def _get_field_type_for_annotation(self, annotation, depth: int) -> str:
    """Return the schema string for a bare type annotation.

    Dispatches on the annotation's typing origin:

    * lists are delegated to ``_format_list_type``;
    * dicts are rendered as ``Dict[<key>, <value>]``;
    * unions (``typing.Union`` and PEP 604 ``X | Y``) are delegated to
      ``_format_union_type``;
    * ``BaseModel`` subclasses are rendered as the class name followed by
      a brace-wrapped block of their fields;
    * anything else is rendered by name.

    :param annotation: The type annotation to describe.
    :param depth: Current nesting level, used for indentation.
    :return: The schema string for ``annotation``.
    """
    # Local import keeps this block self-contained; only needed to detect
    # PEP 604 unions, whose origin is ``types.UnionType`` on Python 3.10+.
    import types

    def _name(tp) -> str:
        # Some annotations (forward references, some typing constructs)
        # have no ``__name__``; use their string form rather than raising.
        return getattr(tp, "__name__", str(tp))

    origin = get_origin(annotation)
    args = get_args(annotation)

    if origin in {list, List}:
        if not args:
            # Unparametrised ``List`` has no item type to describe.
            return "List"
        return self._format_list_type(args[0], depth)

    if origin in {dict, Dict}:
        if len(args) != 2:
            # Unparametrised ``Dict`` has no key/value types to describe.
            return "Dict"
        key_type, value_type = args
        return f"Dict[{_name(key_type)}, {_name(value_type)}]"

    pep604_union = getattr(types, "UnionType", None)
    if origin is Union or (pep604_union is not None and origin is pep604_union):
        return self._format_union_type(annotation, depth)

    if isinstance(annotation, type) and issubclass(annotation, BaseModel):
        nested_schema = self._get_model_schema(annotation, depth)
        nested_indent = " " * 4 * depth
        return f"{annotation.__name__}\n{nested_indent}{{\n{nested_schema}\n{nested_indent}}}"

    return _name(annotation)
|
||||
|
||||
Reference in New Issue
Block a user