mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
Json Task Output Truncation with Escape Characters (#1009)
* Fixed special character issue when converting json to models. Added numerous tests to ensure thigns work properly. * Fix linting error and cleaned up tests * Fix customer_converter_cls test failure * Fixed tests. Thank you lorenze for pointing that out. added a few more to ensure converter creation works properly * Address lorenze feedback * Fix linting issues
This commit is contained in:
committed by
GitHub
parent
88bffaa0d0
commit
da7d8256eb
@@ -1,6 +1,5 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import threading
|
||||
import uuid
|
||||
from concurrent.futures import Future
|
||||
@@ -8,7 +7,6 @@ from copy import copy
|
||||
from hashlib import md5
|
||||
from typing import Any, Dict, List, Optional, Tuple, Type, Union
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from opentelemetry.trace import Span
|
||||
from pydantic import UUID4, BaseModel, Field, field_validator, model_validator
|
||||
from pydantic_core import PydanticCustomError
|
||||
@@ -17,10 +15,8 @@ from crewai.agents.agent_builder.base_agent import BaseAgent
|
||||
from crewai.tasks.output_format import OutputFormat
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.telemetry.telemetry import Telemetry
|
||||
from crewai.utilities.converter import Converter, ConverterError
|
||||
from crewai.utilities.converter import Converter, convert_to_model
|
||||
from crewai.utilities.i18n import I18N
|
||||
from crewai.utilities.printer import Printer
|
||||
from crewai.utilities.pydantic_schema_parser import PydanticSchemaParser
|
||||
|
||||
|
||||
class Task(BaseModel):
|
||||
@@ -254,9 +250,7 @@ class Task(BaseModel):
|
||||
content = (
|
||||
json_output
|
||||
if json_output
|
||||
else pydantic_output.model_dump_json()
|
||||
if pydantic_output
|
||||
else result
|
||||
else pydantic_output.model_dump_json() if pydantic_output else result
|
||||
)
|
||||
self._save_file(content)
|
||||
|
||||
@@ -326,18 +320,6 @@ class Task(BaseModel):
|
||||
|
||||
return copied_task
|
||||
|
||||
def _create_converter(self, *args, **kwargs) -> Converter:
|
||||
"""Create a converter instance."""
|
||||
if self.agent and not self.converter_cls:
|
||||
converter = self.agent.get_output_converter(*args, **kwargs)
|
||||
elif self.converter_cls:
|
||||
converter = self.converter_cls(*args, **kwargs)
|
||||
|
||||
if not converter:
|
||||
raise Exception("No output converter found or set.")
|
||||
|
||||
return converter
|
||||
|
||||
def _export_output(
|
||||
self, result: str
|
||||
) -> Tuple[Optional[BaseModel], Optional[Dict[str, Any]]]:
|
||||
@@ -345,75 +327,26 @@ class Task(BaseModel):
|
||||
json_output: Optional[Dict[str, Any]] = None
|
||||
|
||||
if self.output_pydantic or self.output_json:
|
||||
model_output = self._convert_to_model(result)
|
||||
pydantic_output = (
|
||||
model_output if isinstance(model_output, BaseModel) else None
|
||||
model_output = convert_to_model(
|
||||
result,
|
||||
self.output_pydantic,
|
||||
self.output_json,
|
||||
self.agent,
|
||||
self.converter_cls,
|
||||
)
|
||||
if isinstance(model_output, str):
|
||||
|
||||
if isinstance(model_output, BaseModel):
|
||||
pydantic_output = model_output
|
||||
elif isinstance(model_output, dict):
|
||||
json_output = model_output
|
||||
elif isinstance(model_output, str):
|
||||
try:
|
||||
json_output = json.loads(model_output)
|
||||
except json.JSONDecodeError:
|
||||
json_output = None
|
||||
else:
|
||||
json_output = model_output if isinstance(model_output, dict) else None
|
||||
|
||||
return pydantic_output, json_output
|
||||
|
||||
def _convert_to_model(self, result: str) -> Union[dict, BaseModel, str]:
|
||||
model = self.output_pydantic or self.output_json
|
||||
if model is None:
|
||||
return result
|
||||
|
||||
try:
|
||||
return self._validate_model(result, model)
|
||||
except Exception:
|
||||
return self._handle_partial_json(result, model)
|
||||
|
||||
def _validate_model(
|
||||
self, result: str, model: Type[BaseModel]
|
||||
) -> Union[dict, BaseModel]:
|
||||
exported_result = model.model_validate_json(result)
|
||||
if self.output_json:
|
||||
return exported_result.model_dump()
|
||||
return exported_result
|
||||
|
||||
def _handle_partial_json(
|
||||
self, result: str, model: Type[BaseModel]
|
||||
) -> Union[dict, BaseModel, str]:
|
||||
match = re.search(r"({.*})", result, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
exported_result = model.model_validate_json(match.group(0))
|
||||
if self.output_json:
|
||||
return exported_result.model_dump()
|
||||
return exported_result
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return self._convert_with_instructions(result, model)
|
||||
|
||||
def _convert_with_instructions(
|
||||
self, result: str, model: Type[BaseModel]
|
||||
) -> Union[dict, BaseModel, str]:
|
||||
llm = self.agent.function_calling_llm or self.agent.llm # type: ignore # Item "None" of "BaseAgent | None" has no attribute "function_calling_llm"
|
||||
instructions = self._get_conversion_instructions(model, llm)
|
||||
|
||||
converter = self._create_converter(
|
||||
llm=llm, text=result, model=model, instructions=instructions
|
||||
)
|
||||
exported_result = (
|
||||
converter.to_pydantic() if self.output_pydantic else converter.to_json()
|
||||
)
|
||||
|
||||
if isinstance(exported_result, ConverterError):
|
||||
Printer().print(
|
||||
content=f"{exported_result.message} Using raw output instead.",
|
||||
color="red",
|
||||
)
|
||||
return result
|
||||
|
||||
return exported_result
|
||||
|
||||
def _get_output_format(self) -> OutputFormat:
|
||||
if self.output_json:
|
||||
return OutputFormat.JSON
|
||||
@@ -421,26 +354,6 @@ class Task(BaseModel):
|
||||
return OutputFormat.PYDANTIC
|
||||
return OutputFormat.RAW
|
||||
|
||||
def _get_conversion_instructions(self, model: Type[BaseModel], llm: Any) -> str:
|
||||
instructions = "I'm gonna convert this raw text into valid JSON."
|
||||
if not self._is_gpt(llm):
|
||||
model_schema = PydanticSchemaParser(model=model).get_schema()
|
||||
instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
|
||||
return instructions
|
||||
|
||||
def _save_output(self, content: str) -> None:
|
||||
if not self.output_file:
|
||||
raise Exception("Output file path is not set.")
|
||||
|
||||
directory = os.path.dirname(self.output_file)
|
||||
if directory and not os.path.exists(directory):
|
||||
os.makedirs(directory)
|
||||
with open(self.output_file, "w", encoding="utf-8") as file:
|
||||
file.write(content)
|
||||
|
||||
def _is_gpt(self, llm) -> bool:
|
||||
return isinstance(llm, ChatOpenAI) and llm.openai_api_base is None
|
||||
|
||||
def _save_file(self, result: Any) -> None:
|
||||
directory = os.path.dirname(self.output_file) # type: ignore # Value of type variable "AnyOrLiteralStr" of "dirname" cannot be "str | None"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user