fix: improve output handling and response model integration in agents

- Refactored output handling in the Agent class to ensure proper conversion and formatting of outputs, including support for BaseModel instances.
- Enhanced the AgentExecutor class to correctly utilize response models during execution, improving the handling of structured outputs.
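- Updated the Gemini and Anthropic completion providers to ensure compatibility with the new response model handling: Anthropic now returns the raw tool-use blocks when tools are requested but no available functions are supplied, and Gemini now gives tools precedence over the response model when both are present.
- Improved the OpenAI completion provider and the `convert_tools_to_openai_schema` helper to enforce strict adherence to function schemas by adding strict mode (`"strict": True`) to function definitions.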
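- Adjusted translations to clarify instructions regarding output formatting and schema adherence: the formatting prompts now tell the model to preserve the original content exactly as-is and only structure it to match the schema.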
This commit is contained in:
lorenzejay
2026-01-29 11:40:09 -08:00
parent e291a97bdd
commit 55448eb6ef
7 changed files with 64 additions and 16 deletions

View File

@@ -1858,11 +1858,18 @@ class Agent(BaseAgent):
# Execute the agent (this is called from sync path, so invoke returns dict)
result = cast(dict[str, Any], executor.invoke(inputs))
raw_output = result.get("output", "")
output = result.get("output", "")
(f"output: {output}")
# Handle response format conversion
formatted_result: BaseModel | None = None
if response_format:
raw_output: str
if isinstance(output, BaseModel):
formatted_result = output
raw_output = output.model_dump_json()
elif response_format:
raw_output = str(output) if not isinstance(output, str) else output
try:
model_schema = generate_model_description(response_format)
schema = json.dumps(model_schema, indent=2)
@@ -1882,6 +1889,8 @@ class Agent(BaseAgent):
formatted_result = conversion_result
except ConverterError:
pass # Keep raw output if conversion fails
else:
raw_output = str(output) if not isinstance(output, str) else output
# Get token usage metrics
if isinstance(self.llm, BaseLLM):
@@ -1920,11 +1929,17 @@ class Agent(BaseAgent):
# Execute the agent asynchronously
result = await executor.invoke_async(inputs)
raw_output = result.get("output", "")
output = result.get("output", "")
# Handle response format conversion
formatted_result: BaseModel | None = None
if response_format:
raw_output: str
if isinstance(output, BaseModel):
formatted_result = output
raw_output = output.model_dump_json()
elif response_format:
raw_output = str(output) if not isinstance(output, str) else output
try:
model_schema = generate_model_description(response_format)
schema = json.dumps(model_schema, indent=2)
@@ -1944,6 +1959,8 @@ class Agent(BaseAgent):
formatted_result = conversion_result
except ConverterError:
pass # Keep raw output if conversion fails
else:
raw_output = str(output) if not isinstance(output, str) else output
# Get token usage metrics
if isinstance(self.llm, BaseLLM):

View File

@@ -365,7 +365,7 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
printer=self._printer,
from_task=self.task,
from_agent=self.agent,
response_model=None,
response_model=self.response_model,
executor_context=self,
verbose=self.agent.verbose,
)
@@ -436,7 +436,7 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
available_functions=None,
from_task=self.task,
from_agent=self.agent,
response_model=None,
response_model=self.response_model,
executor_context=self,
verbose=self.agent.verbose,
)
@@ -448,6 +448,16 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
return "native_tool_calls"
if isinstance(answer, BaseModel):
self.state.current_answer = AgentFinish(
thought="",
output=answer,
text=answer.model_dump_json(),
)
self._invoke_step_callback(self.state.current_answer)
self._append_message_to_state(answer.model_dump_json())
return "native_finished"
# Text response - this is the final answer
if isinstance(answer, str):
self.state.current_answer = AgentFinish(

View File

@@ -23,7 +23,7 @@ if TYPE_CHECKING:
try:
from anthropic import Anthropic, AsyncAnthropic, transform_schema
from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
from anthropic.types.beta import BetaMessage, BetaTextBlock
from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock
import httpx
except ImportError:
raise ImportError(
@@ -675,6 +675,23 @@ class AnthropicCompletion(BaseLLM):
usage = self._extract_anthropic_token_usage(response)
self._track_token_usage_internal(usage)
if "tools" in params and response.content:
tool_uses = [
block
for block in response.content
if isinstance(block, (ToolUseBlock, BetaToolUseBlock))
]
if tool_uses:
if not available_functions:
self._emit_call_completed_event(
response=list(tool_uses),
call_type=LLMCallType.TOOL_CALL,
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
)
return list(tool_uses)
if _is_pydantic_model_class(response_model) and response.content:
if use_native_structured_output:
for block in response.content:

View File

@@ -471,7 +471,9 @@ class GeminiCompletion(BaseLLM):
if self.stop_sequences:
config_params["stop_sequences"] = self.stop_sequences
if response_model:
if tools and self.supports_tools:
config_params["tools"] = self._convert_tools_for_interference(tools)
elif response_model:
config_params["response_mime_type"] = "application/json"
schema_output = generate_model_description(response_model)
schema = schema_output.get("json_schema", {}).get("schema", {})
@@ -482,10 +484,6 @@ class GeminiCompletion(BaseLLM):
else:
config_params["response_schema"] = response_model
# Handle tools for supported models
if tools and self.supports_tools:
config_params["tools"] = self._convert_tools_for_interference(tools)
if self.safety_settings:
config_params["safety_settings"] = self.safety_settings
@@ -789,10 +787,12 @@ class GeminiCompletion(BaseLLM):
content = self._extract_text_from_response(response)
content = self._apply_stop_words(content)
effective_response_model = None if self.tools else response_model
return self._finalize_completion_response(
content=content,
contents=contents,
response_model=response_model,
response_model=effective_response_model,
from_task=from_task,
from_agent=from_agent,
)
@@ -948,10 +948,12 @@ class GeminiCompletion(BaseLLM):
if result is not None:
return result
effective_response_model = None if self.tools else response_model
return self._finalize_completion_response(
content=full_response,
contents=contents,
response_model=response_model,
response_model=effective_response_model,
from_task=from_task,
from_agent=from_agent,
)

View File

@@ -1530,6 +1530,7 @@ class OpenAICompletion(BaseLLM):
"function": {
"name": name,
"description": description,
"strict": True,
},
}

View File

@@ -26,12 +26,12 @@
"summarize_instruction": "Summarize the following text, make sure to include all the important information: {group}",
"summary": "This is a summary of our conversation so far:\n{merged_summary}",
"manager_request": "Your best answer to your coworker asking you this, accounting for the context shared.",
"formatted_task_instructions": "Ensure your final answer strictly adheres to the following OpenAPI schema: {output_format}\n\nDo not include the OpenAPI schema in the final output. Ensure the final output does not include any code block markers like ```json or ```python.",
"formatted_task_instructions": "Format your final answer according to the following OpenAPI schema: {output_format}\n\nIMPORTANT: Preserve the original content exactly as-is. Do NOT rewrite, paraphrase, or modify the meaning of the content. Only structure it to match the schema format.\n\nDo not include the OpenAPI schema in the final output. Ensure the final output does not include any code block markers like ```json or ```python.",
"conversation_history_instruction": "You are a member of a crew collaborating to achieve a common goal. Your task is a specific action that contributes to this larger objective. For additional context, please review the conversation history between you and the user that led to the initiation of this crew. Use any relevant information or feedback from the conversation to inform your task execution and ensure your response aligns with both the immediate task and the crew's overall goals.",
"feedback_instructions": "User feedback: {feedback}\nInstructions: Use this feedback to enhance the next output iteration.\nNote: Do not respond or add commentary.",
"lite_agent_system_prompt_with_tools": "You are {role}. {backstory}\nYour personal goal is: {goal}\n\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n{tools}\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: you should always think about what to do\nAction: the action to take, only one name of [{tool_names}], just the name, exactly as it's written.\nAction Input: the input to the action, just a simple JSON object, enclosed in curly braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce all necessary information is gathered, return the following format:\n\n```\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n```",
"lite_agent_system_prompt_without_tools": "You are {role}. {backstory}\nYour personal goal is: {goal}\n\nTo give my best complete final answer to the task respond using the exact following format:\n\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described.\n\nI MUST use these formats, my job depends on it!",
"lite_agent_response_format": "Ensure your final answer strictly adheres to the following OpenAPI schema: {response_format}\n\nDo not include the OpenAPI schema in the final output. Ensure the final output does not include any code block markers like ```json or ```python.",
"lite_agent_response_format": "Format your final answer according to the following OpenAPI schema: {response_format}\n\nIMPORTANT: Preserve the original content exactly as-is. Do NOT rewrite, paraphrase, or modify the meaning of the content. Only structure it to match the schema format.\n\nDo not include the OpenAPI schema in the final output. Ensure the final output does not include any code block markers like ```json or ```python.",
"knowledge_search_query": "The original query is: {task_prompt}.",
"knowledge_search_query_system_prompt": "Your goal is to rewrite the user query so that it is optimized for retrieval from a vector database. Consider how the query will be used to find relevant documents, and aim to make it more specific and context-aware. \n\n Do not include any other text than the rewritten query, especially any preamble or postamble and only add expected output format if its relevant to the rewritten query. \n\n Focus on the key words of the intended task and to retrieve the most relevant information. \n\n There will be some extra context provided that might need to be removed such as expected_output formats structured_outputs and other instructions.",
"human_feedback_collapse": "Based on the following human feedback, determine which outcome best matches their intent.\n\nFeedback: {feedback}\n\nPossible outcomes: {outcomes}\n\nRespond with ONLY one of the exact outcome values listed above, nothing else."

View File

@@ -182,6 +182,7 @@ def convert_tools_to_openai_schema(
"name": sanitized_name,
"description": description,
"parameters": parameters,
"strict": True,
},
}
openai_tools.append(schema)