Compare commits

..

23 Commits

Author SHA1 Message Date
Devin AI
652f3f5b8e refactor: incorporate improvements from PR #2178 into guardrail validation
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-21 03:24:21 +00:00
Devin AI
f3f094faad fix: sort imports in task.py
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:41:42 +00:00
Devin AI
ad0db27040 fix: use Any type for TaskOutput.raw to support both string and dict
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:38:32 +00:00
Devin AI
6cc37a38a6 fix: add missing TaskOutput import
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:37:13 +00:00
Devin AI
4bafdacd88 fix: allow both string and dict types for TaskOutput.raw
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:35:47 +00:00
Devin AI
2793a8ee87 fix: preserve dictionary type in guardrail results
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:32:36 +00:00
Devin AI
a9b0702cbe fix: type error in dictionary result handling
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:28:36 +00:00
Devin AI
54bdc8b52c fix: properly handle dictionary results in guardrails
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:26:54 +00:00
Devin AI
578164cf05 fix: preserve dictionary type in guardrail results
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:25:44 +00:00
Devin AI
fe78553c9c fix: ensure dict results are properly serialized to string
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:13:27 +00:00
Devin AI
0cb47a8d0a fix: improve Task.copy() method to handle both legacy and new style copy
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:12:02 +00:00
Devin AI
a87a7d2833 fix: restore backward compatibility for Task.copy() and fix dict handling
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:09:32 +00:00
Devin AI
5b833d932e fix: correct type error in error message formatting
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:04:35 +00:00
Devin AI
6a59194e6f fix: improve type validation logic to fix type checker error
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:03:22 +00:00
Devin AI
5f3eb3605a fix: remove duplicate return statement in copy method
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:02:06 +00:00
Devin AI
77990b6293 fix: resolve type checking errors and improve copy method
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:01:10 +00:00
Devin AI
050ead62a7 fix: resolve remaining type checking errors
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 17:00:35 +00:00
Devin AI
8f3936eb09 fix: resolve type checking errors
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 16:58:01 +00:00
Devin AI
9c1f24ee26 fix: improve type validation logic
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 16:57:10 +00:00
Devin AI
24da7be540 fix: sort imports in test file
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 16:57:07 +00:00
Devin AI
31e8b9d7f2 refactor: implement code review suggestions
- Use typing.get_type_hints for better type checking
- Add proper handling of dict return types
- Improve parameter validation for keyword-only params
- Add comprehensive test coverage

Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 16:54:27 +00:00
Devin AI
0e086d348a refactor: implement code review suggestions
- Add GuardrailValidationError exception
- Use typing.get_type_hints for better type checking
- Add descriptive error messages with context
- Support dict return type

Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 16:50:33 +00:00
Devin AI
3c5672f4ec feat: improve guardrail validation support
- Add support for new style tuple annotations
- Allow specific return types like tuple[bool, str]
- Support optional parameters in guardrail functions

Fixes #2177

Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-20 16:43:02 +00:00
19 changed files with 2222 additions and 808 deletions

3
.gitignore vendored
View File

@@ -21,5 +21,4 @@ crew_tasks_output.json
.mypy_cache
.ruff_cache
.venv
agentops.log
test_flow.html
agentops.log

View File

@@ -10,8 +10,6 @@ This notebook demonstrates how to integrate **Langfuse** with **CrewAI** using O
> **What is Langfuse?** [Langfuse](https://langfuse.com) is an open-source LLM engineering platform. It provides tracing and monitoring capabilities for LLM applications, helping developers debug, analyze, and optimize their AI systems. Langfuse integrates with various tools and frameworks via native integrations, OpenTelemetry, and APIs/SDKs.
[![Langfuse Overview Video](https://github.com/user-attachments/assets/3926b288-ff61-4b95-8aa1-45d041c70866)](https://langfuse.com/watch-demo)
## Get Started
We'll walk through a simple example of using CrewAI and integrating it with Langfuse via OpenTelemetry using OpenLit.

View File

@@ -114,15 +114,10 @@ class CrewAgentExecutorMixin:
prompt = (
"\n\n=====\n"
"## HUMAN FEEDBACK: Provide feedback on the Final Result and Agent's actions.\n"
"Please follow these guidelines:\n"
" - If you are happy with the result, simply hit Enter without typing anything.\n"
" - Otherwise, provide specific improvement requests.\n"
" - You can provide multiple rounds of feedback until satisfied.\n"
"Respond with 'looks good' to accept or provide specific improvement requests.\n"
"You can provide multiple rounds of feedback until satisfied.\n"
"=====\n"
)
self._printer.print(content=prompt, color="bold_yellow")
response = input()
if response.strip() != "":
self._printer.print(content="\nProcessing your feedback...", color="cyan")
return response
return input()

View File

@@ -31,11 +31,11 @@ class OutputConverter(BaseModel, ABC):
)
@abstractmethod
def to_pydantic(self, current_attempt=1) -> BaseModel:
def to_pydantic(self, current_attempt=1):
"""Convert text to pydantic."""
pass
@abstractmethod
def to_json(self, current_attempt=1) -> dict:
def to_json(self, current_attempt=1):
"""Convert text to json."""
pass

View File

@@ -1,9 +1,5 @@
import logging
from crewai.types.usage_metrics import UsageMetrics
logger = logging.getLogger(__name__)
class TokenProcess:
def __init__(self) -> None:
@@ -21,21 +17,7 @@ class TokenProcess:
self.completion_tokens += tokens
self.total_tokens += tokens
def sum_cached_prompt_tokens(self, tokens: int | None) -> None:
"""
Adds the given token count to cached prompt tokens.
Args:
tokens (int | None): Number of tokens to add. None values are ignored.
Raises:
ValueError: If tokens is negative.
"""
if tokens is None:
logger.debug("Received None value for token count")
return
if tokens < 0:
raise ValueError("Token count cannot be negative")
def sum_cached_prompt_tokens(self, tokens: int) -> None:
self.cached_prompt_tokens += tokens
def sum_successful_requests(self, requests: int) -> None:

View File

@@ -548,6 +548,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self, initial_answer: AgentFinish, feedback: str
) -> AgentFinish:
"""Process feedback for training scenarios with single iteration."""
self._printer.print(
content="\nProcessing training feedback.\n",
color="yellow",
)
self._handle_crew_training_output(initial_answer, feedback)
self.messages.append(
self._format_msg(
@@ -567,8 +571,9 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
answer = current_answer
while self.ask_for_human_input:
# If the user provides a blank response, assume they are happy with the result
if feedback.strip() == "":
response = self._get_llm_feedback_response(feedback)
if not self._feedback_requires_changes(response):
self.ask_for_human_input = False
else:
answer = self._process_feedback_iteration(feedback)
@@ -576,6 +581,27 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
return answer
def _get_llm_feedback_response(self, feedback: str) -> Optional[str]:
"""Get LLM classification of whether feedback requires changes."""
prompt = self._i18n.slice("human_feedback_classification").format(
feedback=feedback
)
message = self._format_msg(prompt, role="system")
for retry in range(MAX_LLM_RETRY):
try:
response = self.llm.call([message], callbacks=self.callbacks)
return response.strip().lower() if response else None
except Exception as error:
self._log_feedback_error(retry, error)
self._log_max_retries_exceeded()
return None
def _feedback_requires_changes(self, response: Optional[str]) -> bool:
"""Determine if feedback response indicates need for changes."""
return response == "true" if response else False
def _process_feedback_iteration(self, feedback: str) -> AgentFinish:
"""Process a single feedback iteration."""
self.messages.append(

View File

@@ -26,9 +26,9 @@ from crewai.utilities.events.tool_usage_events import ToolExecutionErrorEvent
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
import litellm
from litellm import Choices
from litellm import Choices, get_supported_openai_params
from litellm.types.utils import ModelResponse
from litellm.utils import get_supported_openai_params, supports_response_schema
from litellm.utils import supports_response_schema
from crewai.traces.unified_trace_controller import trace_llm_call
@@ -449,7 +449,7 @@ class LLM:
def supports_function_calling(self) -> bool:
try:
params = get_supported_openai_params(model=self.model)
return params is not None and "tools" in params
return "response_format" in params
except Exception as e:
logging.error(f"Failed to get supported params: {str(e)}")
return False
@@ -457,7 +457,7 @@ class LLM:
def supports_stop_words(self) -> bool:
try:
params = get_supported_openai_params(model=self.model)
return params is not None and "stop" in params
return "stop" in params
except Exception as e:
logging.error(f"Failed to get supported params: {str(e)}")
return False

View File

@@ -3,17 +3,20 @@ import inspect
import json
import logging
import threading
import typing
import uuid
from concurrent.futures import Future
from copy import copy
from hashlib import md5
from pathlib import Path
from typing import (
AbstractSet,
Any,
Callable,
ClassVar,
Dict,
List,
Mapping,
Optional,
Set,
Tuple,
@@ -32,6 +35,7 @@ from pydantic import (
from pydantic_core import PydanticCustomError
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.tasks.exceptions import GuardrailValidationError
from crewai.tasks.guardrail_result import GuardrailResult
from crewai.tasks.output_format import OutputFormat
from crewai.tasks.task_output import TaskOutput
@@ -113,7 +117,7 @@ class Task(BaseModel):
description="Task output, it's final result after being executed", default=None
)
tools: Optional[List[BaseTool]] = Field(
default_factory=list,
default_factory=list[BaseTool],
description="Tools the agent is limited to use for this task.",
)
id: UUID4 = Field(
@@ -129,7 +133,7 @@ class Task(BaseModel):
description="A converter class used to export structured output",
default=None,
)
processed_by_agents: Set[str] = Field(default_factory=set)
processed_by_agents: Set[str] = Field(default_factory=set[str])
guardrail: Optional[Callable[[TaskOutput], Tuple[bool, Any]]] = Field(
default=None,
description="Function to validate task output before proceeding to next task",
@@ -151,8 +155,8 @@ class Task(BaseModel):
"""Validate that the guardrail function has the correct signature and behavior.
While type hints provide static checking, this validator ensures runtime safety by:
1. Verifying the function accepts exactly one parameter (the TaskOutput)
2. Checking return type annotations match Tuple[bool, Any] if present
1. Verifying the function accepts exactly one required positional parameter (the TaskOutput)
2. Checking return type annotations match tuple[bool, Any] or specific types like tuple[bool, str]
3. Providing clear, immediate error messages for debugging
This runtime validation is crucial because:
@@ -160,6 +164,24 @@ class Task(BaseModel):
- Function signatures need immediate validation before task execution
- Clear error messages help users debug guardrail implementation issues
Examples:
Simple validation with new style annotation:
>>> def validate_output(result: TaskOutput) -> tuple[bool, str]:
... return (True, result.raw.upper())
Validation with optional parameters:
>>> def validate_with_options(result: TaskOutput, strict: bool = True) -> tuple[bool, str]:
... if strict and not result.raw.isupper():
... return (False, "Text must be uppercase")
... return (True, result.raw)
Validation with specific return type:
>>> def validate_task_output(result: TaskOutput) -> tuple[bool, TaskOutput]:
... if not result.raw:
... return (False, result)
... result.raw = result.raw.strip()
... return (True, result)
Args:
v: The guardrail function to validate
@@ -168,22 +190,46 @@ class Task(BaseModel):
Raises:
ValueError: If the function signature is invalid or return annotation
doesn't match Tuple[bool, Any]
doesn't match tuple[bool, Any] or specific allowed types
"""
if v is not None:
sig = inspect.signature(v)
if len(sig.parameters) != 1:
raise ValueError("Guardrail function must accept exactly one parameter")
# Check for exactly one required positional parameter
positional_args = [
param for param in sig.parameters.values()
if param.default is inspect.Parameter.empty
]
if len(positional_args) != 1:
raise GuardrailValidationError(
"Guardrail function must accept exactly one required parameter",
{"params": [str(p) for p in sig.parameters.values()]}
)
# Check return annotation if present, but don't require it
return_annotation = sig.return_annotation
if return_annotation != inspect.Signature.empty:
type_hints = typing.get_type_hints(v)
return_annotation = type_hints.get('return')
if return_annotation:
# Simplified type checking logic
return_annotation_args = typing.get_args(return_annotation)
if not (
return_annotation == Tuple[bool, Any]
or str(return_annotation) == "Tuple[bool, Any]"
typing.get_origin(return_annotation) is tuple
and len(return_annotation_args) == 2
and return_annotation_args[0] is bool
and (
return_annotation_args[1] is Any
or return_annotation_args[1] is str
or return_annotation_args[1] is TaskOutput
or return_annotation_args[1] == Union[str, TaskOutput]
or (
typing.get_origin(return_annotation_args[1]) is dict
and typing.get_args(return_annotation_args[1])[0] is str
and typing.get_args(return_annotation_args[1])[1] is Any
)
)
):
raise ValueError(
"If return type is annotated, it must be Tuple[bool, Any]"
raise GuardrailValidationError(
"Invalid return type annotation. Expected Tuple[bool, Any|str|TaskOutput|Dict[str, Any]]",
{"got": str(return_annotation)}
)
return v
@@ -411,6 +457,7 @@ class Task(BaseModel):
"Task guardrail returned None as result. This is not allowed."
)
# Handle different result types
if isinstance(guardrail_result.result, str):
task_output.raw = guardrail_result.result
pydantic_output, json_output = self._export_output(
@@ -420,6 +467,13 @@ class Task(BaseModel):
task_output.json_dict = json_output
elif isinstance(guardrail_result.result, TaskOutput):
task_output = guardrail_result.result
elif isinstance(guardrail_result.result, dict):
task_output.raw = guardrail_result.result
task_output.json_dict = guardrail_result.result
pydantic_output, _ = self._export_output(
json.dumps(guardrail_result.result)
)
task_output.pydantic = pydantic_output
self.output = task_output
self.end_time = datetime.datetime.now()
@@ -610,40 +664,74 @@ class Task(BaseModel):
self.delegations += 1
def copy(
self, agents: List["BaseAgent"], task_mapping: Dict[str, "Task"]
self,
agents: List["BaseAgent"] | None = None,
task_mapping: Dict[str, "Task"] | None = None,
*,
include: AbstractSet[int] | AbstractSet[str] | Mapping[int, Any] | Mapping[str, Any] | None = None,
exclude: AbstractSet[int] | AbstractSet[str] | Mapping[int, Any] | Mapping[str, Any] | None = None,
update: dict[str, Any] | None = None,
deep: bool = False,
) -> "Task":
"""Create a deep copy of the Task."""
exclude = {
"""Create a deep copy of the Task.
Args:
agents: Optional list of agents to copy agent references
task_mapping: Optional mapping of task keys to tasks for context
include: Fields to include in the copy
exclude: Fields to exclude from the copy
update: Fields to update in the copy
deep: Whether to perform a deep copy
"""
if agents is None and task_mapping is None:
# New style copy using BaseModel
copied = super().copy(
include=include,
exclude=exclude,
update=update,
deep=deep,
)
# Copy mutable fields
if self.tools:
copied.tools = copy(self.tools)
if self.context:
copied.context = copy(self.context)
return copied
# Legacy copy behavior
exclude_fields = {
"id",
"agent",
"context",
"tools",
}
copied_data = self.model_dump(exclude=exclude)
copied_data = self.model_dump(exclude=exclude_fields)
copied_data = {k: v for k, v in copied_data.items() if v is not None}
cloned_context = (
[task_mapping[context_task.key] for context_task in self.context]
if self.context
if self.context and task_mapping
else None
)
def get_agent_by_role(role: str) -> Union["BaseAgent", None]:
if not agents:
return None
return next((agent for agent in agents if agent.role == role), None)
cloned_agent = get_agent_by_role(self.agent.role) if self.agent else None
cloned_tools = copy(self.tools) if self.tools else []
copied_task = Task(
return Task(
**copied_data,
context=cloned_context,
agent=cloned_agent,
tools=cloned_tools,
)
return copied_task
def _export_output(
self, result: str
) -> Tuple[Optional[BaseModel], Optional[Dict[str, Any]]]:

View File

@@ -0,0 +1,25 @@
"""
Module for task-related exceptions.
This module provides custom exceptions used throughout the task system
to provide more specific error handling and context.
"""
from typing import Any, Dict, Optional
class GuardrailValidationError(Exception):
"""Exception raised for guardrail validation errors.
This exception provides detailed context about why a guardrail
validation failed, including the specific validation that failed
and any relevant context information.
Attributes:
message: A clear description of the validation error
context: Optional dictionary containing additional error context
"""
def __init__(self, message: str, context: Optional[Dict[str, Any]] = None):
self.message = message
self.context = context or {}
super().__init__(self.message)

View File

@@ -1,5 +1,5 @@
import json
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Union
from pydantic import BaseModel, Field, model_validator
@@ -15,7 +15,7 @@ class TaskOutput(BaseModel):
description="Expected output of the task", default=None
)
summary: Optional[str] = Field(description="Summary of the task", default=None)
raw: str = Field(description="Raw output of the task", default="")
raw: Any = Field(description="Raw output of the task", default="")
pydantic: Optional[BaseModel] = Field(
description="Pydantic output of task", default=None
)

View File

@@ -23,6 +23,7 @@
"summary": "This is a summary of our conversation so far:\n{merged_summary}",
"manager_request": "Your best answer to your coworker asking you this, accounting for the context shared.",
"formatted_task_instructions": "Ensure your final answer contains only the content in the following format: {output_format}\n\nEnsure the final output does not include any code block markers like ```json or ```python.",
"human_feedback_classification": "Determine if the following feedback indicates that the user is satisfied or if further changes are needed. Respond with 'True' if further changes are needed, or 'False' if the user is satisfied. **Important** Do not include any additional commentary outside of your 'True' or 'False' response.\n\nFeedback: \"{feedback}\"",
"conversation_history_instruction": "You are a member of a crew collaborating to achieve a common goal. Your task is a specific action that contributes to this larger objective. For additional context, please review the conversation history between you and the user that led to the initiation of this crew. Use any relevant information or feedback from the conversation to inform your task execution and ensure your response aligns with both the immediate task and the crew's overall goals.",
"feedback_instructions": "User feedback: {feedback}\nInstructions: Use this feedback to enhance the next output iteration.\nNote: Do not respond or add commentary."
},

View File

@@ -20,11 +20,11 @@ class ConverterError(Exception):
class Converter(OutputConverter):
"""Class that converts text into either pydantic or json."""
def to_pydantic(self, current_attempt=1) -> BaseModel:
def to_pydantic(self, current_attempt=1):
"""Convert text to pydantic."""
try:
if self.llm.supports_function_calling():
result = self._create_instructor().to_pydantic()
return self._create_instructor().to_pydantic()
else:
response = self.llm.call(
[
@@ -32,40 +32,18 @@ class Converter(OutputConverter):
{"role": "user", "content": self.text},
]
)
try:
# Try to directly validate the response JSON
result = self.model.model_validate_json(response)
except ValidationError:
# If direct validation fails, attempt to extract valid JSON
result = handle_partial_json(response, self.model, False, None)
# Ensure result is a BaseModel instance
if not isinstance(result, BaseModel):
if isinstance(result, dict):
result = self.model.parse_obj(result)
elif isinstance(result, str):
try:
parsed = json.loads(result)
result = self.model.parse_obj(parsed)
except Exception as parse_err:
raise ConverterError(
f"Failed to convert partial JSON result into Pydantic: {parse_err}"
)
else:
raise ConverterError(
"handle_partial_json returned an unexpected type."
)
return result
return self.model.model_validate_json(response)
except ValidationError as e:
if current_attempt < self.max_attempts:
return self.to_pydantic(current_attempt + 1)
raise ConverterError(
f"Failed to convert text into a Pydantic model due to validation error: {e}"
f"Failed to convert text into a Pydantic model due to the following validation error: {e}"
)
except Exception as e:
if current_attempt < self.max_attempts:
return self.to_pydantic(current_attempt + 1)
raise ConverterError(
f"Failed to convert text into a Pydantic model due to error: {e}"
f"Failed to convert text into a Pydantic model due to the following error: {e}"
)
def to_json(self, current_attempt=1):
@@ -219,15 +197,11 @@ def get_conversion_instructions(model: Type[BaseModel], llm: Any) -> str:
if llm.supports_function_calling():
model_schema = PydanticSchemaParser(model=model).get_schema()
instructions += (
f"\n\nOutput ONLY the valid JSON and nothing else.\n\n"
f"The JSON must follow this schema exactly:\n```json\n{model_schema}\n```"
f"\n\nThe JSON should follow this schema:\n```json\n{model_schema}\n```"
)
else:
model_description = generate_model_description(model)
instructions += (
f"\n\nOutput ONLY the valid JSON and nothing else.\n\n"
f"The JSON must follow this format exactly:\n{model_description}"
)
instructions += f"\n\nThe JSON should follow this format:\n{model_description}"
return instructions

View File

@@ -1,6 +1,7 @@
"""Test Agent creation and execution basic functionality."""
import os
from datetime import UTC, datetime, timezone
from unittest import mock
from unittest.mock import patch
@@ -8,7 +9,7 @@ import pytest
from crewai import Agent, Crew, Task
from crewai.agents.cache import CacheHandler
from crewai.agents.crew_agent_executor import AgentFinish, CrewAgentExecutor
from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.parser import AgentAction, CrewAgentParser, OutputParserException
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
@@ -998,35 +999,23 @@ def test_agent_human_input():
# Side effect function for _ask_human_input to simulate multiple feedback iterations
feedback_responses = iter(
[
"Don't say hi, say Hello instead!", # First feedback: instruct change
"", # Second feedback: empty string signals acceptance
"Don't say hi, say Hello instead!", # First feedback
"looks good", # Second feedback to exit loop
]
)
def ask_human_input_side_effect(*args, **kwargs):
return next(feedback_responses)
# Patch both _ask_human_input and _invoke_loop to avoid real API/network calls.
with (
patch.object(
CrewAgentExecutor,
"_ask_human_input",
side_effect=ask_human_input_side_effect,
) as mock_human_input,
patch.object(
CrewAgentExecutor,
"_invoke_loop",
return_value=AgentFinish(output="Hello", thought="", text=""),
) as mock_invoke_loop,
):
with patch.object(
CrewAgentExecutor, "_ask_human_input", side_effect=ask_human_input_side_effect
) as mock_human_input:
# Execute the task
output = agent.execute_task(task)
# Assertions to ensure the agent behaves correctly.
# It should have requested feedback twice.
assert mock_human_input.call_count == 2
# The final result should be processed to "Hello"
assert output.strip().lower() == "hello"
# Assertions to ensure the agent behaves correctly
assert mock_human_input.call_count == 2 # Should have asked for feedback twice
assert output.strip().lower() == "hello" # Final output should be 'Hello'
def test_interpolate_inputs():

View File

@@ -0,0 +1,520 @@
interactions:
- request:
body: !!binary |
CqcXCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkS/hYKEgoQY3Jld2FpLnRl
bGVtZXRyeRJ5ChBuJJtOdNaB05mOW/p3915eEgj2tkAd3rZcASoQVG9vbCBVc2FnZSBFcnJvcjAB
OYa7/URvKBUYQUpcFEVvKBUYShoKDmNyZXdhaV92ZXJzaW9uEggKBjAuODYuMEoPCgNsbG0SCAoG
Z3B0LTRvegIYAYUBAAEAABLJBwoQifhX01E5i+5laGdALAlZBBIIBuGM1aN+OPgqDENyZXcgQ3Jl
YXRlZDABORVGruBvKBUYQaipwOBvKBUYShoKDmNyZXdhaV92ZXJzaW9uEggKBjAuODYuMEoaCg5w
eXRob25fdmVyc2lvbhIICgYzLjEyLjdKLgoIY3Jld19rZXkSIgogN2U2NjA4OTg5ODU5YTY3ZWVj
ODhlZWY3ZmNlODUyMjVKMQoHY3Jld19pZBImCiRiOThiNWEwMC01YTI1LTQxMDctYjQwNS1hYmYz
MjBhOGYzYThKHAoMY3Jld19wcm9jZXNzEgwKCnNlcXVlbnRpYWxKEQoLY3Jld19tZW1vcnkSAhAA
ShoKFGNyZXdfbnVtYmVyX29mX3Rhc2tzEgIYAUobChVjcmV3X251bWJlcl9vZl9hZ2VudHMSAhgB
SuQCCgtjcmV3X2FnZW50cxLUAgrRAlt7ImtleSI6ICIyMmFjZDYxMWU0NGVmNWZhYzA1YjUzM2Q3
NWU4ODkzYiIsICJpZCI6ICJkNWIyMzM1YS0yMmIyLTQyZWEtYmYwNS03OTc3NmU3MmYzOTIiLCAi
cm9sZSI6ICJEYXRhIFNjaWVudGlzdCIsICJ2ZXJib3NlPyI6IGZhbHNlLCAibWF4X2l0ZXIiOiAy
MCwgIm1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25fY2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJn
cHQtNG8tbWluaSIsICJkZWxlZ2F0aW9uX2VuYWJsZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4
ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9saW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFsi
Z2V0IGdyZWV0aW5ncyJdfV1KkgIKCmNyZXdfdGFza3MSgwIKgAJbeyJrZXkiOiAiYTI3N2IzNGIy
YzE0NmYwYzU2YzVlMTM1NmU4ZjhhNTciLCAiaWQiOiAiMjJiZWMyMzEtY2QyMS00YzU4LTgyN2Ut
MDU4MWE4ZjBjMTExIiwgImFzeW5jX2V4ZWN1dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6
IGZhbHNlLCAiYWdlbnRfcm9sZSI6ICJEYXRhIFNjaWVudGlzdCIsICJhZ2VudF9rZXkiOiAiMjJh
Y2Q2MTFlNDRlZjVmYWMwNWI1MzNkNzVlODg5M2IiLCAidG9vbHNfbmFtZXMiOiBbImdldCBncmVl
dGluZ3MiXX1degIYAYUBAAEAABKOAgoQ5WYoxRtTyPjge4BduhL0rRIIv2U6rvWALfwqDFRhc2sg
Q3JlYXRlZDABOX068uBvKBUYQZkv8+BvKBUYSi4KCGNyZXdfa2V5EiIKIDdlNjYwODk4OTg1OWE2
N2VlYzg4ZWVmN2ZjZTg1MjI1SjEKB2NyZXdfaWQSJgokYjk4YjVhMDAtNWEyNS00MTA3LWI0MDUt
YWJmMzIwYThmM2E4Si4KCHRhc2tfa2V5EiIKIGEyNzdiMzRiMmMxNDZmMGM1NmM1ZTEzNTZlOGY4
YTU3SjEKB3Rhc2tfaWQSJgokMjJiZWMyMzEtY2QyMS00YzU4LTgyN2UtMDU4MWE4ZjBjMTExegIY
AYUBAAEAABKQAQoQXyeDtJDFnyp2Fjk9YEGTpxIIaNE7gbhPNYcqClRvb2wgVXNhZ2UwATkaXTvj
bygVGEGvx0rjbygVGEoaCg5jcmV3YWlfdmVyc2lvbhIICgYwLjg2LjBKHAoJdG9vbF9uYW1lEg8K
DUdldCBHcmVldGluZ3NKDgoIYXR0ZW1wdHMSAhgBegIYAYUBAAEAABLVBwoQMWfznt0qwauEzl7T
UOQxRBII9q+pUS5EdLAqDENyZXcgQ3JlYXRlZDABORONPORvKBUYQSAoS+RvKBUYShoKDmNyZXdh
aV92ZXJzaW9uEggKBjAuODYuMEoaCg5weXRob25fdmVyc2lvbhIICgYzLjEyLjdKLgoIY3Jld19r
ZXkSIgogYzMwNzYwMDkzMjY3NjE0NDRkNTdjNzFkMWRhM2YyN2NKMQoHY3Jld19pZBImCiQ3OTQw
MTkyNS1iOGU5LTQ3MDgtODUzMC00NDhhZmEzYmY4YjBKHAoMY3Jld19wcm9jZXNzEgwKCnNlcXVl
bnRpYWxKEQoLY3Jld19tZW1vcnkSAhAAShoKFGNyZXdfbnVtYmVyX29mX3Rhc2tzEgIYAUobChVj
cmV3X251bWJlcl9vZl9hZ2VudHMSAhgBSuoCCgtjcmV3X2FnZW50cxLaAgrXAlt7ImtleSI6ICI5
OGYzYjFkNDdjZTk2OWNmMDU3NzI3Yjc4NDE0MjVjZCIsICJpZCI6ICI5OTJkZjYyZi1kY2FiLTQy
OTUtOTIwNi05MDBkNDExNGIxZTkiLCAicm9sZSI6ICJGcmllbmRseSBOZWlnaGJvciIsICJ2ZXJi
b3NlPyI6IGZhbHNlLCAibWF4X2l0ZXIiOiAyMCwgIm1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25f
Y2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJncHQtNG8tbWluaSIsICJkZWxlZ2F0aW9uX2VuYWJs
ZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9s
aW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFsiZGVjaWRlIGdyZWV0aW5ncyJdfV1KmAIKCmNyZXdf
dGFza3MSiQIKhgJbeyJrZXkiOiAiODBkN2JjZDQ5MDk5MjkwMDgzODMyZjBlOTgzMzgwZGYiLCAi
aWQiOiAiMmZmNjE5N2UtYmEyNy00YjczLWI0YTctNGZhMDQ4ZTYyYjQ3IiwgImFzeW5jX2V4ZWN1
dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6IGZhbHNlLCAiYWdlbnRfcm9sZSI6ICJGcmll
bmRseSBOZWlnaGJvciIsICJhZ2VudF9rZXkiOiAiOThmM2IxZDQ3Y2U5NjljZjA1NzcyN2I3ODQx
NDI1Y2QiLCAidG9vbHNfbmFtZXMiOiBbImRlY2lkZSBncmVldGluZ3MiXX1degIYAYUBAAEAABKO
AgoQnjTp5boK7/+DQxztYIpqihIIgGnMUkBtzHEqDFRhc2sgQ3JlYXRlZDABOcpYcuRvKBUYQalE
c+RvKBUYSi4KCGNyZXdfa2V5EiIKIGMzMDc2MDA5MzI2NzYxNDQ0ZDU3YzcxZDFkYTNmMjdjSjEK
B2NyZXdfaWQSJgokNzk0MDE5MjUtYjhlOS00NzA4LTg1MzAtNDQ4YWZhM2JmOGIwSi4KCHRhc2tf
a2V5EiIKIDgwZDdiY2Q0OTA5OTI5MDA4MzgzMmYwZTk4MzM4MGRmSjEKB3Rhc2tfaWQSJgokMmZm
NjE5N2UtYmEyNy00YjczLWI0YTctNGZhMDQ4ZTYyYjQ3egIYAYUBAAEAABKTAQoQ26H9pLUgswDN
p9XhJwwL6BIIx3bw7mAvPYwqClRvb2wgVXNhZ2UwATmy7NPlbygVGEEvb+HlbygVGEoaCg5jcmV3
YWlfdmVyc2lvbhIICgYwLjg2LjBKHwoJdG9vbF9uYW1lEhIKEERlY2lkZSBHcmVldGluZ3NKDgoI
YXR0ZW1wdHMSAhgBegIYAYUBAAEAAA==
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '2986'
Content-Type:
- application/x-protobuf
User-Agent:
- OTel-OTLP-Exporter-Python/1.27.0
method: POST
uri: https://telemetry.crewai.com:4319/v1/traces
response:
body:
string: "\n\0"
headers:
Content-Length:
- '2'
Content-Type:
- application/x-protobuf
Date:
- Fri, 27 Dec 2024 22:14:53 GMT
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are test role. test backstory\nYour
personal goal is: test goal\nTo give my best complete final answer to the task
use the exact following format:\n\nThought: I now can give a great answer\nFinal
Answer: Your final answer must be the great and the most complete as possible,
it must be outcome described.\n\nI MUST use these formats, my job depends on
it!"}, {"role": "user", "content": "\nCurrent Task: Say the word: Hi\n\nThis
is the expect criteria for your final answer: The word: Hi\nyou MUST return
the actual complete content as the final answer, not a summary.\n\nBegin! This
is VERY important to you, use the tools available and give your best Final Answer,
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"],
"stream": false}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '824'
content-type:
- application/json
cookie:
- _cfuvid=ePJSDFdHag2D8lj21_ijAMWjoA6xfnPNxN4uekvC728-1727226247743-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- Linux
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AjCtZLLrWi8ZASpP9bz6HaCV7xBIn\",\n \"object\":
\"chat.completion\",\n \"created\": 1735337693,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"I now can give a great answer \\nFinal
Answer: Hi\",\n \"refusal\": null\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
158,\n \"completion_tokens\": 12,\n \"total_tokens\": 170,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_0aa8d3e20b\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8f8caa83deca756b-SEA
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Fri, 27 Dec 2024 22:14:53 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw;
path=/; expires=Fri, 27-Dec-24 22:44:53 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '404'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999816'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_6ac84634bff9193743c4b0911c09b4a6
http_version: HTTP/1.1
status_code: 200
- request:
body: '{"messages": [{"role": "system", "content": "Determine if the following
feedback indicates that the user is satisfied or if further changes are needed.
Respond with ''True'' if further changes are needed, or ''False'' if the user
is satisfied. **Important** Do not include any additional commentary outside
of your ''True'' or ''False'' response.\n\nFeedback: \"Don''t say hi, say Hello
instead!\""}], "model": "gpt-4o-mini", "stop": ["\nObservation:"], "stream":
false}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '461'
content-type:
- application/json
cookie:
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
__cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- Linux
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AjCtZNlWdrrPZhq0MJDqd16sMuQEJ\",\n \"object\":
\"chat.completion\",\n \"created\": 1735337693,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"True\",\n \"refusal\": null\n
\ },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n
\ ],\n \"usage\": {\n \"prompt_tokens\": 78,\n \"completion_tokens\":
1,\n \"total_tokens\": 79,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_0aa8d3e20b\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8f8caa87094f756b-SEA
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Fri, 27 Dec 2024 22:14:53 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '156'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999898'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_ec74bef2a9ef7b2144c03fd7f7bbeab0
http_version: HTTP/1.1
status_code: 200
- request:
body: '{"messages": [{"role": "system", "content": "You are test role. test backstory\nYour
personal goal is: test goal\nTo give my best complete final answer to the task
use the exact following format:\n\nThought: I now can give a great answer\nFinal
Answer: Your final answer must be the great and the most complete as possible,
it must be outcome described.\n\nI MUST use these formats, my job depends on
it!"}, {"role": "user", "content": "\nCurrent Task: Say the word: Hi\n\nThis
is the expect criteria for your final answer: The word: Hi\nyou MUST return
the actual complete content as the final answer, not a summary.\n\nBegin! This
is VERY important to you, use the tools available and give your best Final Answer,
your job depends on it!\n\nThought:"}, {"role": "assistant", "content": "I now
can give a great answer \nFinal Answer: Hi"}, {"role": "user", "content": "Feedback:
Don''t say hi, say Hello instead!"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"],
"stream": false}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '986'
content-type:
- application/json
cookie:
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
__cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- Linux
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AjCtZGv4f3h7GDdhyOy9G0sB1lRgC\",\n \"object\":
\"chat.completion\",\n \"created\": 1735337693,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"Thought: I understand the feedback and
will adjust my response accordingly. \\nFinal Answer: Hello\",\n \"refusal\":
null\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 188,\n \"completion_tokens\":
18,\n \"total_tokens\": 206,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_0aa8d3e20b\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8f8caa88cac4756b-SEA
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Fri, 27 Dec 2024 22:14:54 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '358'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999793'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_ae1ab6b206d28ded6fee3c83ed0c2ab7
http_version: HTTP/1.1
status_code: 200
- request:
body: '{"messages": [{"role": "system", "content": "Determine if the following
feedback indicates that the user is satisfied or if further changes are needed.
Respond with ''True'' if further changes are needed, or ''False'' if the user
is satisfied. **Important** Do not include any additional commentary outside
of your ''True'' or ''False'' response.\n\nFeedback: \"looks good\""}], "model":
"gpt-4o-mini", "stop": ["\nObservation:"], "stream": false}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '439'
content-type:
- application/json
cookie:
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
__cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- Linux
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AjCtaiHL4TY8Dssk0j2miqmjrzquy\",\n \"object\":
\"chat.completion\",\n \"created\": 1735337694,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"False\",\n \"refusal\": null\n
\ },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n
\ ],\n \"usage\": {\n \"prompt_tokens\": 73,\n \"completion_tokens\":
1,\n \"total_tokens\": 74,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_0aa8d3e20b\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8f8caa8bdd26756b-SEA
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Fri, 27 Dec 2024 22:14:54 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '184'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999902'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_652891f79c1104a7a8436275d78a69f1
http_version: HTTP/1.1
status_code: 200
version: 1

View File

@@ -1,129 +1,179 @@
"""Tests for task guardrails functionality."""
from typing import Any, Dict
from unittest.mock import Mock
import pytest
from crewai.task import Task
from crewai.tasks.exceptions import GuardrailValidationError
from crewai.tasks.task_output import TaskOutput
def test_task_without_guardrail():
"""Test that tasks work normally without guardrails (backward compatibility)."""
agent = Mock()
agent.role = "test_agent"
agent.execute_task.return_value = "test result"
agent.crew = None
class TestTaskGuardrails:
"""Test suite for task guardrail functionality."""
task = Task(description="Test task", expected_output="Output")
@pytest.fixture
def mock_agent(self):
"""Fixture providing a mock agent for testing."""
agent = Mock()
agent.role = "test_agent"
agent.crew = None
return agent
result = task.execute_sync(agent=agent)
assert isinstance(result, TaskOutput)
assert result.raw == "test result"
def test_task_without_guardrail(self, mock_agent):
"""Test that tasks work normally without guardrails (backward compatibility)."""
mock_agent.execute_task.return_value = "test result"
task = Task(description="Test task", expected_output="Output")
result = task.execute_sync(agent=mock_agent)
assert isinstance(result, TaskOutput)
assert result.raw == "test result"
def test_task_with_successful_guardrail():
"""Test that successful guardrail validation passes transformed result."""
def test_task_with_successful_guardrail(self, mock_agent):
"""Test that successful guardrail validation passes transformed result."""
def guardrail(result: TaskOutput):
return (True, result.raw.upper())
def guardrail(result: TaskOutput):
return (True, result.raw.upper())
mock_agent.execute_task.return_value = "test result"
task = Task(description="Test task", expected_output="Output", guardrail=guardrail)
agent = Mock()
agent.role = "test_agent"
agent.execute_task.return_value = "test result"
agent.crew = None
task = Task(description="Test task", expected_output="Output", guardrail=guardrail)
result = task.execute_sync(agent=agent)
assert isinstance(result, TaskOutput)
assert result.raw == "TEST RESULT"
result = task.execute_sync(agent=mock_agent)
assert isinstance(result, TaskOutput)
assert result.raw == "TEST RESULT"
def test_task_with_failing_guardrail():
"""Test that failing guardrail triggers retry with error context."""
def test_task_with_failing_guardrail(self, mock_agent):
"""Test that failing guardrail triggers retry with error context."""
def guardrail(result: TaskOutput):
return (False, "Invalid format")
def guardrail(result: TaskOutput):
return (False, "Invalid format")
mock_agent.execute_task.side_effect = ["bad result", "good result"]
task = Task(
description="Test task",
expected_output="Output",
guardrail=guardrail,
max_retries=1,
)
agent = Mock()
agent.role = "test_agent"
agent.execute_task.side_effect = ["bad result", "good result"]
agent.crew = None
# First execution fails guardrail, second succeeds
mock_agent.execute_task.side_effect = ["bad result", "good result"]
with pytest.raises(Exception) as exc_info:
task.execute_sync(agent=mock_agent)
task = Task(
description="Test task",
expected_output="Output",
guardrail=guardrail,
max_retries=1,
)
# First execution fails guardrail, second succeeds
agent.execute_task.side_effect = ["bad result", "good result"]
with pytest.raises(Exception) as exc_info:
task.execute_sync(agent=agent)
assert "Task failed guardrail validation" in str(exc_info.value)
assert task.retry_count == 1
assert "Task failed guardrail validation" in str(exc_info.value)
assert task.retry_count == 1
def test_task_with_guardrail_retries():
"""Test that guardrail respects max_retries configuration."""
def test_task_with_guardrail_retries(self, mock_agent):
"""Test that guardrail respects max_retries configuration."""
def guardrail(result: TaskOutput):
return (False, "Invalid format")
def guardrail(result: TaskOutput):
return (False, "Invalid format")
mock_agent.execute_task.return_value = "bad result"
task = Task(
description="Test task",
expected_output="Output",
guardrail=guardrail,
max_retries=2,
)
agent = Mock()
agent.role = "test_agent"
agent.execute_task.return_value = "bad result"
agent.crew = None
with pytest.raises(Exception) as exc_info:
task.execute_sync(agent=mock_agent)
task = Task(
description="Test task",
expected_output="Output",
guardrail=guardrail,
max_retries=2,
)
with pytest.raises(Exception) as exc_info:
task.execute_sync(agent=agent)
assert task.retry_count == 2
assert "Task failed guardrail validation after 2 retries" in str(exc_info.value)
assert "Invalid format" in str(exc_info.value)
assert task.retry_count == 2
assert "Task failed guardrail validation after 2 retries" in str(exc_info.value)
assert "Invalid format" in str(exc_info.value)
def test_guardrail_error_in_context():
"""Test that guardrail error is passed in context for retry."""
def test_guardrail_error_in_context(self, mock_agent):
"""Test that guardrail error is passed in context for retry."""
def guardrail(result: TaskOutput):
return (False, "Expected JSON, got string")
def guardrail(result: TaskOutput):
return (False, "Expected JSON, got string")
task = Task(
description="Test task",
expected_output="Output",
guardrail=guardrail,
max_retries=1,
)
agent = Mock()
agent.role = "test_agent"
agent.crew = None
# Mock execute_task to succeed on second attempt
first_call = True
def execute_task(task, context, tools):
nonlocal first_call
if first_call:
first_call = False
return "invalid"
return '{"valid": "json"}'
task = Task(
description="Test task",
expected_output="Output",
guardrail=guardrail,
max_retries=1,
)
mock_agent.execute_task.side_effect = execute_task
# Mock execute_task to succeed on second attempt
first_call = True
with pytest.raises(Exception) as exc_info:
task.execute_sync(agent=mock_agent)
def execute_task(task, context, tools):
nonlocal first_call
if first_call:
first_call = False
return "invalid"
return '{"valid": "json"}'
assert "Task failed guardrail validation" in str(exc_info.value)
assert "Expected JSON, got string" in str(exc_info.value)
agent.execute_task.side_effect = execute_task
with pytest.raises(Exception) as exc_info:
task.execute_sync(agent=agent)
def test_guardrail_with_new_style_annotation(self, mock_agent):
"""Test guardrail with new style tuple annotation."""
def guardrail(result: TaskOutput) -> tuple[bool, str]:
return (True, result.raw.upper())
mock_agent.execute_task.return_value = "test result"
task = Task(
description="Test task",
expected_output="Output",
guardrail=guardrail
)
assert "Task failed guardrail validation" in str(exc_info.value)
assert "Expected JSON, got string" in str(exc_info.value)
result = task.execute_sync(agent=mock_agent)
assert isinstance(result, TaskOutput)
assert result.raw == "TEST RESULT"
def test_guardrail_with_optional_params(self, mock_agent):
"""Test guardrail with optional parameters."""
def guardrail(result: TaskOutput, optional_param: str = "default") -> tuple[bool, str]:
return (True, f"{result.raw}-{optional_param}")
mock_agent.execute_task.return_value = "test"
task = Task(
description="Test task",
expected_output="Output",
guardrail=guardrail
)
result = task.execute_sync(agent=mock_agent)
assert isinstance(result, TaskOutput)
assert result.raw == "test-default"
def test_guardrail_with_invalid_optional_params(self, mock_agent):
"""Test guardrail with invalid optional parameters."""
def guardrail(result: TaskOutput, *, required_kwonly: str) -> tuple[bool, str]:
return (True, result.raw)
with pytest.raises(GuardrailValidationError) as exc_info:
Task(
description="Test task",
expected_output="Output",
guardrail=guardrail
)
assert "exactly one required positional parameter" in str(exc_info.value)
def test_guardrail_with_dict_return_type(self, mock_agent):
"""Test guardrail with dict return type."""
def guardrail(result: TaskOutput) -> tuple[bool, dict[str, Any]]:
return (True, {"processed": result.raw.upper()})
mock_agent.execute_task.return_value = "test"
task = Task(
description="Test task",
expected_output="Output",
guardrail=guardrail
)
result = task.execute_sync(agent=mock_agent)
assert isinstance(result, TaskOutput)
assert result.raw == {"processed": "TEST"}

View File

@@ -1,9 +1,14 @@
interactions:
- request:
body: '{"model": "llama3.2:3b", "prompt": "### System:\nPlease convert the following
text into valid JSON.\n\nOutput ONLY the valid JSON and nothing else.\n\nThe
JSON must follow this format exactly:\n{\n \"name\": str,\n \"age\": int\n}\n\n###
User:\nName: Alice Llama, Age: 30\n\n", "options": {"stop": []}, "stream": false}'
body: '{"model": "llama3.2:3b", "prompt": "### User:\nName: Alice Llama, Age:
30\n\n### System:\nProduce JSON OUTPUT ONLY! Adhere to this format {\"name\":
\"function_name\", \"arguments\":{\"argument_name\": \"argument_value\"}} The
following functions are available to you:\n{''type'': ''function'', ''function'':
{''name'': ''SimpleModel'', ''description'': ''Correctly extracted `SimpleModel`
with all the required parameters with correct types'', ''parameters'': {''properties'':
{''name'': {''title'': ''Name'', ''type'': ''string''}, ''age'': {''title'':
''Age'', ''type'': ''integer''}}, ''required'': [''age'', ''name''], ''type'':
''object''}}}\n\n\n", "options": {}, "stream": false, "format": "json"}'
headers:
accept:
- '*/*'
@@ -12,23 +17,23 @@ interactions:
connection:
- keep-alive
content-length:
- '321'
- '657'
host:
- localhost:11434
user-agent:
- litellm/1.60.2
- litellm/1.57.4
method: POST
uri: http://localhost:11434/api/generate
response:
content: '{"model":"llama3.2:3b","created_at":"2025-02-21T02:57:55.059392Z","response":"{\"name\":
\"Alice Llama\", \"age\": 30}","done":true,"done_reason":"stop","context":[128006,9125,128007,271,38766,1303,33025,2696,25,6790,220,2366,18,271,128009,128006,882,128007,271,14711,744,512,5618,5625,279,2768,1495,1139,2764,4823,382,5207,27785,279,2764,4823,323,4400,775,382,791,4823,2011,1833,420,3645,7041,512,517,220,330,609,794,610,345,220,330,425,794,528,198,633,14711,2724,512,678,25,30505,445,81101,11,13381,25,220,966,271,128009,128006,78191,128007,271,5018,609,794,330,62786,445,81101,498,330,425,794,220,966,92],"total_duration":4675906000,"load_duration":836091458,"prompt_eval_count":82,"prompt_eval_duration":3561000000,"eval_count":15,"eval_duration":275000000}'
content: '{"model":"llama3.2:3b","created_at":"2025-01-15T20:47:11.926411Z","response":"{\"name\":
\"SimpleModel\", \"arguments\":{\"name\": \"Alice Llama\", \"age\": 30}}","done":true,"done_reason":"stop","context":[128006,9125,128007,271,38766,1303,33025,2696,25,6790,220,2366,18,271,128009,128006,882,128007,271,14711,2724,512,678,25,30505,445,81101,11,13381,25,220,966,271,14711,744,512,1360,13677,4823,32090,27785,0,2467,6881,311,420,3645,5324,609,794,330,1723,1292,498,330,16774,23118,14819,1292,794,330,14819,3220,32075,578,2768,5865,527,2561,311,499,512,13922,1337,1232,364,1723,518,364,1723,1232,5473,609,1232,364,16778,1747,518,364,4789,1232,364,34192,398,28532,1595,16778,1747,63,449,682,279,2631,5137,449,4495,4595,518,364,14105,1232,5473,13495,1232,5473,609,1232,5473,2150,1232,364,678,518,364,1337,1232,364,928,25762,364,425,1232,5473,2150,1232,364,17166,518,364,1337,1232,364,11924,8439,2186,364,6413,1232,2570,425,518,364,609,4181,364,1337,1232,364,1735,23742,3818,128009,128006,78191,128007,271,5018,609,794,330,16778,1747,498,330,16774,23118,609,794,330,62786,445,81101,498,330,425,794,220,966,3500],"total_duration":3374470708,"load_duration":1075750500,"prompt_eval_count":167,"prompt_eval_duration":1871000000,"eval_count":24,"eval_duration":426000000}'
headers:
Content-Length:
- '761'
- '1263'
Content-Type:
- application/json; charset=utf-8
Date:
- Fri, 21 Feb 2025 02:57:55 GMT
- Wed, 15 Jan 2025 20:47:12 GMT
http_version: HTTP/1.1
status_code: 200
- request:
@@ -47,7 +52,7 @@ interactions:
host:
- localhost:11434
user-agent:
- litellm/1.60.2
- litellm/1.57.4
method: POST
uri: http://localhost:11434/api/show
response:
@@ -223,7 +228,7 @@ interactions:
Reporting violations of the Acceptable Use Policy or unlicensed uses of Llama
3.2: LlamaUseReport@meta.com\",\"modelfile\":\"# Modelfile generated by \\\"ollama
show\\\"\\n# To build a new Modelfile based on this, replace FROM with:\\n#
FROM llama3.2:3b\\n\\nFROM /Users/joaomoura/.ollama/models/blobs/sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff\\nTEMPLATE
FROM llama3.2:3b\\n\\nFROM /Users/brandonhancock/.ollama/models/blobs/sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff\\nTEMPLATE
\\\"\\\"\\\"\\u003c|start_header_id|\\u003esystem\\u003c|end_header_id|\\u003e\\n\\nCutting
Knowledge Date: December 2023\\n\\n{{ if .System }}{{ .System }}\\n{{- end }}\\n{{-
if .Tools }}When you receive a tool call response, use the output to format
@@ -436,12 +441,12 @@ interactions:
.Content }}\\n{{- end }}{{ if not $last }}\\u003c|eot_id|\\u003e{{ end }}\\n{{-
else if eq .Role \\\"tool\\\" }}\\u003c|start_header_id|\\u003eipython\\u003c|end_header_id|\\u003e\\n\\n{{
.Content }}\\u003c|eot_id|\\u003e{{ if $last }}\\u003c|start_header_id|\\u003eassistant\\u003c|end_header_id|\\u003e\\n\\n{{
end }}\\n{{- end }}\\n{{- end }}\",\"details\":{\"parent_model\":\"\",\"format\":\"gguf\",\"family\":\"llama\",\"families\":[\"llama\"],\"parameter_size\":\"3.2B\",\"quantization_level\":\"Q4_K_M\"},\"model_info\":{\"general.architecture\":\"llama\",\"general.basename\":\"Llama-3.2\",\"general.file_type\":15,\"general.finetune\":\"Instruct\",\"general.languages\":[\"en\",\"de\",\"fr\",\"it\",\"pt\",\"hi\",\"es\",\"th\"],\"general.parameter_count\":3212749888,\"general.quantization_version\":2,\"general.size_label\":\"3B\",\"general.tags\":[\"facebook\",\"meta\",\"pytorch\",\"llama\",\"llama-3\",\"text-generation\"],\"general.type\":\"model\",\"llama.attention.head_count\":24,\"llama.attention.head_count_kv\":8,\"llama.attention.key_length\":128,\"llama.attention.layer_norm_rms_epsilon\":0.00001,\"llama.attention.value_length\":128,\"llama.block_count\":28,\"llama.context_length\":131072,\"llama.embedding_length\":3072,\"llama.feed_forward_length\":8192,\"llama.rope.dimension_count\":128,\"llama.rope.freq_base\":500000,\"llama.vocab_size\":128256,\"tokenizer.ggml.bos_token_id\":128000,\"tokenizer.ggml.eos_token_id\":128009,\"tokenizer.ggml.merges\":null,\"tokenizer.ggml.model\":\"gpt2\",\"tokenizer.ggml.pre\":\"llama-bpe\",\"tokenizer.ggml.token_type\":null,\"tokenizer.ggml.tokens\":null},\"modified_at\":\"2025-02-20T18:55:09.150577031-08:00\"}"
end }}\\n{{- end }}\\n{{- end }}\",\"details\":{\"parent_model\":\"\",\"format\":\"gguf\",\"family\":\"llama\",\"families\":[\"llama\"],\"parameter_size\":\"3.2B\",\"quantization_level\":\"Q4_K_M\"},\"model_info\":{\"general.architecture\":\"llama\",\"general.basename\":\"Llama-3.2\",\"general.file_type\":15,\"general.finetune\":\"Instruct\",\"general.languages\":[\"en\",\"de\",\"fr\",\"it\",\"pt\",\"hi\",\"es\",\"th\"],\"general.parameter_count\":3212749888,\"general.quantization_version\":2,\"general.size_label\":\"3B\",\"general.tags\":[\"facebook\",\"meta\",\"pytorch\",\"llama\",\"llama-3\",\"text-generation\"],\"general.type\":\"model\",\"llama.attention.head_count\":24,\"llama.attention.head_count_kv\":8,\"llama.attention.key_length\":128,\"llama.attention.layer_norm_rms_epsilon\":0.00001,\"llama.attention.value_length\":128,\"llama.block_count\":28,\"llama.context_length\":131072,\"llama.embedding_length\":3072,\"llama.feed_forward_length\":8192,\"llama.rope.dimension_count\":128,\"llama.rope.freq_base\":500000,\"llama.vocab_size\":128256,\"tokenizer.ggml.bos_token_id\":128000,\"tokenizer.ggml.eos_token_id\":128009,\"tokenizer.ggml.merges\":null,\"tokenizer.ggml.model\":\"gpt2\",\"tokenizer.ggml.pre\":\"llama-bpe\",\"tokenizer.ggml.token_type\":null,\"tokenizer.ggml.tokens\":null},\"modified_at\":\"2024-12-31T11:53:14.529771974-05:00\"}"
headers:
Content-Type:
- application/json; charset=utf-8
Date:
- Fri, 21 Feb 2025 02:57:55 GMT
- Wed, 15 Jan 2025 20:47:12 GMT
Transfer-Encoding:
- chunked
http_version: HTTP/1.1
@@ -462,7 +467,7 @@ interactions:
host:
- localhost:11434
user-agent:
- litellm/1.60.2
- litellm/1.57.4
method: POST
uri: http://localhost:11434/api/show
response:
@@ -638,7 +643,7 @@ interactions:
Reporting violations of the Acceptable Use Policy or unlicensed uses of Llama
3.2: LlamaUseReport@meta.com\",\"modelfile\":\"# Modelfile generated by \\\"ollama
show\\\"\\n# To build a new Modelfile based on this, replace FROM with:\\n#
FROM llama3.2:3b\\n\\nFROM /Users/joaomoura/.ollama/models/blobs/sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff\\nTEMPLATE
FROM llama3.2:3b\\n\\nFROM /Users/brandonhancock/.ollama/models/blobs/sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff\\nTEMPLATE
\\\"\\\"\\\"\\u003c|start_header_id|\\u003esystem\\u003c|end_header_id|\\u003e\\n\\nCutting
Knowledge Date: December 2023\\n\\n{{ if .System }}{{ .System }}\\n{{- end }}\\n{{-
if .Tools }}When you receive a tool call response, use the output to format
@@ -851,12 +856,12 @@ interactions:
.Content }}\\n{{- end }}{{ if not $last }}\\u003c|eot_id|\\u003e{{ end }}\\n{{-
else if eq .Role \\\"tool\\\" }}\\u003c|start_header_id|\\u003eipython\\u003c|end_header_id|\\u003e\\n\\n{{
.Content }}\\u003c|eot_id|\\u003e{{ if $last }}\\u003c|start_header_id|\\u003eassistant\\u003c|end_header_id|\\u003e\\n\\n{{
end }}\\n{{- end }}\\n{{- end }}\",\"details\":{\"parent_model\":\"\",\"format\":\"gguf\",\"family\":\"llama\",\"families\":[\"llama\"],\"parameter_size\":\"3.2B\",\"quantization_level\":\"Q4_K_M\"},\"model_info\":{\"general.architecture\":\"llama\",\"general.basename\":\"Llama-3.2\",\"general.file_type\":15,\"general.finetune\":\"Instruct\",\"general.languages\":[\"en\",\"de\",\"fr\",\"it\",\"pt\",\"hi\",\"es\",\"th\"],\"general.parameter_count\":3212749888,\"general.quantization_version\":2,\"general.size_label\":\"3B\",\"general.tags\":[\"facebook\",\"meta\",\"pytorch\",\"llama\",\"llama-3\",\"text-generation\"],\"general.type\":\"model\",\"llama.attention.head_count\":24,\"llama.attention.head_count_kv\":8,\"llama.attention.key_length\":128,\"llama.attention.layer_norm_rms_epsilon\":0.00001,\"llama.attention.value_length\":128,\"llama.block_count\":28,\"llama.context_length\":131072,\"llama.embedding_length\":3072,\"llama.feed_forward_length\":8192,\"llama.rope.dimension_count\":128,\"llama.rope.freq_base\":500000,\"llama.vocab_size\":128256,\"tokenizer.ggml.bos_token_id\":128000,\"tokenizer.ggml.eos_token_id\":128009,\"tokenizer.ggml.merges\":null,\"tokenizer.ggml.model\":\"gpt2\",\"tokenizer.ggml.pre\":\"llama-bpe\",\"tokenizer.ggml.token_type\":null,\"tokenizer.ggml.tokens\":null},\"modified_at\":\"2025-02-20T18:55:09.150577031-08:00\"}"
end }}\\n{{- end }}\\n{{- end }}\",\"details\":{\"parent_model\":\"\",\"format\":\"gguf\",\"family\":\"llama\",\"families\":[\"llama\"],\"parameter_size\":\"3.2B\",\"quantization_level\":\"Q4_K_M\"},\"model_info\":{\"general.architecture\":\"llama\",\"general.basename\":\"Llama-3.2\",\"general.file_type\":15,\"general.finetune\":\"Instruct\",\"general.languages\":[\"en\",\"de\",\"fr\",\"it\",\"pt\",\"hi\",\"es\",\"th\"],\"general.parameter_count\":3212749888,\"general.quantization_version\":2,\"general.size_label\":\"3B\",\"general.tags\":[\"facebook\",\"meta\",\"pytorch\",\"llama\",\"llama-3\",\"text-generation\"],\"general.type\":\"model\",\"llama.attention.head_count\":24,\"llama.attention.head_count_kv\":8,\"llama.attention.key_length\":128,\"llama.attention.layer_norm_rms_epsilon\":0.00001,\"llama.attention.value_length\":128,\"llama.block_count\":28,\"llama.context_length\":131072,\"llama.embedding_length\":3072,\"llama.feed_forward_length\":8192,\"llama.rope.dimension_count\":128,\"llama.rope.freq_base\":500000,\"llama.vocab_size\":128256,\"tokenizer.ggml.bos_token_id\":128000,\"tokenizer.ggml.eos_token_id\":128009,\"tokenizer.ggml.merges\":null,\"tokenizer.ggml.model\":\"gpt2\",\"tokenizer.ggml.pre\":\"llama-bpe\",\"tokenizer.ggml.token_type\":null,\"tokenizer.ggml.tokens\":null},\"modified_at\":\"2024-12-31T11:53:14.529771974-05:00\"}"
headers:
Content-Type:
- application/json; charset=utf-8
Date:
- Fri, 21 Feb 2025 02:57:55 GMT
- Wed, 15 Jan 2025 20:47:12 GMT
Transfer-Encoding:
- chunked
http_version: HTTP/1.1

View File

@@ -1,5 +1,4 @@
import json
import os
from typing import Dict, List, Optional
from unittest.mock import MagicMock, Mock, patch
@@ -221,13 +220,10 @@ def test_get_conversion_instructions_gpt():
supports_function_calling.return_value = True
instructions = get_conversion_instructions(SimpleModel, llm)
model_schema = PydanticSchemaParser(model=SimpleModel).get_schema()
expected_instructions = (
"Please convert the following text into valid JSON.\n\n"
"Output ONLY the valid JSON and nothing else.\n\n"
"The JSON must follow this schema exactly:\n```json\n"
f"{model_schema}\n```"
assert (
instructions
== f"Please convert the following text into valid JSON.\n\nThe JSON should follow this schema:\n```json\n{model_schema}\n```"
)
assert instructions == expected_instructions
def test_get_conversion_instructions_non_gpt():
@@ -350,17 +346,12 @@ def test_convert_with_instructions():
assert output.age == 30
# Skip tests that call external APIs when running in CI/CD
skip_external_api = pytest.mark.skipif(
os.getenv("CI") is not None, reason="Skipping tests that call external API in CI/CD"
)
@skip_external_api
@pytest.mark.vcr(filter_headers=["authorization"], record_mode="once")
@pytest.mark.vcr(filter_headers=["authorization"])
def test_converter_with_llama3_2_model():
llm = LLM(model="ollama/llama3.2:3b", base_url="http://localhost:11434")
sample_text = "Name: Alice Llama, Age: 30"
instructions = get_conversion_instructions(SimpleModel, llm)
converter = Converter(
llm=llm,
@@ -368,17 +359,19 @@ def test_converter_with_llama3_2_model():
model=SimpleModel,
instructions=instructions,
)
output = converter.to_pydantic()
assert isinstance(output, SimpleModel)
assert output.name == "Alice Llama"
assert output.age == 30
@skip_external_api
@pytest.mark.vcr(filter_headers=["authorization"], record_mode="once")
@pytest.mark.vcr(filter_headers=["authorization"])
def test_converter_with_llama3_1_model():
llm = LLM(model="ollama/llama3.1", base_url="http://localhost:11434")
sample_text = "Name: Alice Llama, Age: 30"
instructions = get_conversion_instructions(SimpleModel, llm)
converter = Converter(
llm=llm,
@@ -386,19 +379,14 @@ def test_converter_with_llama3_1_model():
model=SimpleModel,
instructions=instructions,
)
output = converter.to_pydantic()
assert isinstance(output, SimpleModel)
assert output.name == "Alice Llama"
assert output.age == 30
# Skip tests that call external APIs when running in CI/CD
skip_external_api = pytest.mark.skipif(
os.getenv("CI") is not None, reason="Skipping tests that call external API in CI/CD"
)
@skip_external_api
@pytest.mark.vcr(filter_headers=["authorization"])
def test_converter_with_nested_model():
llm = LLM(model="gpt-4o-mini")
@@ -575,7 +563,7 @@ def test_converter_with_ambiguous_input():
with pytest.raises(ConverterError) as exc_info:
output = converter.to_pydantic()
assert "failed to convert text into a pydantic model" in str(exc_info.value).lower()
assert "validation error" in str(exc_info.value).lower()
# Tests for function calling support

View File

@@ -1,49 +0,0 @@
import unittest
from crewai.agents.agent_builder.utilities.base_token_process import TokenProcess
class TestTokenProcess(unittest.TestCase):
"""Test suite for TokenProcess class token counting functionality."""
def setUp(self):
"""Initialize a fresh TokenProcess instance before each test."""
self.token_process = TokenProcess()
def test_sum_cached_prompt_tokens_with_none(self):
"""Test that passing None to sum_cached_prompt_tokens doesn't modify the counter."""
initial_tokens = self.token_process.cached_prompt_tokens
self.token_process.sum_cached_prompt_tokens(None)
self.assertEqual(self.token_process.cached_prompt_tokens, initial_tokens)
def test_sum_cached_prompt_tokens_with_int(self):
"""Test that passing an integer correctly increments the counter."""
initial_tokens = self.token_process.cached_prompt_tokens
self.token_process.sum_cached_prompt_tokens(5)
self.assertEqual(self.token_process.cached_prompt_tokens, initial_tokens + 5)
def test_sum_cached_prompt_tokens_with_zero(self):
"""Test that passing zero doesn't modify the counter."""
initial_tokens = self.token_process.cached_prompt_tokens
self.token_process.sum_cached_prompt_tokens(0)
self.assertEqual(self.token_process.cached_prompt_tokens, initial_tokens)
def test_sum_cached_prompt_tokens_with_large_number(self):
"""Test that the counter works with large numbers."""
initial_tokens = self.token_process.cached_prompt_tokens
self.token_process.sum_cached_prompt_tokens(1000000)
self.assertEqual(self.token_process.cached_prompt_tokens, initial_tokens + 1000000)
def test_sum_cached_prompt_tokens_multiple_calls(self):
"""Test that multiple calls accumulate correctly, ignoring None values."""
initial_tokens = self.token_process.cached_prompt_tokens
self.token_process.sum_cached_prompt_tokens(5)
self.token_process.sum_cached_prompt_tokens(None)
self.token_process.sum_cached_prompt_tokens(3)
self.assertEqual(self.token_process.cached_prompt_tokens, initial_tokens + 8)
def test_sum_cached_prompt_tokens_with_negative(self):
"""Test that negative values raise ValueError."""
with self.assertRaises(ValueError) as context:
self.token_process.sum_cached_prompt_tokens(-1)
self.assertEqual(str(context.exception), "Token count cannot be negative")