diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..624c00413 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: uv # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/docs/en/concepts/tasks.mdx b/docs/en/concepts/tasks.mdx index 3a8334bb1..9eba77f19 100644 --- a/docs/en/concepts/tasks.mdx +++ b/docs/en/concepts/tasks.mdx @@ -60,6 +60,7 @@ crew = Crew( | **Output Pydantic** _(optional)_ | `output_pydantic` | `Optional[Type[BaseModel]]` | A Pydantic model for task output. | | **Callback** _(optional)_ | `callback` | `Optional[Any]` | Function/object to be executed after task completion. | | **Guardrail** _(optional)_ | `guardrail` | `Optional[Callable]` | Function to validate task output before proceeding to next task. | +| **Guardrails** _(optional)_ | `guardrails` | `Optional[List[Callable] \| List[str]]` | List of guardrails to validate task output before proceeding to next task. | | **Guardrail Max Retries** _(optional)_ | `guardrail_max_retries` | `Optional[int]` | Maximum number of retries when guardrail validation fails. Defaults to 3. | @@ -223,6 +224,7 @@ By default, the `TaskOutput` will only include the `raw` output. A `TaskOutput` | **JSON Dict** | `json_dict` | `Optional[Dict[str, Any]]` | A dictionary representing the JSON output of the task. | | **Agent** | `agent` | `str` | The agent that executed the task. | | **Output Format** | `output_format` | `OutputFormat` | The format of the task output, with options including RAW, JSON, and Pydantic. The default is RAW. | +| **Messages** | `messages` | `list[LLMMessage]` | The messages from the last task execution. | ### Task Methods and Properties @@ -341,7 +343,11 @@ Task guardrails provide a way to validate and transform task outputs before they are passed to the next task. This feature helps ensure data quality and provides feedback to agents when their output doesn't meet specific criteria. -Guardrails are implemented as Python functions that contain custom validation logic, giving you complete control over the validation process and ensuring reliable, deterministic results. +CrewAI supports two types of guardrails: + +1. **Function-based guardrails**: Python functions with custom validation logic, giving you complete control over the validation process and ensuring reliable, deterministic results. + +2. **LLM-based guardrails**: String descriptions that use the agent's LLM to validate outputs based on natural language criteria. These are ideal for complex or subjective validation requirements.
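+
+As a minimal preview (illustrative only; `writer_agent` stands in for an `Agent` defined elsewhere), both styles attach to a task through the same parameter. Each is covered in detail in the sections below:
+
+```python Code
+from typing import Any, Tuple
+
+from crewai import Task, TaskOutput
+
+def has_no_placeholders(result: TaskOutput) -> Tuple[bool, Any]:
+    """Function-based guardrail: deterministic check for leftover TODO markers."""
+    if "TODO" in result.raw:
+        return (False, "Output still contains TODO placeholders")
+    return (True, result.raw)
+
+# Function-based: precise, programmatic validation
+summary_task = Task(
+    description="Summarize the release notes",
+    expected_output="A short summary",
+    agent=writer_agent,  # illustrative agent defined elsewhere
+    guardrail=has_no_placeholders,
+)
+
+# LLM-based: the string is turned into an LLMGuardrail that uses the agent's LLM
+announcement_task = Task(
+    description="Write a launch announcement",
+    expected_output="A friendly announcement under 150 words",
+    agent=writer_agent,
+    guardrail="The announcement must be friendly and free of technical jargon",
+)
+```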
### Function-Based Guardrails @@ -355,12 +361,12 @@ def validate_blog_content(result: TaskOutput) -> Tuple[bool, Any]: """Validate blog content meets requirements.""" try: # Check word count - word_count = len(result.split()) + word_count = len(result.raw.split()) if word_count > 200: return (False, "Blog content exceeds 200 words") # Additional validation logic here - return (True, result.strip()) + return (True, result.raw.strip()) except Exception as e: return (False, "Unexpected error during validation") @@ -372,6 +378,147 @@ blog_task = Task( ) ``` +### LLM-Based Guardrails (String Descriptions) + +Instead of writing custom validation functions, you can use string descriptions that leverage LLM-based validation. When you provide a string to the `guardrail` or `guardrails` parameter, CrewAI automatically creates an `LLMGuardrail` that uses the agent's LLM to validate the output based on your description. + +**Requirements**: +- The task must have an `agent` assigned (the guardrail uses the agent's LLM) +- Provide a clear, descriptive string explaining the validation criteria + +```python Code +from crewai import Task + +# Single LLM-based guardrail +blog_task = Task( + description="Write a blog post about AI", + expected_output="A blog post under 200 words", + agent=blog_agent, + guardrail="The blog post must be under 200 words and contain no technical jargon" +) +``` + +LLM-based guardrails are particularly useful for: +- **Complex validation logic** that's difficult to express programmatically +- **Subjective criteria** like tone, style, or quality assessments +- **Natural language requirements** that are easier to describe than code + +The LLM guardrail will: +1. Analyze the task output against your description +2. Return `(True, output)` if the output complies with the criteria +3. Return `(False, feedback)` with specific feedback if validation fails + +**Example with detailed validation criteria**: + +```python Code +research_task = Task( + description="Research the latest developments in quantum computing", + expected_output="A comprehensive research report", + agent=researcher_agent, + guardrail=""" + The research report must: + - Be at least 1000 words long + - Include at least 5 credible sources + - Cover both technical and practical applications + - Be written in a professional, academic tone + - Avoid speculation or unverified claims + """ +) +``` + +### Multiple Guardrails + +You can apply multiple guardrails to a task using the `guardrails` parameter. Multiple guardrails are executed sequentially, with each guardrail receiving the output from the previous one. This allows you to chain validation and transformation steps. + +The `guardrails` parameter accepts: +- A list of guardrail functions or string descriptions +- A single guardrail function or string (same as `guardrail`) + +**Note**: If `guardrails` is provided, it takes precedence over `guardrail`. The `guardrail` parameter will be ignored when `guardrails` is set. + +```python Code +from typing import Tuple, Any +from crewai import TaskOutput, Task + +def validate_word_count(result: TaskOutput) -> Tuple[bool, Any]: + """Validate word count is within limits.""" + word_count = len(result.raw.split()) + if word_count < 100: + return (False, f"Content too short: {word_count} words. Need at least 100 words.") + if word_count > 500: + return (False, f"Content too long: {word_count} words. 
Maximum is 500 words.") + return (True, result.raw) + +def validate_no_profanity(result: TaskOutput) -> Tuple[bool, Any]: + """Check for inappropriate language.""" + profanity_words = ["badword1", "badword2"] # Example list + content_lower = result.raw.lower() + for word in profanity_words: + if word in content_lower: + return (False, f"Inappropriate language detected: {word}") + return (True, result.raw) + +def format_output(result: TaskOutput) -> Tuple[bool, Any]: + """Format and clean the output.""" + formatted = result.raw.strip() + # Capitalize first letter + formatted = formatted[0].upper() + formatted[1:] if formatted else formatted + return (True, formatted) + +# Apply multiple guardrails sequentially +blog_task = Task( + description="Write a blog post about AI", + expected_output="A well-formatted blog post between 100-500 words", + agent=blog_agent, + guardrails=[ + validate_word_count, # First: validate length + validate_no_profanity, # Second: check content + format_output # Third: format the result + ], + guardrail_max_retries=3 +) +``` + +In this example, the guardrails execute in order: +1. `validate_word_count` checks the word count +2. `validate_no_profanity` checks for inappropriate language (using the output from step 1) +3. `format_output` formats the final result (using the output from step 2) + +If any guardrail fails, the error is sent back to the agent, and the task is retried up to `guardrail_max_retries` times. + +**Mixing function-based and LLM-based guardrails**: + +You can combine both function-based and string-based guardrails in the same list: + +```python Code +from typing import Tuple, Any +from crewai import TaskOutput, Task + +def validate_word_count(result: TaskOutput) -> Tuple[bool, Any]: + """Validate word count is within limits.""" + word_count = len(result.raw.split()) + if word_count < 100: + return (False, f"Content too short: {word_count} words. Need at least 100 words.") + if word_count > 500: + return (False, f"Content too long: {word_count} words. Maximum is 500 words.") + return (True, result.raw) + +# Mix function-based and LLM-based guardrails +blog_task = Task( + description="Write a blog post about AI", + expected_output="A well-formatted blog post between 100-500 words", + agent=blog_agent, + guardrails=[ + validate_word_count, # Function-based: precise word count check + "The content must be engaging and suitable for a general audience", # LLM-based: subjective quality check + "The writing style should be clear, concise, and free of technical jargon" # LLM-based: style validation + ], + guardrail_max_retries=3 +) +``` + +This approach combines the precision of programmatic validation with the flexibility of LLM-based assessment for subjective criteria. + ### Guardrail Function Requirements 1. 
**Function Signature**: diff --git a/lib/crewai/src/crewai/agent/core.py b/lib/crewai/src/crewai/agent/core.py index 7c2b96f71..1bd8dae48 100644 --- a/lib/crewai/src/crewai/agent/core.py +++ b/lib/crewai/src/crewai/agent/core.py @@ -119,6 +119,7 @@ class Agent(BaseAgent): _times_executed: int = PrivateAttr(default=0) _mcp_clients: list[Any] = PrivateAttr(default_factory=list) + _last_messages: list[LLMMessage] = PrivateAttr(default_factory=list) max_execution_time: int | None = Field( default=None, description="Maximum execution time for an agent to execute a task", @@ -538,6 +539,12 @@ class Agent(BaseAgent): event=AgentExecutionCompletedEvent(agent=self, task=task, output=result), ) + self._last_messages = ( + self.agent_executor.messages.copy() + if self.agent_executor and hasattr(self.agent_executor, "messages") + else [] + ) + self._cleanup_mcp_clients() return result @@ -1346,6 +1353,15 @@ class Agent(BaseAgent): def set_fingerprint(self, fingerprint: Fingerprint) -> None: self.security_config.fingerprint = fingerprint + @property + def last_messages(self) -> list[LLMMessage]: + """Get messages from the last task execution. + + Returns: + List of LLM messages from the most recent task execution. + """ + return self._last_messages + def _get_knowledge_search_query(self, task_prompt: str, task: Task) -> str | None: """Generate a search query for the knowledge base based on the task description.""" crewai_event_bus.emit( diff --git a/lib/crewai/src/crewai/crew.py b/lib/crewai/src/crewai/crew.py index b258f3eaa..c7e1c78f2 100644 --- a/lib/crewai/src/crewai/crew.py +++ b/lib/crewai/src/crewai/crew.py @@ -809,6 +809,7 @@ class Crew(FlowTrackable, BaseModel): "json_dict": output.json_dict, "output_format": output.output_format, "agent": output.agent, + "messages": output.messages, }, "task_index": task_index, "inputs": inputs, @@ -1236,6 +1237,7 @@ class Crew(FlowTrackable, BaseModel): pydantic=stored_output["pydantic"], json_dict=stored_output["json_dict"], output_format=stored_output["output_format"], + messages=stored_output.get("messages", []), ) self.tasks[i].output = task_output diff --git a/lib/crewai/src/crewai/lite_agent.py b/lib/crewai/src/crewai/lite_agent.py index e91e6b98f..c9f2afc2e 100644 --- a/lib/crewai/src/crewai/lite_agent.py +++ b/lib/crewai/src/crewai/lite_agent.py @@ -358,6 +358,7 @@ class LiteAgent(FlowTrackable, BaseModel): pydantic=formatted_result, agent_role=self.role, usage_metrics=usage_metrics.model_dump() if usage_metrics else None, + messages=self._messages, ) # Process guardrail if set diff --git a/lib/crewai/src/crewai/lite_agent_output.py b/lib/crewai/src/crewai/lite_agent_output.py index 582f52cdd..4183dba1f 100644 --- a/lib/crewai/src/crewai/lite_agent_output.py +++ b/lib/crewai/src/crewai/lite_agent_output.py @@ -6,6 +6,8 @@ from typing import Any from pydantic import BaseModel, Field +from crewai.utilities.types import LLMMessage + class LiteAgentOutput(BaseModel): """Class that represents the result of a LiteAgent execution.""" @@ -20,6 +22,7 @@ class LiteAgentOutput(BaseModel): usage_metrics: dict[str, Any] | None = Field( description="Token usage metrics for this execution", default=None ) + messages: list[LLMMessage] = Field(description="Messages of the agent", default=[]) def to_dict(self) -> dict[str, Any]: """Convert pydantic_output to a dictionary.""" diff --git a/lib/crewai/src/crewai/llm/internal/meta.py b/lib/crewai/src/crewai/llm/internal/meta.py index 97977ad55..f91fad96d 100644 --- a/lib/crewai/src/crewai/llm/internal/meta.py +++ 
b/lib/crewai/src/crewai/llm/internal/meta.py @@ -99,7 +99,9 @@ class LLMMeta(ModelMetaclass): native_class = cls._get_native_provider(provider) if use_native else None if native_class and not is_litellm and provider in SUPPORTED_NATIVE_PROVIDERS: try: - kwargs_copy = {k: v for k, v in kwargs.items() if k not in ("provider", "model")} + kwargs_copy = { + k: v for k, v in kwargs.items() if k not in ("provider", "model") + } return native_class( model=model_string, provider=provider, **kwargs_copy ) @@ -114,7 +116,9 @@ class LLMMeta(ModelMetaclass): logging.error("LiteLLM is not available, falling back to LiteLLM") raise ImportError("Fallback to LiteLLM is not available") from None - kwargs_copy = {k: v for k, v in kwargs.items() if k not in ("model", "is_litellm")} + kwargs_copy = { + k: v for k, v in kwargs.items() if k not in ("model", "is_litellm") + } return super().__call__(model=model, is_litellm=True, **kwargs_copy) @staticmethod diff --git a/lib/crewai/src/crewai/task.py b/lib/crewai/src/crewai/task.py index 869419c25..dfb505d77 100644 --- a/lib/crewai/src/crewai/task.py +++ b/lib/crewai/src/crewai/task.py @@ -539,6 +539,7 @@ class Task(BaseModel): json_dict=json_output, agent=agent.role, output_format=self._get_output_format(), + messages=agent.last_messages, ) if self._guardrails: @@ -949,6 +950,7 @@ Follow these guidelines: json_dict=json_output, agent=agent.role, output_format=self._get_output_format(), + messages=agent.last_messages, ) return task_output diff --git a/lib/crewai/src/crewai/tasks/task_output.py b/lib/crewai/src/crewai/tasks/task_output.py index ba9f95c18..901604ac1 100644 --- a/lib/crewai/src/crewai/tasks/task_output.py +++ b/lib/crewai/src/crewai/tasks/task_output.py @@ -6,6 +6,7 @@ from typing import Any from pydantic import BaseModel, Field, model_validator from crewai.tasks.output_format import OutputFormat +from crewai.utilities.types import LLMMessage class TaskOutput(BaseModel): @@ -40,6 +41,7 @@ class TaskOutput(BaseModel): output_format: OutputFormat = Field( description="Output format of the task", default=OutputFormat.RAW ) + messages: list[LLMMessage] = Field(description="Messages of the task", default=[]) @model_validator(mode="after") def set_summary(self): diff --git a/lib/crewai/src/crewai/utilities/types.py b/lib/crewai/src/crewai/utilities/types.py index bc331a97e..a4627613d 100644 --- a/lib/crewai/src/crewai/utilities/types.py +++ b/lib/crewai/src/crewai/utilities/types.py @@ -1,6 +1,8 @@ """Types for CrewAI utilities.""" -from typing import Any, Literal, TypedDict +from typing import Any, Literal + +from typing_extensions import TypedDict class LLMMessage(TypedDict): diff --git a/lib/crewai/tests/agents/test_lite_agent.py b/lib/crewai/tests/agents/test_lite_agent.py index 1215c7804..824cd965a 100644 --- a/lib/crewai/tests/agents/test_lite_agent.py +++ b/lib/crewai/tests/agents/test_lite_agent.py @@ -238,6 +238,27 @@ def test_lite_agent_returns_usage_metrics(): assert result.usage_metrics["total_tokens"] > 0 +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_lite_agent_output_includes_messages(): + """Test that LiteAgentOutput includes messages from agent execution.""" + llm = LLM(model="gpt-4o-mini") + agent = Agent( + role="Research Assistant", + goal="Find information about the population of Tokyo", + backstory="You are a helpful research assistant who can search for information about the population of Tokyo.", + llm=llm, + tools=[WebSearchTool()], + verbose=True, + ) + + result = agent.kickoff("What is the population of Tokyo?") + 
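+    # messages should mirror the transcript the LiteAgent accumulated during this run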
+ assert isinstance(result, LiteAgentOutput) + assert hasattr(result, "messages") + assert isinstance(result.messages, list) + assert len(result.messages) > 0 + + @pytest.mark.vcr(filter_headers=["authorization"]) @pytest.mark.asyncio async def test_lite_agent_returns_usage_metrics_async(): diff --git a/lib/crewai/tests/cassettes/test_lite_agent_output_includes_messages.yaml b/lib/crewai/tests/cassettes/test_lite_agent_output_includes_messages.yaml new file mode 100644 index 000000000..c71e22690 --- /dev/null +++ b/lib/crewai/tests/cassettes/test_lite_agent_output_includes_messages.yaml @@ -0,0 +1,261 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"You are Research Assistant. You + are a helpful research assistant who can search for information about the population + of Tokyo.\nYour personal goal is: Find information about the population of Tokyo\n\nYou + ONLY have access to the following tools, and should NEVER make up tools that + are not listed here:\n\nTool Name: search_web\nTool Arguments: {''query'': {''description'': + None, ''type'': ''str''}}\nTool Description: Search the web for information + about a topic.\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: + you should always think about what to do\nAction: the action to take, only one + name of [search_web], just the name, exactly as it''s written.\nAction Input: + the input to the action, just a simple JSON object, enclosed in curly braces, + using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce + all necessary information is gathered, return the following format:\n\n```\nThought: + I now know the final answer\nFinal Answer: the final answer to the original + input question\n```"},{"role":"user","content":"What is the population of Tokyo?"}],"model":"gpt-4o-mini"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1160' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.109.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.109.1 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jJM9b9swEIZ3/YoDZzvwd2xtRTI0HZolcIcqkGnqJLGheCx5Suoa/u+F + 5A/JTQp00XDPva/ui/sIQOhMxCBUKVlVzgzv5Lf7fDtbmcfRV1M9/l5/Wa/N6/r+7fNydycGjYK2 + P1DxWXWjqHIGWZM9YuVRMjau49vFZDleLVbzFlSUoWlkhePhjIaVtno4GU1mw9HtcLw8qUvSCoOI + 4XsEALBvv02dNsNfIobR4BypMARZoIgvSQDCk2kiQoagA0vLYtBBRZbRtqVvNpvEPpVUFyXH8AAW + MQMmCCi9KiEnD1wiGMkYGLTNyVeyaRI8FtJn2hZtgiNXmyOgHJ7oZUc3if2kmkh8ckvfcHuOwYN1 + NcewT8TPGv0uEXEiVO09Wv7IDCajyTQRh8RuNpt+Lx7zOshmnrY2pgektcStSTvF5xM5XOZmqHCe + tuEvqci11aFMPcpAtplRYHKipYcI4LndT301cuE8VY5TphdsfzeZnvYjurPo6HR5gkwsTU+1OIMr + vzRDltqE3oaFkqrErJN25yDrTFMPRL2u31fzkfexc22L/7HvgFLoGLPUecy0uu64S/PYvJp/pV2m + 3BYsAvpXrTBljb7ZRIa5rM3xlkXYBcYqzbUt0Duvjwedu3S+GMl8gfP5SkSH6A8AAAD//wMAJGbR + +94DAAA= + headers: + CF-RAY: + - 99c98dd3ddb9ce6c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 11 Nov 2025 00:08:16 GMT + Server: + - cloudflare + Set-Cookie: + - 
__cf_bm=6maCeRS26vR_uzqYdtL7RXY7kzGdvLhWcE2RP2PnZS0-1762819696-1.0.1.1-72zCZZVBiGDdwPDvETKS_fUA4DYCLVyVHDYW2qpSxxAUuWKNPLxQQ1PpeI7YuB9v.y1e3oapeuV5mBjcP4c9_ZbH.ZI14TUNOexPUB6yCaQ; + path=/; expires=Tue, 11-Nov-25 00:38:16 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=a.XOUFuP.5IthR7ITJrIWIZSWWAkmHU._pM9.qhCnhM-1762819696364-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '1199' + openai-project: + - proj_xitITlrFeen7zjNSzML82h9x + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '1351' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-project-tokens: + - '150000000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-project-tokens: + - '149999735' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999735' + x-ratelimit-reset-project-tokens: + - 0s + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_50a8251d98f748bb8e73304a2548b694 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"You are Research Assistant. You + are a helpful research assistant who can search for information about the population + of Tokyo.\nYour personal goal is: Find information about the population of Tokyo\n\nYou + ONLY have access to the following tools, and should NEVER make up tools that + are not listed here:\n\nTool Name: search_web\nTool Arguments: {''query'': {''description'': + None, ''type'': ''str''}}\nTool Description: Search the web for information + about a topic.\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: + you should always think about what to do\nAction: the action to take, only one + name of [search_web], just the name, exactly as it''s written.\nAction Input: + the input to the action, just a simple JSON object, enclosed in curly braces, + using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce + all necessary information is gathered, return the following format:\n\n```\nThought: + I now know the final answer\nFinal Answer: the final answer to the original + input question\n```"},{"role":"user","content":"What is the population of Tokyo?"},{"role":"assistant","content":"```\nThought: + I need to search for the latest information regarding the population of Tokyo.\nAction: + search_web\nAction Input: {\"query\":\"current population of Tokyo 2023\"}\n```\nObservation: + Tokyo''s population in 2023 was approximately 21 million people in the city + proper, and 37 million in the greater metropolitan area."}],"model":"gpt-4o-mini"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1521' + content-type: + - application/json + cookie: + - __cf_bm=6maCeRS26vR_uzqYdtL7RXY7kzGdvLhWcE2RP2PnZS0-1762819696-1.0.1.1-72zCZZVBiGDdwPDvETKS_fUA4DYCLVyVHDYW2qpSxxAUuWKNPLxQQ1PpeI7YuB9v.y1e3oapeuV5mBjcP4c9_ZbH.ZI14TUNOexPUB6yCaQ; + _cfuvid=a.XOUFuP.5IthR7ITJrIWIZSWWAkmHU._pM9.qhCnhM-1762819696364-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.109.1 + 
x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.109.1 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFPLbtswELz7KxY8W4Es+RHr1ifgQw8F3OZQBxJDrSTWFJcgqSRG4H8v + KD+kNCnQCwFyZpazs+TLBIDJkmXARMO9aI2KPvG7z81d8mWf4E/cxN9+PK5SvfkYf08Pe8+mQUEP + v1H4i+pGUGsUekn6BAuL3GOoOlstk9vZerle9UBLJaogq42P5hS1UssoiZN5FK+i2e1Z3ZAU6FgG + vyYAAC/9GnzqEp9ZBvH0ctKic7xGll1JAMySCieMOyed5/rk+QwK0h51b70oip3eNtTVjc9gA5qe + YB8W3yBUUnMFXLsntDv9td996HcZbBsEQ6ZTPLQMVMGW9gcCqSGJkxSkA26MpWfZco/qAMkMWqlU + IBskozBQwy1C+gMYSwYtcF1CuroSz4y6j9JCi96SISU918At8pudLopi3JrFqnM8xKs7pUYA15p8 + 77UP9f6MHK8xKqqNpQf3l5RVUkvX5Ba5Ix0ic54M69HjBOC+H1f3agLMWGqNzz3tsb8ujdNTPTa8 + kgGdX0BPnquRar6cvlMvL9Fzqdxo4Exw0WA5SIfXwbtS0giYjLp+6+a92qfOpa7/p/wACIHGY5kb + i6UUrzseaBbDJ/oX7Zpyb5g5tI9SYO4l2jCJEiveqfN3dAfnsc0rqWu0xsrT+65MvljGvFriYrFm + k+PkDwAAAP//AwDgLjwY7QMAAA== + headers: + CF-RAY: + - 99c98dde7fc9ce6c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 11 Nov 2025 00:08:18 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '1339' + openai-project: + - proj_xitITlrFeen7zjNSzML82h9x + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '1523' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-project-tokens: + - '150000000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-project-tokens: + - '149999657' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999657' + x-ratelimit-reset-project-tokens: + - 0s + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_ade054352f8c4dfdba50683755eba41d + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/test_task_output_includes_messages.yaml b/lib/crewai/tests/cassettes/test_task_output_includes_messages.yaml new file mode 100644 index 000000000..5f9f33fe8 --- /dev/null +++ b/lib/crewai/tests/cassettes/test_task_output_includes_messages.yaml @@ -0,0 +1,423 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"You are Researcher. You''re an + expert researcher, specialized in technology, software engineering, AI and startups. 
+ You work as a freelancer and is now working on doing research and analysis for + a new customer.\nYour personal goal is: Make the best research and analysis + on content about AI and AI agents\nTo give my best complete final answer to + the task respond using the exact following format:\n\nThought: I now can give + a great answer\nFinal Answer: Your final answer must be the great and the most + complete as possible, it must be outcome described.\n\nI MUST use these formats, + my job depends on it!"},{"role":"user","content":"\nCurrent Task: Give me a + list of 3 interesting ideas about AI.\n\nThis is the expected criteria for your + final answer: Bullet point list of 3 ideas.\nyou MUST return the actual complete + content as the final answer, not a summary.\n\nYou MUST follow these instructions: + \n - Include specific examples and real-world case studies to enhance the credibility + and depth of the article ideas.\n - Incorporate mentions of notable companies, + projects, or tools relevant to each topic to provide concrete context.\n - Add + diverse viewpoints such as interviews with experts, users, or thought leaders + to enrich the narrative and lend authority.\n - Address ethical, social, and + emotional considerations explicitly to reflect a balanced and comprehensive + analysis.\n - Enhance the descriptions by including implications for future + developments and the potential impact on society.\n - Use more engaging and + vivid language that draws the reader into each topic''s nuances and importance.\n + - Include notes or summaries that contextualize each set of ideas in terms of + relevance and potential reader engagement.\n - In future tasks, focus on elaborating + initial outlines into more detailed and nuanced article proposals with richer + content and insights.\n\nBegin! 
This is VERY important to you, use the tools + available and give your best Final Answer, your job depends on it!\n\nThought:"}],"model":"gpt-4.1-mini"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '2076' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.109.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.109.1 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1xXS44kxw3d6xREbwwMqgsaWWPJvSvNT21roIE0kAx5NswIViWnI4MpMiKra7TR + IbTxSbz3UXQSgxFZ1S1tGuiMH/n43iPrl08Arjhe3cBVGLGEaU7Xz/HHL3/YfTz+9GJ69sPXL8sr + e/bT9JMefv4Ql39dbfyEDB8olPOpbZBpTlRYcl8OSljIb336xd8++/Lp3//6xbO2MEmk5McOc7n+ + fPv0euLM1599+tmz608/v376+Xp8FA5kVzfw708AAH5pfz3QHOn+6gY+3Zy/TGSGB7q6uWwCuFJJ + /uUKzdgK5nK1eVgMkgvlFvu7UephLDdwC1mOEDDDgRcChIMnAJjtSPo+v+KMCXbtv5v3+X2+hidP + drfXL5QXyvCW1CRj4o8U4WvCVMaASjfwHS2SqsPCHzkf4C0Wplzg21qCTGTwblSPAN4qRQ7F395l + TKfCwZ48eZ8B3o1swJEQ6H5OomQwyhF2t8AGRTHbXnTyy8fLwzCcgDIOqX3mw5hOwDnywrGuURbP + b/JY5oTZYECjCJJhXkOMWBAwR5gfQmvFsy28EgXODmygDXjtMTMZJL4juP3qDfyIxSSvUMCIC0Gi + hRQPFD30IoCe5keCyW/HBEpBNNoGDpRl4mCb9npInNt6UcZkfrIgJ1EvViCFMpLi7K/XzD9XSiew + ysVTFCAM4zmjLbwgmt9wjr//+h/zKOxkhSYY0cBGOWaYVSY2As6XrPMB7jhmcgA/VD0BoabTBgwX + X0u8kEFZqzirYEeKcyFdKHvpt3Db/mM6GhzZAXmo1KyyJzN2+ljHu4droLQQJhikjEC5jNUYbYK9 + KOxuWw6zOJEZU4dKHJBsgIPUArPyguHUlloxjUJVLqdNpwQfxpYelbEBHDnRNKF59iPm2MhjlI3X + jJxn2BP6XgJjchyK09NG3hcIUlMEpVgDQWSbUbl4YfzCh4wxBDKDoRbAZALKdmdA9xhIB2whcaaf + najlBLzvfBFHRwlqpoVyOvkDRXmoheIWXk5SGoQe0gXAgTLtucBeZYJRZmpY8DSrLOTElkMW4x7L + 5Hj0iOh+JmXKwQ/cM5UTyEIKWItMbmsQKbDX7HrCO86HLbyqpSr53YlDA8nTDqlGgt3t9SxHUoqw + sJaKaYUDLvbUK6+E6brw5ELLXEQdi0aYI6HikAgGFi+JqG1WtvoeBJtQC+kGJlECJZslm5ftEfAU + ZGV8GfFcLrovlCMk3lPLO7ioO2nOJZB9Xz4kGRzhbfekIjMHNyGlRAvm0g5RPuChxWTABYrUMLpl + 1QkzHCml64HacgGEfc0R3YQwNX/oPOa8cCG/FyNpk3zwbGJXu2tLHWSCAZObAAxUjkQZCoUxS5JD + T6Axu9GvmQxn2l68uxbJMkltRrA7NL5whudui47bbY7VCUZ2Ay/vZ8zRo/5KPGL/6qjstGzgTTUO + 3au+L6KnQsmV82fzjpSWJoUi/iL2F5thfjtTXgX9YvfNN//778um8YXNqYL+yD/qHUUKd+2ZR/v9 + +yD3bf/kgTQzFpes5B5URtWe0oEyKRZRA6vB2eeRvKj5QL75D70knHGYVVyuZF51MnpQYxgxJcoH + 8pORu/4gSye/7F0uo6iNPHd76lc6o4YTBEkJB9Eu+O6KjSOoha0YtBYTaaYcKZd0OoffousdfAvf + uWKOoilCQCOwUmN3nC69H1Ezaa/RKmun2+XJ3e117C28ozerxBo8gxays+11RY2MGeY6JLbRz+5u + r9do3EaEinpHmFHdDPrGAJEGLOSwtRkD0krnVnOnQEsUvqM933nTj+IebKHapcNTHp3f9lCOUZQ/ + SrYNHEdOBHZHs88KcBTV0+r8HnbkPnd4ITqwdPZIB0J7z+kN59JFPsjQjDthoDYbOK/Wgmzh5aVR + tCBbmfcSqvnYEGQ+qVvoBuSYqZV9cwHRqeBvhNVP3BIb1BQ98jt73FIe5BFpkuA1/3hpG24GZzA6 + D5t546lbd5Bpksj7k4fdKPso7+jFb2lzXqTpEZ0vDvFffABxLCX//utvR8nuNh7+Hi1wbpYOC6lV + O4PONnWbefSABcmZQrl0hKKUo9u7H5jdtEzmka00B2vD0IMUXJrNm6s/750hQqipVMXUSX9fHj/p + TeBcvHTy1kt7zs1etQAXo7TfPjKhgHPhBR+baxtUyDwUzq1W+2orb/5kp77YqWQzK3ul93vS3oCU + w/jIaxrGWRa8aIldxlxOFws+02l3C6+9vWan+g18VTk1r33nfjSjUi4b2IUgNZfWA79vTcw6OeGd + VitH0TKeVswfWW/vURijNg9rKVR1ckEmiu2KveJEjYRA2ap2gUP0fiRzkwEmPmR77FILpkq2AZrm + Ea0P9+UScDhtnK89Yk5t5upcYs1ktoXnf5yZX4scEl0G1Lb5DQcVk33p8zOZZ882dtKsvW0QbENz + dwPOXLgV4MHie7dowmj9+DIcOJQtzznJafqznVqgjMry4KXOPx+c1Fr784Fo4ParYY8u3TbBH3Jr + BOtobVB9fThBwiOQT5DdWDZA9+Sz0p77uru3rbLsDv8HfJ4nwjZAw+52A4p97DEJTD47YEKdVvOz + 
qgtx6oNBm33ZgvK0anjr3Zz03Hvf8ZS5wGsatLbd/3SJPlc87kXbiLiw5+6TrJN1JjUfkhrEkhsC + 7ZwVraEL1X8ouL7degaKsUvr8nvDf9jERUJvZfvW50KqbVqLZHzI6zB4qOkccpfKZd7utJ5VhpXT + k2grwFpZPs9tzSrdVVbnquaaX8X8fwAAAP//jFjNbtwgEL7vUyBfcmkr7SZt95pjpLxCZBF7sFEw + EMCtcth3j74BL3jbSj2PjT3AfH+squh9ZfwrhN2Iok3jxr3cp0B3UQBLjLn5++k6xs1JjhrfBjXL + N5qd2SSdh72xgO4wafbOW7M7LZ+5NGHIBbg3b3sdtcQ3e7VFdXNvi056khv7KZKBRaospvDxSSw6 + rpGgMW4p75t4do5pXM4kR+64zh6jgVMZNfOFyghWTsuFD8GwjamsGhToTSFpdfUGIKxXQgYgvP7l + kjRfduhTrEv2PHGWMA+vwcnRZCzOpgnZyQI7v5PkMQVnJ+YDpBJAe0auDfKLT6SxkQsYJffVO1Pu + uV68HFKm6p0oL/6WmW7FuWLYZ+kzC6hMer9xS1r6oIUdUBRB4gaB5GwmuUXbzR6AHNqcJpBao0RY + ZFdjmoK01qW8kUiIXkrlcs2EjJswHPHm1Q7kGOc+kIzOIv+JyfmOq5eDEC+cPa27OKmDy/KpT+6N + +HP355I9dTXzqtWfD/elmnCotXA8nrbKbsV+JOQZscmvukEOM4313Rp2Qa64pnBo+v7zf/62du5d + 2+l/lq+FAdqIxn6LRdqe62OBEAr+67HrPvMPdxGRyEB90hRwFiMpuZqc1HUZKnuFiQ8+6BzXKd8/ + DKfz96M6/zh1h8vhEwAA//8DAJPMJFq9FAAA + headers: + CF-RAY: + - 99c98602dfefcf4d-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 11 Nov 2025 00:03:08 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=ObqPLq12_9tJ06.V1RkHCM6FH_YGcLoC2ykIFBEawa8-1762819388-1.0.1.1-l7PJTVbZ1vCcKdeOe8GQVuFL59SCk0xhO_dMFY2wuH5Ybd1hhM_Xcv_QivXVhZlBGlRgRAgG631P99JOs_IYAYcNFJReE.3NpPl34VfPVeQ; + path=/; expires=Tue, 11-Nov-25 00:33:08 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=kdn.HizdlSPG7cBu_zv1ZPcu0jMwDQIA4H9YvMXu6a0-1762819388587-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '13504' + openai-project: + - proj_xitITlrFeen7zjNSzML82h9x + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '13638' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-project-tokens: + - '150000000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-project-tokens: + - '149999507' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999507' + x-ratelimit-reset-project-tokens: + - 0s + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2de40e1beb5f42ea896664df36e8ce8f + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are Researcher. You're + an expert researcher, specialized in technology, software engineering, AI and + startups. 
You work as a freelancer and is now working on doing research and + analysis for a new customer.\\nYour personal goal is: Make the best research + and analysis on content about AI and AI agents\\nTo give my best complete final + answer to the task respond using the exact following format:\\n\\nThought: I + now can give a great answer\\nFinal Answer: Your final answer must be the great + and the most complete as possible, it must be outcome described.\\n\\nI MUST + use these formats, my job depends on it!\"},{\"role\":\"user\",\"content\":\"\\nCurrent + Task: Summarize the ideas from the previous task.\\n\\nThis is the expected + criteria for your final answer: A summary of the ideas.\\nyou MUST return the + actual complete content as the final answer, not a summary.\\n\\nThis is the + context you're working with:\\n- **AI-Driven Personalized Healthcare: Revolutionizing + Patient Outcomes Through Predictive Analytics**\\n This idea explores how AI + is transforming healthcare by enabling highly individualized treatment plans + based on patient data and predictive models. For instance, companies like IBM + Watson Health have leveraged AI to analyze medical records, genomics, and clinical + trials to tailor cancer therapies uniquely suited to each patient. DeepMind\u2019s + AI system has shown promise in predicting kidney injury early, saving lives + through proactive intervention. Interviews with healthcare professionals and + patients reveal both enthusiasm for AI\u2019s potential and concerns about privacy + and data security, highlighting ethical dilemmas in handling sensitive information. + Socially, this shift could reduce disparities in healthcare access but also + risks exacerbating inequality if AI tools are unevenly distributed. Emotionally, + patients benefit from hope and improved prognosis but might also experience + anxiety over automated decision-making. Future implications include AI-powered + virtual health assistants and real-time monitoring with wearable biosensors, + promising a smarter, more responsive healthcare ecosystem that could extend + life expectancy and quality of life globally. This topic is relevant and engaging + as it touches human well-being at a fundamental level and invites readers to + consider the intricate balance between technology and ethics in medicine.\\n\\n- + **Autonomous AI Agents in Creative Industries: Expanding Boundaries of Art, + Music, and Storytelling**\\n This idea delves into AI agents like OpenAI\u2019s + DALL\xB7E for visual art, Jukedeck and OpenAI\u2019s Jukebox for music composition, + and narrative generators such as AI Dungeon, transforming creative processes. + These AI tools challenge traditional notions of authorship and creativity by + collaborating with human artists or independently generating content. Real-world + case studies include Warner Music experimenting with AI-driven music production + and the Guardian publishing AI-generated poetry, sparking public debate. Thought + leaders like AI artist Refik Anadol discuss how AI enhances creative horizons, + while skeptics worry about the dilution of human emotional expression and potential + job displacement for artists. Ethical discussions focus on copyright, ownership, + and the authenticity of AI-produced works. Socially, AI agents democratize access + to creative tools but may also commodify art. The emotional dimension involves + audiences' reception\u2014wonder and fascination versus skepticism and emotional + disconnect. 
Future trends anticipate sophisticated AI collaborators that understand + cultural context and emotions, potentially redefining art itself. This idea + captivates readers interested in the fusion of technology and the human spirit, + offering a rich narrative on innovation and identity.\\n\\n- **Ethical AI Governance: + Building Transparent, Accountable Systems for a Trustworthy Future**\\n This + topic addresses the urgent need for frameworks ensuring AI development aligns + with human values, emphasizing transparency, accountability, and fairness. Companies + like Google DeepMind and Microsoft have established AI ethics boards, while + initiatives such as OpenAI commit to responsible AI deployment. Real-world scenarios + include controversies over biased facial recognition systems used by law enforcement, + exemplified by cases involving companies like Clearview AI, raising societal + alarm about surveillance and discrimination. Experts like Timnit Gebru and Kate + Crawford provide critical perspectives on bias and structural injustice embedded + in AI systems, advocating for inclusive design and regulation. Ethically, this + topic probes the moral responsibility of creators versus users and the consequences + of autonomous AI decisions. Socially, there's a call for inclusive governance + involving diverse stakeholders to prevent marginalization. Emotionally, public + trust hinges on transparent communication and mitigation of fears related to + AI misuse or job displacement. Looking ahead, the establishment of international + AI regulatory standards and ethical certifications may become pivotal, ensuring + AI benefits are shared broadly and risks minimized. This topic strongly resonates + with readers concerned about the socio-political impact of AI and invites active + discourse on shaping a future where technology empowers rather than undermines + humanity.\\n\\nYou MUST follow these instructions: \\n - Include specific examples + and real-world case studies to enhance the credibility and depth of the article + ideas.\\n - Incorporate mentions of notable companies, projects, or tools relevant + to each topic to provide concrete context.\\n - Add diverse viewpoints such + as interviews with experts, users, or thought leaders to enrich the narrative + and lend authority.\\n - Address ethical, social, and emotional considerations + explicitly to reflect a balanced and comprehensive analysis.\\n - Enhance the + descriptions by including implications for future developments and the potential + impact on society.\\n - Use more engaging and vivid language that draws the + reader into each topic's nuances and importance.\\n - Include notes or summaries + that contextualize each set of ideas in terms of relevance and potential reader + engagement.\\n - In future tasks, focus on elaborating initial outlines into + more detailed and nuanced article proposals with richer content and insights.\\n\\nBegin! 
+ This is VERY important to you, use the tools available and give your best Final + Answer, your job depends on it!\\n\\nThought:\"}],\"model\":\"gpt-4.1-mini\"}" + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '6552' + content-type: + - application/json + cookie: + - __cf_bm=ObqPLq12_9tJ06.V1RkHCM6FH_YGcLoC2ykIFBEawa8-1762819388-1.0.1.1-l7PJTVbZ1vCcKdeOe8GQVuFL59SCk0xhO_dMFY2wuH5Ybd1hhM_Xcv_QivXVhZlBGlRgRAgG631P99JOs_IYAYcNFJReE.3NpPl34VfPVeQ; + _cfuvid=kdn.HizdlSPG7cBu_zv1ZPcu0jMwDQIA4H9YvMXu6a0-1762819388587-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.109.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.109.1 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1xXzY4bxxG++ykKexRIQlKcRN7b2pIcBrItSwIUIL4Ue4ozJfZ0tau6yR35oofw + JU+Sex5FTxJUz3BJ6bLAcqa66+f7qfnjG4Ab7m5u4SYMWMKY4/oHfP/sw1v99WM3vvzX8x9//3mr + rx4f3te85WflZuURsvtAoZyjNkHGHKmwpPlxUMJCfuqTv//t6bMn3/3l2XftwSgdRQ/rc1l/u3my + Hjnx+unjp39dP/52/eTbJXwQDmQ3t/DvbwAA/mh/PdHU0f3NLTxenX8ZyQx7url9eAngRiX6Lzdo + xlYwzUkvD4OkQqnl/m6Q2g/lFraQ5AQBE/R8JEDovQDAZCdSgN/SS04Y4a79f+s//JYePbrbrp8r + HynBa1KThJE/Ugf/IIxlCKh0C2/oKLF6Y/gjpx5eY2FKBX6pJchIBu8G9RzgtVLHofjtdwnjVDjY + o0d+07uBDbgjhECpkBpIgjIQFMVke9ERW1gWz1X2cLcFTjA8ZLGC00BKkC9X4PkKoIS7SDBMmXSd + r8so3oLRk80Rk0FBjqL+QNr1nIpywELQkT8zv5tTx0fuKkbIS6kdFtzAa5ZEpN4DBwsmJoPIB4Lt + 9z/Beywmaekc1MKeghdSBIz3BcrSpiNagdHrwAhKQbSzFfSUZPRq/C6jYivA1EGInNqLRRkj7D25 + 1NsKguK+eCo7siwH8skHUi9LMXtmZfDxR+6Tty2wUZzgxGUAPBf2+dN/DGri36u3VvYcaQNveeSI + GqcVPCfKP3Hq2nvYHf2KzmuyyQqNBgMevXejJCvqdIHIe1obHluXMOOOI5cJdhNgCNXfiRPsRSmg + tfwx1EJw4C7RBJw+VPXkcSDsfBo2jbnIaIA5E3rzV4AxysljsxKNucHh3FB2fB0pOV43sE3G/VD2 + 9fyA6WRzEy7ggqwYCnuEQ9PbvvTHYOB+iH4EIDRIJFRtYP386c/TwJFgxDQBjTvF4PNuvcoqIxt5 + ocBjVjlSB3ImjN8wSKZVmxaB52DcJ95zwFQgiI8yGeBO6ow+yMpHDFOLpTK0Wq3QCbWzgXNrFSXj + r5qxcMt70Uh4Di3+riSoqSO1IOppwYgfRMEkMBWM0NEOC902qmRxxXEMzvQsAjvlrqfrRmIIZAY9 + ZoMjqVVrscp2mOFYE/3uXezYivKuqcrDgRINRpzgJGqUgO55RkjHllG5MNkGXoziQRgdnw9jcoXs + wPEOQcVMBTv7/OnPHSXac2nH7FVG2FEppD6fPokts/hCM2a5mahAr5hz9NAGmJoIjUCOpIC1yNjw + flV95yRjSXZpVpzAanbxaSkMdcQEH2rXuyht4JXIoaUm6oNseABKx3YMdbCvparLVIi1I4O77bpJ + JHVwZC3eyfl+eLCJuaIToTZR3LE4LEQNOop8nOVLCeO68EgwSuIiM61cOWLXOAk2ohbSFYyi5IBM + FL4ql4LMMrCBtzUMwCnJsWHtgv8iQGlw2YA+yg5j0weg+0yhYFrw7JBwkZB9e7yCEVtbiiO2SObQ + 2JuagilFOjpLGhMuaAAlH6I/MOACRWoYaAag49tPn/t/ohjXO2qFusRijNaYipDqLHE7jC3pHZUT + UYJQG4bW5IAvFIYkUfo5+zPZGrVss/hqLZJklOpDg7u+oZQT/OCO5BTdpq46B8hu4cV9xqbr8L3U + 1GFTQGeFlhX8VI3DbAZvi+hUKDomr4y1yUUu3tXYiDzIqSH0kgDOCfjYlGzA3OT5nMueKboHhQFj + pNSfO4O14bSNQrHjudWQZB6y7M9H+PDmgDKIuh5t4F3jszky0OCXTGnRxud3r179778v3PHI/aCZ + 6VwUHNkc1Khlpmuh+4Zyx1N2T1wk13vS4sRaVu7vxbVuMeR/1gN1FA4tq6u7/fed3F91J05zEQSi + 3LcVyST1toGfz0oPrSVnr7/bwvOaepIEerUZEdjVdJrfnV2qmxKOPsNmQTivLzlKgY6OFCXPG8pC + 7N0Ed9sNvHGKnkRjB02FwsIsjrE2r30giA2+XSwaAe9RE+mMmlayc03Zr8AI1eii31mlq4F87Fr8 + rjJMS2uLYjjMUvJuIPixonaMqR24k9hBrrtzTvOB6/M4O8hCRacVuGo3HrvuxGmJcTkPUtXI7Xkf + 62Irx7YvP/QYtbAVeEN7Pvg62UkEGvOA5r12hN9tvbbG4DOQB1H+KMlW8054kdz5NGt7n6+cgTxM + 0rIoNF0a+QKw4HYTDALOFtXMSylyUwVJfrlbFal0dFGhRgA/MpwZ4d4VfTF4yNH3LDJr77ufLYY8 + e61T1CUAguRJfe9YgZx8LRk4rx449nDFLIu8n5xjXwzhJHqwB0bTxU09JEYKZ15l0jLBXnGkFrOB 
+ txJ49tcykNHFnH3RCw69j3TF/BVIptSALu4zrqQ7N2D3WW984DwDZVfLF9YYZByl4/3UBEcdxEVg + RLP1gs22NOVavjZ+rB1TcrwoenVNLGakUwrtuBP5PuDqn+RIcUnYKeQL+YGyJ2bj3NO2OzVqXM0y + MqVld3o5G/EVX50cS2nNoJXQOPVu+JIH73Voc7jbQpAYcSeKxdvTluJI85dGIc1KbS6hxlIVI7SP + u/uv/G1xptUX/bss0SfHJ/tnBc4FLpqGWpbFb7bRgL4tN6Ap+YRs/gbq66w83q59tYXVV0bXNiB/ + OvPJMiuXlS/gtjgEwh4tcJqV6WFNPg+h1WoUzopx2RZW/mWYSkOS1zzn0Pot+4W5HHy6OvNmsdgX + Zfg/AAAA//+MWctu5DYQvPsriDnLRmxsAiM3Z5E1jE1O8XUx4FAtDdcUKfMxGwfwvwfVTUmciQ85 + GQZHlNiP6qoid87Dk3oEKfMI0K/qt2KFxDyj0WcdyedOPRgTis8c+b+qeJGR/xxLyhX8JM3taGUc + AF8+0oRDnChlO3IA+VTTTPWc2C2GQ0m5aSZFPhWmXA9PZ2jP2ECzC2/yL8s0DjJzFYnySbtC2wzN + 64EMQrWciAWWhG7QNnpK6Ub9QZqDgBQqju4qVh9DGB2t2o4f/NOCNochi6KbRelK+QqvUYegWagK + QIY4am//qR3F+8qYbRQF97fN0i05gHnMwSeLHOCHmADNmEPdQyjFZCl1daDxLLU6gQxrwBIr5tHL + 1F9kqERSStjpH4qgewxJaEcgQmX6F7r9syPNmlA9PHU8WicUMHFueyBLZJqTSjyRdcIJ8YmRNHLi + +/oJdaxFy89zUY/anXS1TOrkq7oOHcmmjXK1B5cMP9vJ26we6RAL7/4VH/M56h9DiD3Q+mR7hhub + UHNcnq+okeAhQanvqVfajSHafMRXIXZrV6UcixGQgdBGW1GC+pkpF0YrJh8dpH4w0sisYJEKvLBT + 9FqsdFFPkKy8d6SxOKDbm5rIHLW3aWpGm0HSe+4TFAuzJgCDTDrIEk/ysrVCqmlQ2TcwFHgWqjon + 31+XtGj1C5mGt9FrkekAADkjwkvFVIVhp1kbtgdW8XYx/za60giFbazhzOOKPoq9wWq9WG9CnANT + 3B7KKyED+oWOwWE2VsLDRIxARNSkIzPQ2lf1rAlIiM5eDYQb9QXzTvtmPkDDQlRxliIdFhRkhaKt + z9r6phQzUA99Q74XN25DS+7b4iu96xQNg2ysJsvgt4n2yaaSqBKTvmeA9qP6Hg4r86lw97clEfCL + 5mWHpyrehJKy6ci/XQajNJIAfFNhLPUBRWdWiKGY2T6RGhOzKKnZngJ4bw5qLDpqn4kkO1UQVINA + pBGzFve2uRMk6Ih1eBhpC4V7U7B9J1gGZxO2J5pXMYoxIY7bylcqBmBnNnfqd6yeC/sRwdWxI/UJ + MDzZ6pYtikSPElrr1SLo9DI3xSxtxjdNLC+SDBb0VtTwnhCLagJNUh8283i9vr7Gn98Bc2zc2qQO + 2rwIRuAQkTKJkVDhW3N946Cpg0ZklFgBtxN6lhXgdg7WLw4nVCvI7CVMgItJcjuODv6eU6Ieqqb2 + LFQKJyAx3VqTVAmK0uoEU7dTU3HZDtoQm5Xa98nouYqixbobGJisiBMDWwue0pkd3dJfBqEVA5pk + NRQrqGjNcaNF3HMtBzqHPtm0ZQErB2XNIzaTCcXBJIqcSqokkyptDyU7lq3r61Ha7HOj+oBgjuXI + HJJm63sQdwglTEB99k6lzxZCb6dGiw6rWfh2015PRBpK0rgj8cW5ZkF71AU/jIuRb3Xlfb0KcWGc + Yziki0d3g/U2Hfcg2cHj2iPlMO949f1KqW985VLOblF24hnsc3ghft3t7e0n2XC33fU0yz+tyxmY + sa3c3d7ddx/sua+XBs3Fzc5oc6R+e3a75QEEhGbhqjn5fz/oo73l9NaP/2f7bcHAHKJ+v9ydtIfe + fhbpOzt8H/9sjTR/8C7BSTe0z5YistHToIur92oyYveDBX2ao5V7qmHefzJ39z/fDve/3O2u3q/+ + BQAA//8DAPcawNa2GwAA + headers: + CF-RAY: + - 99c9865b6af3cf4d-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 11 Nov 2025 00:03:32 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '22788' + openai-project: + - proj_xitITlrFeen7zjNSzML82h9x + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '22942' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-project-tokens: + - '150000000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-project-tokens: + - '149998392' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998392' + x-ratelimit-reset-project-tokens: + - 0s + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_48c359c72cdc47aeb89c6d6eeffdce7d + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/test_crew.py b/lib/crewai/tests/test_crew.py index 1a1db50af..d4cf1acbf 100644 --- a/lib/crewai/tests/test_crew.py +++ 
b/lib/crewai/tests/test_crew.py @@ -340,7 +340,7 @@ def test_sync_task_execution(researcher, writer): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -412,7 +412,7 @@ def test_manager_agent_delegating_to_assigned_task_agent(researcher, writer): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -513,7 +513,7 @@ def test_manager_agent_delegates_with_varied_role_cases(): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) task.output = mock_task_output @@ -611,7 +611,7 @@ def test_crew_with_delegating_agents_should_not_override_task_tools(ceo, writer) ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -669,7 +669,7 @@ def test_crew_with_delegating_agents_should_not_override_agent_tools(ceo, writer ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -788,7 +788,7 @@ def test_task_tools_override_agent_tools_with_allow_delegation(researcher, write ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # We mock execute_sync to verify which tools get used at runtime @@ -1225,7 +1225,7 @@ async def test_async_task_execution_call_count(researcher, writer): # Create a valid TaskOutput instance to mock the return value mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Create a MagicMock Future instance @@ -1784,7 +1784,7 @@ def test_hierarchical_kickoff_usage_metrics_include_manager(researcher): Task, "execute_sync", return_value=TaskOutput( - description="dummy", raw="Hello", agent=researcher.role + description="dummy", raw="Hello", agent=researcher.role, messages=[] ), ): crew.kickoff() @@ -1828,7 +1828,7 @@ def test_hierarchical_crew_creation_tasks_with_agents(researcher, writer): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -1881,7 +1881,7 @@ def test_hierarchical_crew_creation_tasks_with_async_execution(researcher, write ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", 
messages=[] ) # Create a mock Future that returns our TaskOutput @@ -2246,11 +2246,13 @@ def test_conditional_task_uses_last_output(researcher, writer): description="First task output", raw="First success output", # Will be used by third task's condition agent=researcher.role, + messages=[], ) mock_third = TaskOutput( description="Third task output", raw="Third task executed", # Output when condition succeeds using first task output agent=writer.role, + messages=[], ) # Set up mocks for task execution and conditional logic @@ -2318,11 +2320,13 @@ def test_conditional_tasks_result_collection(researcher, writer): description="Success output", raw="Success output", # Triggers third task's condition agent=researcher.role, + messages=[], ) mock_conditional = TaskOutput( description="Conditional output", raw="Conditional task executed", agent=writer.role, + messages=[], ) # Set up mocks for task execution and conditional logic @@ -2399,6 +2403,7 @@ def test_multiple_conditional_tasks(researcher, writer): description="Mock success", raw="Success and proceed output", agent=researcher.role, + messages=[], ) # Set up mocks for task execution @@ -2806,7 +2811,7 @@ def test_manager_agent(researcher, writer): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -3001,6 +3006,7 @@ def test_replay_feature(researcher, writer): output_format=OutputFormat.RAW, pydantic=None, summary="Mocked output for list of ideas", + messages=[], ) crew.kickoff() @@ -3052,6 +3058,7 @@ def test_crew_task_db_init(): output_format=OutputFormat.RAW, pydantic=None, summary="Write about AI in healthcare...", + messages=[], ) crew.kickoff() @@ -3114,6 +3121,7 @@ def test_replay_task_with_context(): output_format=OutputFormat.RAW, pydantic=None, summary="Detailed report on AI advancements...", + messages=[], ) mock_task_output2 = TaskOutput( description="Summarize the AI advancements report.", @@ -3123,6 +3131,7 @@ def test_replay_task_with_context(): output_format=OutputFormat.RAW, pydantic=None, summary="Summary of the AI advancements report...", + messages=[], ) mock_task_output3 = TaskOutput( description="Write an article based on the AI advancements summary.", @@ -3132,6 +3141,7 @@ def test_replay_task_with_context(): output_format=OutputFormat.RAW, pydantic=None, summary="Article on AI advancements...", + messages=[], ) mock_task_output4 = TaskOutput( description="Create a presentation based on the AI advancements article.", @@ -3141,6 +3151,7 @@ def test_replay_task_with_context(): output_format=OutputFormat.RAW, pydantic=None, summary="Presentation on AI advancements...", + messages=[], ) with patch.object(Task, "execute_sync") as mock_execute_task: @@ -3164,6 +3175,70 @@ def test_replay_task_with_context(): db_handler.reset() +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_replay_preserves_messages(): + """Test that replay preserves messages from stored task outputs.""" + from crewai.utilities.types import LLMMessage + + agent = Agent( + role="Test Agent", + goal="Test goal", + backstory="Test backstory", + allow_delegation=False, + ) + + task = Task( + description="Say hello", + expected_output="A greeting", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task], process=Process.sequential) + + mock_messages: list[LLMMessage] = [ + {"role": "system", "content": "You are 
a helpful assistant."}, + {"role": "user", "content": "Say hello"}, + {"role": "assistant", "content": "Hello!"}, + ] + + mock_task_output = TaskOutput( + description="Say hello", + raw="Hello!", + agent="Test Agent", + messages=mock_messages, + ) + + with patch.object(Task, "execute_sync", return_value=mock_task_output): + crew.kickoff() + + # Verify the task output was stored with messages + db_handler = TaskOutputStorageHandler() + stored_outputs = db_handler.load() + assert stored_outputs is not None + assert len(stored_outputs) > 0 + + # Verify messages are in the stored output + stored_output = stored_outputs[0]["output"] + assert "messages" in stored_output + assert len(stored_output["messages"]) == 3 + assert stored_output["messages"][0]["role"] == "system" + assert stored_output["messages"][1]["role"] == "user" + assert stored_output["messages"][2]["role"] == "assistant" + + # Replay the task and verify messages are preserved + with patch.object(Task, "execute_sync", return_value=mock_task_output): + replayed_output = crew.replay(str(task.id)) + + # Verify the replayed task output has messages + assert len(replayed_output.tasks_output) > 0 + replayed_task_output = replayed_output.tasks_output[0] + assert hasattr(replayed_task_output, "messages") + assert isinstance(replayed_task_output.messages, list) + assert len(replayed_task_output.messages) == 3 + + db_handler.reset() + + @pytest.mark.vcr(filter_headers=["authorization"]) def test_replay_with_context(): agent = Agent(role="test_agent", backstory="Test Description", goal="Test Goal") @@ -3181,6 +3256,7 @@ def test_replay_with_context(): pydantic=None, json_dict={}, output_format=OutputFormat.RAW, + messages=[], ) task1.output = context_output @@ -3241,6 +3317,7 @@ def test_replay_with_context_set_to_nullable(): description="Test Task Output", raw="test raw output", agent="test_agent", + messages=[], ) crew.kickoff() @@ -3264,6 +3341,7 @@ def test_replay_with_invalid_task_id(): pydantic=None, json_dict={}, output_format=OutputFormat.RAW, + messages=[], ) task1.output = context_output @@ -3328,6 +3406,7 @@ def test_replay_interpolates_inputs_properly(mock_interpolate_inputs): pydantic=None, json_dict={}, output_format=OutputFormat.RAW, + messages=[], ) task1.output = context_output @@ -3386,6 +3465,7 @@ def test_replay_setup_context(): pydantic=None, json_dict={}, output_format=OutputFormat.RAW, + messages=[], ) task1.output = context_output crew = Crew(agents=[agent], tasks=[task1, task2], process=Process.sequential) @@ -3619,6 +3699,7 @@ def test_conditional_should_skip(researcher, writer): description="Task 1 description", raw="Task 1 output", agent="Researcher", + messages=[], ) result = crew_met.kickoff() @@ -3653,6 +3734,7 @@ def test_conditional_should_execute(researcher, writer): description="Task 1 description", raw="Task 1 output", agent="Researcher", + messages=[], ) crew_met.kickoff() @@ -3824,7 +3906,7 @@ def test_task_tools_preserve_code_execution_tools(): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) with patch.object( @@ -3878,7 +3960,7 @@ def test_multimodal_flag_adds_multimodal_tools(): crew = Crew(agents=[multimodal_agent], tasks=[task], process=Process.sequential) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) 
     # Mock execute_sync to verify the tools passed at runtime
@@ -3942,6 +4024,7 @@ def test_multimodal_agent_image_tool_handling():
         description="Mock description",
         raw="A detailed analysis of the image",
         agent="Image Analyst",
+        messages=[],
     )

     with patch.object(Task, "execute_sync") as mock_execute_sync:
diff --git a/lib/crewai/tests/test_task.py b/lib/crewai/tests/test_task.py
index 73fedfc88..72fe23b4b 100644
--- a/lib/crewai/tests/test_task.py
+++ b/lib/crewai/tests/test_task.py
@@ -162,6 +162,7 @@ def test_task_callback_returns_task_output():
         "name": task.name or task.description,
         "expected_output": "Bullet point list of 5 interesting ideas.",
         "output_format": OutputFormat.RAW,
+        "messages": [],
     }

     assert output_dict == expected_output
@@ -1680,3 +1681,44 @@ def test_task_copy_with_list_context():
     assert isinstance(copied_task2.context, list)
     assert len(copied_task2.context) == 1
     assert copied_task2.context[0] is task1
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_task_output_includes_messages():
+    """Test that TaskOutput includes messages from agent execution."""
+    researcher = Agent(
+        role="Researcher",
+        goal="Make the best research and analysis on content about AI and AI agents",
+        backstory="You're an expert researcher, specialized in technology, software engineering, AI and startups. You work as a freelancer and are now doing research and analysis for a new customer.",
+        allow_delegation=False,
+    )
+
+    task1 = Task(
+        description="Give me a list of 3 interesting ideas about AI.",
+        expected_output="Bullet point list of 3 ideas.",
+        agent=researcher,
+    )
+
+    task2 = Task(
+        description="Summarize the ideas from the previous task.",
+        expected_output="A summary of the ideas.",
+        agent=researcher,
+    )
+
+    crew = Crew(agents=[researcher], tasks=[task1, task2], process=Process.sequential)
+    result = crew.kickoff()
+
+    # Verify both tasks have messages
+    assert len(result.tasks_output) == 2
+
+    # Check first task output has messages
+    task1_output = result.tasks_output[0]
+    assert hasattr(task1_output, "messages")
+    assert isinstance(task1_output.messages, list)
+    assert len(task1_output.messages) > 0
+
+    # Check second task output has messages
+    task2_output = result.tasks_output[1]
+    assert hasattr(task2_output, "messages")
+    assert isinstance(task2_output.messages, list)
+    assert len(task2_output.messages) > 0
diff --git a/lib/crewai/tests/test_task_guardrails.py b/lib/crewai/tests/test_task_guardrails.py
index 22572bfd3..dd24458d3 100644
--- a/lib/crewai/tests/test_task_guardrails.py
+++ b/lib/crewai/tests/test_task_guardrails.py
@@ -38,6 +38,7 @@ def test_task_without_guardrail():
     agent.role = "test_agent"
     agent.execute_task.return_value = "test result"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(description="Test task", expected_output="Output")
@@ -56,6 +57,7 @@ def test_task_with_successful_guardrail_func():
     agent.role = "test_agent"
     agent.execute_task.return_value = "test result"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test task", expected_output="Output", guardrail=guardrail
@@ -76,6 +78,7 @@ def test_task_with_failing_guardrail():
     agent.role = "test_agent"
     agent.execute_task.side_effect = ["bad result", "good result"]
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test task",
@@ -103,6 +106,7 @@ def test_task_with_guardrail_retries():
     agent.role = "test_agent"
     agent.execute_task.return_value = "bad result"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test task",
@@ -128,6 +132,7 @@ def test_guardrail_error_in_context():
     agent = Mock()
     agent.role = "test_agent"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test task",
@@ -295,6 +300,7 @@ def test_hallucination_guardrail_integration():
     agent.role = "test_agent"
     agent.execute_task.return_value = "test result"
     agent.crew = None
+    agent.last_messages = []

     mock_llm = Mock(spec=LLM)
     guardrail = HallucinationGuardrail(
@@ -342,6 +348,7 @@ def test_multiple_guardrails_sequential_processing():
     agent.role = "sequential_agent"
     agent.execute_task.return_value = "original text"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test sequential guardrails",
@@ -391,6 +398,7 @@ def test_multiple_guardrails_with_validation_failure():
     agent.role = "validation_agent"
     agent.execute_task = mock_execute_task
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test guardrails with validation",
@@ -432,6 +440,7 @@ def test_multiple_guardrails_with_mixed_string_and_taskoutput():
     agent.role = "mixed_agent"
     agent.execute_task.return_value = "original"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test mixed return types",
@@ -469,6 +478,7 @@ def test_multiple_guardrails_with_retry_on_middle_guardrail():
     agent.role = "retry_agent"
     agent.execute_task.return_value = "base"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test retry in middle guardrail",
@@ -500,6 +510,7 @@ def test_multiple_guardrails_with_max_retries_exceeded():
     agent.role = "failing_agent"
     agent.execute_task.return_value = "test"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test max retries with multiple guardrails",
@@ -523,6 +534,7 @@ def test_multiple_guardrails_empty_list():
     agent.role = "empty_agent"
     agent.execute_task.return_value = "no guardrails"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test empty guardrails list",
@@ -582,6 +594,7 @@ def test_multiple_guardrails_processing_order():
     agent.role = "order_agent"
     agent.execute_task.return_value = "base"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test processing order",
@@ -625,6 +638,7 @@ def test_multiple_guardrails_with_pydantic_output():
     agent.role = "pydantic_agent"
     agent.execute_task.return_value = "test content"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test guardrails with Pydantic",
@@ -658,6 +672,7 @@ def test_guardrails_vs_single_guardrail_mutual_exclusion():
     agent.role = "exclusion_agent"
     agent.execute_task.return_value = "test"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test mutual exclusion",
@@ -700,6 +715,7 @@ def test_per_guardrail_independent_retry_tracking():
     agent.role = "independent_retry_agent"
     agent.execute_task.return_value = "base"
     agent.crew = None
+    agent.last_messages = []

     task = create_smart_task(
         description="Test independent retry tracking",