From 629f7f34ceba443ad05cd77b05ec3713a011437d Mon Sep 17 00:00:00 2001 From: Lorenze Jay <63378463+lorenzejay@users.noreply.github.com> Date: Mon, 10 Nov 2025 15:35:42 -0800 Subject: [PATCH 1/4] docs: enhance task guardrail documentation with LLM-based validation support (#3879) - Added section on LLM-based guardrails, explaining their usage and requirements. - Updated examples to demonstrate the implementation of multiple guardrails, including both function-based and LLM-based approaches. - Clarified the distinction between single and multiple guardrails in task configurations. - Improved explanations of guardrail functionality to ensure better understanding of validation processes. --- docs/en/concepts/tasks.mdx | 152 ++++++++++++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 3 deletions(-) diff --git a/docs/en/concepts/tasks.mdx b/docs/en/concepts/tasks.mdx index 3a8334bb1..c9082dbc7 100644 --- a/docs/en/concepts/tasks.mdx +++ b/docs/en/concepts/tasks.mdx @@ -60,6 +60,7 @@ crew = Crew( | **Output Pydantic** _(optional)_ | `output_pydantic` | `Optional[Type[BaseModel]]` | A Pydantic model for task output. | | **Callback** _(optional)_ | `callback` | `Optional[Any]` | Function/object to be executed after task completion. | | **Guardrail** _(optional)_ | `guardrail` | `Optional[Callable]` | Function to validate task output before proceeding to next task. | +| **Guardrails** _(optional)_ | `guardrails` | `Optional[List[Callable] | List[str]]` | List of guardrails to validate task output before proceeding to next task. | | **Guardrail Max Retries** _(optional)_ | `guardrail_max_retries` | `Optional[int]` | Maximum number of retries when guardrail validation fails. Defaults to 3. | @@ -341,7 +342,11 @@ Task guardrails provide a way to validate and transform task outputs before they are passed to the next task. This feature helps ensure data quality and provides feedback to agents when their output doesn't meet specific criteria. -Guardrails are implemented as Python functions that contain custom validation logic, giving you complete control over the validation process and ensuring reliable, deterministic results. +CrewAI supports two types of guardrails: + +1. **Function-based guardrails**: Python functions with custom validation logic, giving you complete control over the validation process and ensuring reliable, deterministic results. + +2. **LLM-based guardrails**: String descriptions that use the agent's LLM to validate outputs based on natural language criteria. These are ideal for complex or subjective validation requirements. ### Function-Based Guardrails @@ -355,12 +360,12 @@ def validate_blog_content(result: TaskOutput) -> Tuple[bool, Any]: """Validate blog content meets requirements.""" try: # Check word count - word_count = len(result.split()) + word_count = len(result.raw.split()) if word_count > 200: return (False, "Blog content exceeds 200 words") # Additional validation logic here - return (True, result.strip()) + return (True, result.raw.strip()) except Exception as e: return (False, "Unexpected error during validation") @@ -372,6 +377,147 @@ blog_task = Task( ) ``` +### LLM-Based Guardrails (String Descriptions) + +Instead of writing custom validation functions, you can use string descriptions that leverage LLM-based validation. When you provide a string to the `guardrail` or `guardrails` parameter, CrewAI automatically creates an `LLMGuardrail` that uses the agent's LLM to validate the output based on your description. 
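+
+Under the hood, a string guardrail is expanded into an `LLMGuardrail` object. The sketch below shows roughly what that expansion looks like; the `crewai.tasks.llm_guardrail` import path and the `description`/`llm` constructor arguments are assumptions for illustration, and the string form shown in the examples below does this wiring for you, so you normally never construct the object yourself.
+
+```python Code
+from crewai import Agent, Task
+from crewai.tasks.llm_guardrail import LLMGuardrail  # assumed import path
+
+blog_agent = Agent(
+    role="Writer",
+    goal="Write concise, accessible blog posts",
+    backstory="An experienced technology writer.",
+)
+
+# Roughly what CrewAI builds when you pass a string to `guardrail`
+quality_check = LLMGuardrail(
+    description="The blog post must be under 200 words and contain no technical jargon",
+    llm=blog_agent.llm,  # the guardrail reuses the assigned agent's LLM
+)
+
+blog_task = Task(
+    description="Write a blog post about AI",
+    expected_output="A blog post under 200 words",
+    agent=blog_agent,
+    guardrail=quality_check,
+)
+```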
+ +**Requirements**: +- The task must have an `agent` assigned (the guardrail uses the agent's LLM) +- Provide a clear, descriptive string explaining the validation criteria + +```python Code +from crewai import Task + +# Single LLM-based guardrail +blog_task = Task( + description="Write a blog post about AI", + expected_output="A blog post under 200 words", + agent=blog_agent, + guardrail="The blog post must be under 200 words and contain no technical jargon" +) +``` + +LLM-based guardrails are particularly useful for: +- **Complex validation logic** that's difficult to express programmatically +- **Subjective criteria** like tone, style, or quality assessments +- **Natural language requirements** that are easier to describe than code + +The LLM guardrail will: +1. Analyze the task output against your description +2. Return `(True, output)` if the output complies with the criteria +3. Return `(False, feedback)` with specific feedback if validation fails + +**Example with detailed validation criteria**: + +```python Code +research_task = Task( + description="Research the latest developments in quantum computing", + expected_output="A comprehensive research report", + agent=researcher_agent, + guardrail=""" + The research report must: + - Be at least 1000 words long + - Include at least 5 credible sources + - Cover both technical and practical applications + - Be written in a professional, academic tone + - Avoid speculation or unverified claims + """ +) +``` + +### Multiple Guardrails + +You can apply multiple guardrails to a task using the `guardrails` parameter. Multiple guardrails are executed sequentially, with each guardrail receiving the output from the previous one. This allows you to chain validation and transformation steps. + +The `guardrails` parameter accepts: +- A list of guardrail functions or string descriptions +- A single guardrail function or string (same as `guardrail`) + +**Note**: If `guardrails` is provided, it takes precedence over `guardrail`. The `guardrail` parameter will be ignored when `guardrails` is set. + +```python Code +from typing import Tuple, Any +from crewai import TaskOutput, Task + +def validate_word_count(result: TaskOutput) -> Tuple[bool, Any]: + """Validate word count is within limits.""" + word_count = len(result.raw.split()) + if word_count < 100: + return (False, f"Content too short: {word_count} words. Need at least 100 words.") + if word_count > 500: + return (False, f"Content too long: {word_count} words. 
Maximum is 500 words.") + return (True, result.raw) + +def validate_no_profanity(result: TaskOutput) -> Tuple[bool, Any]: + """Check for inappropriate language.""" + profanity_words = ["badword1", "badword2"] # Example list + content_lower = result.raw.lower() + for word in profanity_words: + if word in content_lower: + return (False, f"Inappropriate language detected: {word}") + return (True, result.raw) + +def format_output(result: TaskOutput) -> Tuple[bool, Any]: + """Format and clean the output.""" + formatted = result.raw.strip() + # Capitalize first letter + formatted = formatted[0].upper() + formatted[1:] if formatted else formatted + return (True, formatted) + +# Apply multiple guardrails sequentially +blog_task = Task( + description="Write a blog post about AI", + expected_output="A well-formatted blog post between 100-500 words", + agent=blog_agent, + guardrails=[ + validate_word_count, # First: validate length + validate_no_profanity, # Second: check content + format_output # Third: format the result + ], + guardrail_max_retries=3 +) +``` + +In this example, the guardrails execute in order: +1. `validate_word_count` checks the word count +2. `validate_no_profanity` checks for inappropriate language (using the output from step 1) +3. `format_output` formats the final result (using the output from step 2) + +If any guardrail fails, the error is sent back to the agent, and the task is retried up to `guardrail_max_retries` times. + +**Mixing function-based and LLM-based guardrails**: + +You can combine both function-based and string-based guardrails in the same list: + +```python Code +from typing import Tuple, Any +from crewai import TaskOutput, Task + +def validate_word_count(result: TaskOutput) -> Tuple[bool, Any]: + """Validate word count is within limits.""" + word_count = len(result.raw.split()) + if word_count < 100: + return (False, f"Content too short: {word_count} words. Need at least 100 words.") + if word_count > 500: + return (False, f"Content too long: {word_count} words. Maximum is 500 words.") + return (True, result.raw) + +# Mix function-based and LLM-based guardrails +blog_task = Task( + description="Write a blog post about AI", + expected_output="A well-formatted blog post between 100-500 words", + agent=blog_agent, + guardrails=[ + validate_word_count, # Function-based: precise word count check + "The content must be engaging and suitable for a general audience", # LLM-based: subjective quality check + "The writing style should be clear, concise, and free of technical jargon" # LLM-based: style validation + ], + guardrail_max_retries=3 +) +``` + +This approach combines the precision of programmatic validation with the flexibility of LLM-based assessment for subjective criteria. + ### Guardrail Function Requirements 1. **Function Signature**: From 6b52587c672c2f79c7340b454e61620ab2d792e3 Mon Sep 17 00:00:00 2001 From: Lorenze Jay <63378463+lorenzejay@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:38:30 -0800 Subject: [PATCH 2/4] feat: expose messages to TaskOutput and LiteAgentOutputs (#3880) * feat: add messages to task and agent outputs - Introduced a new field in and to capture messages from the last task execution. - Updated the class to store the last messages and provide a property for easy access. - Enhanced the and classes to include messages in their outputs. - Added tests to ensure that messages are correctly included in task outputs and agent outputs during execution. 
* using typing_extensions for 3.10 compatability * feat: add last_messages attribute to agent for improved task tracking - Introduced a new `last_messages` attribute in the agent class to store messages from the last task execution. - Updated the `Crew` class to handle the new messages attribute in task outputs. - Enhanced existing tests to ensure that the `last_messages` attribute is correctly initialized and utilized across various guardrail scenarios. * fix: add messages field to TaskOutput in tests for consistency - Updated multiple test cases to include the new `messages` field in the `TaskOutput` instances. - Ensured that all relevant tests reflect the latest changes in the TaskOutput structure, maintaining consistency across the test suite. - This change aligns with the recent addition of the `last_messages` attribute in the agent class for improved task tracking. * feat: preserve messages in task outputs during replay - Added functionality to the Crew class to store and retrieve messages in task outputs. - Enhanced the replay mechanism to ensure that messages from stored task outputs are preserved and accessible. - Introduced a new test case to verify that messages are correctly stored and replayed, ensuring consistency in task execution and output handling. - This change improves the overall tracking and context retention of task interactions within the CrewAI framework. * fix original test, prev was debugging --- docs/en/concepts/tasks.mdx | 1 + lib/crewai/src/crewai/agent/core.py | 16 + lib/crewai/src/crewai/crew.py | 2 + lib/crewai/src/crewai/lite_agent.py | 1 + lib/crewai/src/crewai/lite_agent_output.py | 3 + lib/crewai/src/crewai/task.py | 2 + lib/crewai/src/crewai/tasks/task_output.py | 2 + lib/crewai/src/crewai/utilities/types.py | 4 +- lib/crewai/tests/agents/test_lite_agent.py | 21 + ...t_lite_agent_output_includes_messages.yaml | 261 +++++++++++ .../test_task_output_includes_messages.yaml | 423 ++++++++++++++++++ lib/crewai/tests/test_crew.py | 109 ++++- lib/crewai/tests/test_task.py | 42 ++ lib/crewai/tests/test_task_guardrails.py | 16 + 14 files changed, 889 insertions(+), 14 deletions(-) create mode 100644 lib/crewai/tests/cassettes/test_lite_agent_output_includes_messages.yaml create mode 100644 lib/crewai/tests/cassettes/test_task_output_includes_messages.yaml diff --git a/docs/en/concepts/tasks.mdx b/docs/en/concepts/tasks.mdx index c9082dbc7..9eba77f19 100644 --- a/docs/en/concepts/tasks.mdx +++ b/docs/en/concepts/tasks.mdx @@ -224,6 +224,7 @@ By default, the `TaskOutput` will only include the `raw` output. A `TaskOutput` | **JSON Dict** | `json_dict` | `Optional[Dict[str, Any]]` | A dictionary representing the JSON output of the task. | | **Agent** | `agent` | `str` | The agent that executed the task. | | **Output Format** | `output_format` | `OutputFormat` | The format of the task output, with options including RAW, JSON, and Pydantic. The default is RAW. | +| **Messages** | `messages` | `list[LLMMessage]` | The messages from the last task execution. 
| ### Task Methods and Properties diff --git a/lib/crewai/src/crewai/agent/core.py b/lib/crewai/src/crewai/agent/core.py index 1d94c4d19..b4f46a216 100644 --- a/lib/crewai/src/crewai/agent/core.py +++ b/lib/crewai/src/crewai/agent/core.py @@ -119,6 +119,7 @@ class Agent(BaseAgent): _times_executed: int = PrivateAttr(default=0) _mcp_clients: list[Any] = PrivateAttr(default_factory=list) + _last_messages: list[LLMMessage] = PrivateAttr(default_factory=list) max_execution_time: int | None = Field( default=None, description="Maximum execution time for an agent to execute a task", @@ -538,6 +539,12 @@ class Agent(BaseAgent): event=AgentExecutionCompletedEvent(agent=self, task=task, output=result), ) + self._last_messages = ( + self.agent_executor.messages.copy() + if self.agent_executor and hasattr(self.agent_executor, "messages") + else [] + ) + self._cleanup_mcp_clients() return result @@ -1341,6 +1348,15 @@ class Agent(BaseAgent): def set_fingerprint(self, fingerprint: Fingerprint) -> None: self.security_config.fingerprint = fingerprint + @property + def last_messages(self) -> list[LLMMessage]: + """Get messages from the last task execution. + + Returns: + List of LLM messages from the most recent task execution. + """ + return self._last_messages + def _get_knowledge_search_query(self, task_prompt: str, task: Task) -> str | None: """Generate a search query for the knowledge base based on the task description.""" crewai_event_bus.emit( diff --git a/lib/crewai/src/crewai/crew.py b/lib/crewai/src/crewai/crew.py index f5af4a426..cadd9d3b1 100644 --- a/lib/crewai/src/crewai/crew.py +++ b/lib/crewai/src/crewai/crew.py @@ -809,6 +809,7 @@ class Crew(FlowTrackable, BaseModel): "json_dict": output.json_dict, "output_format": output.output_format, "agent": output.agent, + "messages": output.messages, }, "task_index": task_index, "inputs": inputs, @@ -1236,6 +1237,7 @@ class Crew(FlowTrackable, BaseModel): pydantic=stored_output["pydantic"], json_dict=stored_output["json_dict"], output_format=stored_output["output_format"], + messages=stored_output.get("messages", []), ) self.tasks[i].output = task_output diff --git a/lib/crewai/src/crewai/lite_agent.py b/lib/crewai/src/crewai/lite_agent.py index 4314e900e..01b165638 100644 --- a/lib/crewai/src/crewai/lite_agent.py +++ b/lib/crewai/src/crewai/lite_agent.py @@ -358,6 +358,7 @@ class LiteAgent(FlowTrackable, BaseModel): pydantic=formatted_result, agent_role=self.role, usage_metrics=usage_metrics.model_dump() if usage_metrics else None, + messages=self._messages, ) # Process guardrail if set diff --git a/lib/crewai/src/crewai/lite_agent_output.py b/lib/crewai/src/crewai/lite_agent_output.py index 582f52cdd..4183dba1f 100644 --- a/lib/crewai/src/crewai/lite_agent_output.py +++ b/lib/crewai/src/crewai/lite_agent_output.py @@ -6,6 +6,8 @@ from typing import Any from pydantic import BaseModel, Field +from crewai.utilities.types import LLMMessage + class LiteAgentOutput(BaseModel): """Class that represents the result of a LiteAgent execution.""" @@ -20,6 +22,7 @@ class LiteAgentOutput(BaseModel): usage_metrics: dict[str, Any] | None = Field( description="Token usage metrics for this execution", default=None ) + messages: list[LLMMessage] = Field(description="Messages of the agent", default=[]) def to_dict(self) -> dict[str, Any]: """Convert pydantic_output to a dictionary.""" diff --git a/lib/crewai/src/crewai/task.py b/lib/crewai/src/crewai/task.py index 869419c25..dfb505d77 100644 --- a/lib/crewai/src/crewai/task.py +++ b/lib/crewai/src/crewai/task.py @@ 
-539,6 +539,7 @@ class Task(BaseModel): json_dict=json_output, agent=agent.role, output_format=self._get_output_format(), + messages=agent.last_messages, ) if self._guardrails: @@ -949,6 +950,7 @@ Follow these guidelines: json_dict=json_output, agent=agent.role, output_format=self._get_output_format(), + messages=agent.last_messages, ) return task_output diff --git a/lib/crewai/src/crewai/tasks/task_output.py b/lib/crewai/src/crewai/tasks/task_output.py index ba9f95c18..901604ac1 100644 --- a/lib/crewai/src/crewai/tasks/task_output.py +++ b/lib/crewai/src/crewai/tasks/task_output.py @@ -6,6 +6,7 @@ from typing import Any from pydantic import BaseModel, Field, model_validator from crewai.tasks.output_format import OutputFormat +from crewai.utilities.types import LLMMessage class TaskOutput(BaseModel): @@ -40,6 +41,7 @@ class TaskOutput(BaseModel): output_format: OutputFormat = Field( description="Output format of the task", default=OutputFormat.RAW ) + messages: list[LLMMessage] = Field(description="Messages of the task", default=[]) @model_validator(mode="after") def set_summary(self): diff --git a/lib/crewai/src/crewai/utilities/types.py b/lib/crewai/src/crewai/utilities/types.py index bc331a97e..a4627613d 100644 --- a/lib/crewai/src/crewai/utilities/types.py +++ b/lib/crewai/src/crewai/utilities/types.py @@ -1,6 +1,8 @@ """Types for CrewAI utilities.""" -from typing import Any, Literal, TypedDict +from typing import Any, Literal + +from typing_extensions import TypedDict class LLMMessage(TypedDict): diff --git a/lib/crewai/tests/agents/test_lite_agent.py b/lib/crewai/tests/agents/test_lite_agent.py index 0c6b00c23..f2fa4b2e6 100644 --- a/lib/crewai/tests/agents/test_lite_agent.py +++ b/lib/crewai/tests/agents/test_lite_agent.py @@ -238,6 +238,27 @@ def test_lite_agent_returns_usage_metrics(): assert result.usage_metrics["total_tokens"] > 0 +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_lite_agent_output_includes_messages(): + """Test that LiteAgentOutput includes messages from agent execution.""" + llm = LLM(model="gpt-4o-mini") + agent = Agent( + role="Research Assistant", + goal="Find information about the population of Tokyo", + backstory="You are a helpful research assistant who can search for information about the population of Tokyo.", + llm=llm, + tools=[WebSearchTool()], + verbose=True, + ) + + result = agent.kickoff("What is the population of Tokyo?") + + assert isinstance(result, LiteAgentOutput) + assert hasattr(result, "messages") + assert isinstance(result.messages, list) + assert len(result.messages) > 0 + + @pytest.mark.vcr(filter_headers=["authorization"]) @pytest.mark.asyncio async def test_lite_agent_returns_usage_metrics_async(): diff --git a/lib/crewai/tests/cassettes/test_lite_agent_output_includes_messages.yaml b/lib/crewai/tests/cassettes/test_lite_agent_output_includes_messages.yaml new file mode 100644 index 000000000..c71e22690 --- /dev/null +++ b/lib/crewai/tests/cassettes/test_lite_agent_output_includes_messages.yaml @@ -0,0 +1,261 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"You are Research Assistant. 
You + are a helpful research assistant who can search for information about the population + of Tokyo.\nYour personal goal is: Find information about the population of Tokyo\n\nYou + ONLY have access to the following tools, and should NEVER make up tools that + are not listed here:\n\nTool Name: search_web\nTool Arguments: {''query'': {''description'': + None, ''type'': ''str''}}\nTool Description: Search the web for information + about a topic.\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: + you should always think about what to do\nAction: the action to take, only one + name of [search_web], just the name, exactly as it''s written.\nAction Input: + the input to the action, just a simple JSON object, enclosed in curly braces, + using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce + all necessary information is gathered, return the following format:\n\n```\nThought: + I now know the final answer\nFinal Answer: the final answer to the original + input question\n```"},{"role":"user","content":"What is the population of Tokyo?"}],"model":"gpt-4o-mini"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1160' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.109.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.109.1 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jJM9b9swEIZ3/YoDZzvwd2xtRTI0HZolcIcqkGnqJLGheCx5Suoa/u+F + 5A/JTQp00XDPva/ui/sIQOhMxCBUKVlVzgzv5Lf7fDtbmcfRV1M9/l5/Wa/N6/r+7fNydycGjYK2 + P1DxWXWjqHIGWZM9YuVRMjau49vFZDleLVbzFlSUoWlkhePhjIaVtno4GU1mw9HtcLw8qUvSCoOI + 4XsEALBvv02dNsNfIobR4BypMARZoIgvSQDCk2kiQoagA0vLYtBBRZbRtqVvNpvEPpVUFyXH8AAW + MQMmCCi9KiEnD1wiGMkYGLTNyVeyaRI8FtJn2hZtgiNXmyOgHJ7oZUc3if2kmkh8ckvfcHuOwYN1 + NcewT8TPGv0uEXEiVO09Wv7IDCajyTQRh8RuNpt+Lx7zOshmnrY2pgektcStSTvF5xM5XOZmqHCe + tuEvqci11aFMPcpAtplRYHKipYcI4LndT301cuE8VY5TphdsfzeZnvYjurPo6HR5gkwsTU+1OIMr + vzRDltqE3oaFkqrErJN25yDrTFMPRL2u31fzkfexc22L/7HvgFLoGLPUecy0uu64S/PYvJp/pV2m + 3BYsAvpXrTBljb7ZRIa5rM3xlkXYBcYqzbUt0Duvjwedu3S+GMl8gfP5SkSH6A8AAAD//wMAJGbR + +94DAAA= + headers: + CF-RAY: + - 99c98dd3ddb9ce6c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 11 Nov 2025 00:08:16 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=6maCeRS26vR_uzqYdtL7RXY7kzGdvLhWcE2RP2PnZS0-1762819696-1.0.1.1-72zCZZVBiGDdwPDvETKS_fUA4DYCLVyVHDYW2qpSxxAUuWKNPLxQQ1PpeI7YuB9v.y1e3oapeuV5mBjcP4c9_ZbH.ZI14TUNOexPUB6yCaQ; + path=/; expires=Tue, 11-Nov-25 00:38:16 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=a.XOUFuP.5IthR7ITJrIWIZSWWAkmHU._pM9.qhCnhM-1762819696364-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '1199' + openai-project: + - 
proj_xitITlrFeen7zjNSzML82h9x + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '1351' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-project-tokens: + - '150000000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-project-tokens: + - '149999735' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999735' + x-ratelimit-reset-project-tokens: + - 0s + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_50a8251d98f748bb8e73304a2548b694 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"You are Research Assistant. You + are a helpful research assistant who can search for information about the population + of Tokyo.\nYour personal goal is: Find information about the population of Tokyo\n\nYou + ONLY have access to the following tools, and should NEVER make up tools that + are not listed here:\n\nTool Name: search_web\nTool Arguments: {''query'': {''description'': + None, ''type'': ''str''}}\nTool Description: Search the web for information + about a topic.\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: + you should always think about what to do\nAction: the action to take, only one + name of [search_web], just the name, exactly as it''s written.\nAction Input: + the input to the action, just a simple JSON object, enclosed in curly braces, + using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce + all necessary information is gathered, return the following format:\n\n```\nThought: + I now know the final answer\nFinal Answer: the final answer to the original + input question\n```"},{"role":"user","content":"What is the population of Tokyo?"},{"role":"assistant","content":"```\nThought: + I need to search for the latest information regarding the population of Tokyo.\nAction: + search_web\nAction Input: {\"query\":\"current population of Tokyo 2023\"}\n```\nObservation: + Tokyo''s population in 2023 was approximately 21 million people in the city + proper, and 37 million in the greater metropolitan area."}],"model":"gpt-4o-mini"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1521' + content-type: + - application/json + cookie: + - __cf_bm=6maCeRS26vR_uzqYdtL7RXY7kzGdvLhWcE2RP2PnZS0-1762819696-1.0.1.1-72zCZZVBiGDdwPDvETKS_fUA4DYCLVyVHDYW2qpSxxAUuWKNPLxQQ1PpeI7YuB9v.y1e3oapeuV5mBjcP4c9_ZbH.ZI14TUNOexPUB6yCaQ; + _cfuvid=a.XOUFuP.5IthR7ITJrIWIZSWWAkmHU._pM9.qhCnhM-1762819696364-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.109.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.109.1 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFPLbtswELz7KxY8W4Es+RHr1ifgQw8F3OZQBxJDrSTWFJcgqSRG4H8v + KD+kNCnQCwFyZpazs+TLBIDJkmXARMO9aI2KPvG7z81d8mWf4E/cxN9+PK5SvfkYf08Pe8+mQUEP + v1H4i+pGUGsUekn6BAuL3GOoOlstk9vZerle9UBLJaogq42P5hS1UssoiZN5FK+i2e1Z3ZAU6FgG + vyYAAC/9GnzqEp9ZBvH0ctKic7xGll1JAMySCieMOyed5/rk+QwK0h51b70oip3eNtTVjc9gA5qe + 
YB8W3yBUUnMFXLsntDv9td996HcZbBsEQ6ZTPLQMVMGW9gcCqSGJkxSkA26MpWfZco/qAMkMWqlU + IBskozBQwy1C+gMYSwYtcF1CuroSz4y6j9JCi96SISU918At8pudLopi3JrFqnM8xKs7pUYA15p8 + 77UP9f6MHK8xKqqNpQf3l5RVUkvX5Ba5Ix0ic54M69HjBOC+H1f3agLMWGqNzz3tsb8ujdNTPTa8 + kgGdX0BPnquRar6cvlMvL9Fzqdxo4Exw0WA5SIfXwbtS0giYjLp+6+a92qfOpa7/p/wACIHGY5kb + i6UUrzseaBbDJ/oX7Zpyb5g5tI9SYO4l2jCJEiveqfN3dAfnsc0rqWu0xsrT+65MvljGvFriYrFm + k+PkDwAAAP//AwDgLjwY7QMAAA== + headers: + CF-RAY: + - 99c98dde7fc9ce6c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 11 Nov 2025 00:08:18 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '1339' + openai-project: + - proj_xitITlrFeen7zjNSzML82h9x + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '1523' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-project-tokens: + - '150000000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-project-tokens: + - '149999657' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999657' + x-ratelimit-reset-project-tokens: + - 0s + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_ade054352f8c4dfdba50683755eba41d + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/test_task_output_includes_messages.yaml b/lib/crewai/tests/cassettes/test_task_output_includes_messages.yaml new file mode 100644 index 000000000..5f9f33fe8 --- /dev/null +++ b/lib/crewai/tests/cassettes/test_task_output_includes_messages.yaml @@ -0,0 +1,423 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"You are Researcher. You''re an + expert researcher, specialized in technology, software engineering, AI and startups. 
+ You work as a freelancer and is now working on doing research and analysis for + a new customer.\nYour personal goal is: Make the best research and analysis + on content about AI and AI agents\nTo give my best complete final answer to + the task respond using the exact following format:\n\nThought: I now can give + a great answer\nFinal Answer: Your final answer must be the great and the most + complete as possible, it must be outcome described.\n\nI MUST use these formats, + my job depends on it!"},{"role":"user","content":"\nCurrent Task: Give me a + list of 3 interesting ideas about AI.\n\nThis is the expected criteria for your + final answer: Bullet point list of 3 ideas.\nyou MUST return the actual complete + content as the final answer, not a summary.\n\nYou MUST follow these instructions: + \n - Include specific examples and real-world case studies to enhance the credibility + and depth of the article ideas.\n - Incorporate mentions of notable companies, + projects, or tools relevant to each topic to provide concrete context.\n - Add + diverse viewpoints such as interviews with experts, users, or thought leaders + to enrich the narrative and lend authority.\n - Address ethical, social, and + emotional considerations explicitly to reflect a balanced and comprehensive + analysis.\n - Enhance the descriptions by including implications for future + developments and the potential impact on society.\n - Use more engaging and + vivid language that draws the reader into each topic''s nuances and importance.\n + - Include notes or summaries that contextualize each set of ideas in terms of + relevance and potential reader engagement.\n - In future tasks, focus on elaborating + initial outlines into more detailed and nuanced article proposals with richer + content and insights.\n\nBegin! 
This is VERY important to you, use the tools + available and give your best Final Answer, your job depends on it!\n\nThought:"}],"model":"gpt-4.1-mini"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '2076' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.109.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.109.1 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1xXS44kxw3d6xREbwwMqgsaWWPJvSvNT21roIE0kAx5NswIViWnI4MpMiKra7TR + IbTxSbz3UXQSgxFZ1S1tGuiMH/n43iPrl08Arjhe3cBVGLGEaU7Xz/HHL3/YfTz+9GJ69sPXL8sr + e/bT9JMefv4Ql39dbfyEDB8olPOpbZBpTlRYcl8OSljIb336xd8++/Lp3//6xbO2MEmk5McOc7n+ + fPv0euLM1599+tmz608/v376+Xp8FA5kVzfw708AAH5pfz3QHOn+6gY+3Zy/TGSGB7q6uWwCuFJJ + /uUKzdgK5nK1eVgMkgvlFvu7UephLDdwC1mOEDDDgRcChIMnAJjtSPo+v+KMCXbtv5v3+X2+hidP + drfXL5QXyvCW1CRj4o8U4WvCVMaASjfwHS2SqsPCHzkf4C0Wplzg21qCTGTwblSPAN4qRQ7F395l + TKfCwZ48eZ8B3o1swJEQ6H5OomQwyhF2t8AGRTHbXnTyy8fLwzCcgDIOqX3mw5hOwDnywrGuURbP + b/JY5oTZYECjCJJhXkOMWBAwR5gfQmvFsy28EgXODmygDXjtMTMZJL4juP3qDfyIxSSvUMCIC0Gi + hRQPFD30IoCe5keCyW/HBEpBNNoGDpRl4mCb9npInNt6UcZkfrIgJ1EvViCFMpLi7K/XzD9XSiew + ysVTFCAM4zmjLbwgmt9wjr//+h/zKOxkhSYY0cBGOWaYVSY2As6XrPMB7jhmcgA/VD0BoabTBgwX + X0u8kEFZqzirYEeKcyFdKHvpt3Db/mM6GhzZAXmo1KyyJzN2+ljHu4droLQQJhikjEC5jNUYbYK9 + KOxuWw6zOJEZU4dKHJBsgIPUArPyguHUlloxjUJVLqdNpwQfxpYelbEBHDnRNKF59iPm2MhjlI3X + jJxn2BP6XgJjchyK09NG3hcIUlMEpVgDQWSbUbl4YfzCh4wxBDKDoRbAZALKdmdA9xhIB2whcaaf + najlBLzvfBFHRwlqpoVyOvkDRXmoheIWXk5SGoQe0gXAgTLtucBeZYJRZmpY8DSrLOTElkMW4x7L + 5Hj0iOh+JmXKwQ/cM5UTyEIKWItMbmsQKbDX7HrCO86HLbyqpSr53YlDA8nTDqlGgt3t9SxHUoqw + sJaKaYUDLvbUK6+E6brw5ELLXEQdi0aYI6HikAgGFi+JqG1WtvoeBJtQC+kGJlECJZslm5ftEfAU + ZGV8GfFcLrovlCMk3lPLO7ioO2nOJZB9Xz4kGRzhbfekIjMHNyGlRAvm0g5RPuChxWTABYrUMLpl + 1QkzHCml64HacgGEfc0R3YQwNX/oPOa8cCG/FyNpk3zwbGJXu2tLHWSCAZObAAxUjkQZCoUxS5JD + T6Axu9GvmQxn2l68uxbJMkltRrA7NL5whudui47bbY7VCUZ2Ay/vZ8zRo/5KPGL/6qjstGzgTTUO + 3au+L6KnQsmV82fzjpSWJoUi/iL2F5thfjtTXgX9YvfNN//778um8YXNqYL+yD/qHUUKd+2ZR/v9 + +yD3bf/kgTQzFpes5B5URtWe0oEyKRZRA6vB2eeRvKj5QL75D70knHGYVVyuZF51MnpQYxgxJcoH + 8pORu/4gSye/7F0uo6iNPHd76lc6o4YTBEkJB9Eu+O6KjSOoha0YtBYTaaYcKZd0OoffousdfAvf + uWKOoilCQCOwUmN3nC69H1Ezaa/RKmun2+XJ3e117C28ozerxBo8gxays+11RY2MGeY6JLbRz+5u + r9do3EaEinpHmFHdDPrGAJEGLOSwtRkD0krnVnOnQEsUvqM933nTj+IebKHapcNTHp3f9lCOUZQ/ + SrYNHEdOBHZHs88KcBTV0+r8HnbkPnd4ITqwdPZIB0J7z+kN59JFPsjQjDthoDYbOK/Wgmzh5aVR + tCBbmfcSqvnYEGQ+qVvoBuSYqZV9cwHRqeBvhNVP3BIb1BQ98jt73FIe5BFpkuA1/3hpG24GZzA6 + D5t546lbd5Bpksj7k4fdKPso7+jFb2lzXqTpEZ0vDvFffABxLCX//utvR8nuNh7+Hi1wbpYOC6lV + O4PONnWbefSABcmZQrl0hKKUo9u7H5jdtEzmka00B2vD0IMUXJrNm6s/750hQqipVMXUSX9fHj/p + TeBcvHTy1kt7zs1etQAXo7TfPjKhgHPhBR+baxtUyDwUzq1W+2orb/5kp77YqWQzK3ul93vS3oCU + w/jIaxrGWRa8aIldxlxOFws+02l3C6+9vWan+g18VTk1r33nfjSjUi4b2IUgNZfWA79vTcw6OeGd + VitH0TKeVswfWW/vURijNg9rKVR1ckEmiu2KveJEjYRA2ap2gUP0fiRzkwEmPmR77FILpkq2AZrm + Ea0P9+UScDhtnK89Yk5t5upcYs1ktoXnf5yZX4scEl0G1Lb5DQcVk33p8zOZZ882dtKsvW0QbENz + dwPOXLgV4MHie7dowmj9+DIcOJQtzznJafqznVqgjMry4KXOPx+c1Fr784Fo4ParYY8u3TbBH3Jr + BOtobVB9fThBwiOQT5DdWDZA9+Sz0p77uru3rbLsDv8HfJ4nwjZAw+52A4p97DEJTD47YEKdVvOz + 
qgtx6oNBm33ZgvK0anjr3Zz03Hvf8ZS5wGsatLbd/3SJPlc87kXbiLiw5+6TrJN1JjUfkhrEkhsC + 7ZwVraEL1X8ouL7degaKsUvr8nvDf9jERUJvZfvW50KqbVqLZHzI6zB4qOkccpfKZd7utJ5VhpXT + k2grwFpZPs9tzSrdVVbnquaaX8X8fwAAAP//jFjNbtwgEL7vUyBfcmkr7SZt95pjpLxCZBF7sFEw + EMCtcth3j74BL3jbSj2PjT3AfH+squh9ZfwrhN2Iok3jxr3cp0B3UQBLjLn5++k6xs1JjhrfBjXL + N5qd2SSdh72xgO4wafbOW7M7LZ+5NGHIBbg3b3sdtcQ3e7VFdXNvi056khv7KZKBRaospvDxSSw6 + rpGgMW4p75t4do5pXM4kR+64zh6jgVMZNfOFyghWTsuFD8GwjamsGhToTSFpdfUGIKxXQgYgvP7l + kjRfduhTrEv2PHGWMA+vwcnRZCzOpgnZyQI7v5PkMQVnJ+YDpBJAe0auDfKLT6SxkQsYJffVO1Pu + uV68HFKm6p0oL/6WmW7FuWLYZ+kzC6hMer9xS1r6oIUdUBRB4gaB5GwmuUXbzR6AHNqcJpBao0RY + ZFdjmoK01qW8kUiIXkrlcs2EjJswHPHm1Q7kGOc+kIzOIv+JyfmOq5eDEC+cPa27OKmDy/KpT+6N + +HP355I9dTXzqtWfD/elmnCotXA8nrbKbsV+JOQZscmvukEOM4313Rp2Qa64pnBo+v7zf/62du5d + 2+l/lq+FAdqIxn6LRdqe62OBEAr+67HrPvMPdxGRyEB90hRwFiMpuZqc1HUZKnuFiQ8+6BzXKd8/ + DKfz96M6/zh1h8vhEwAA//8DAJPMJFq9FAAA + headers: + CF-RAY: + - 99c98602dfefcf4d-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 11 Nov 2025 00:03:08 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=ObqPLq12_9tJ06.V1RkHCM6FH_YGcLoC2ykIFBEawa8-1762819388-1.0.1.1-l7PJTVbZ1vCcKdeOe8GQVuFL59SCk0xhO_dMFY2wuH5Ybd1hhM_Xcv_QivXVhZlBGlRgRAgG631P99JOs_IYAYcNFJReE.3NpPl34VfPVeQ; + path=/; expires=Tue, 11-Nov-25 00:33:08 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=kdn.HizdlSPG7cBu_zv1ZPcu0jMwDQIA4H9YvMXu6a0-1762819388587-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '13504' + openai-project: + - proj_xitITlrFeen7zjNSzML82h9x + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '13638' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-project-tokens: + - '150000000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-project-tokens: + - '149999507' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999507' + x-ratelimit-reset-project-tokens: + - 0s + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2de40e1beb5f42ea896664df36e8ce8f + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are Researcher. You're + an expert researcher, specialized in technology, software engineering, AI and + startups. 
You work as a freelancer and is now working on doing research and + analysis for a new customer.\\nYour personal goal is: Make the best research + and analysis on content about AI and AI agents\\nTo give my best complete final + answer to the task respond using the exact following format:\\n\\nThought: I + now can give a great answer\\nFinal Answer: Your final answer must be the great + and the most complete as possible, it must be outcome described.\\n\\nI MUST + use these formats, my job depends on it!\"},{\"role\":\"user\",\"content\":\"\\nCurrent + Task: Summarize the ideas from the previous task.\\n\\nThis is the expected + criteria for your final answer: A summary of the ideas.\\nyou MUST return the + actual complete content as the final answer, not a summary.\\n\\nThis is the + context you're working with:\\n- **AI-Driven Personalized Healthcare: Revolutionizing + Patient Outcomes Through Predictive Analytics**\\n This idea explores how AI + is transforming healthcare by enabling highly individualized treatment plans + based on patient data and predictive models. For instance, companies like IBM + Watson Health have leveraged AI to analyze medical records, genomics, and clinical + trials to tailor cancer therapies uniquely suited to each patient. DeepMind\u2019s + AI system has shown promise in predicting kidney injury early, saving lives + through proactive intervention. Interviews with healthcare professionals and + patients reveal both enthusiasm for AI\u2019s potential and concerns about privacy + and data security, highlighting ethical dilemmas in handling sensitive information. + Socially, this shift could reduce disparities in healthcare access but also + risks exacerbating inequality if AI tools are unevenly distributed. Emotionally, + patients benefit from hope and improved prognosis but might also experience + anxiety over automated decision-making. Future implications include AI-powered + virtual health assistants and real-time monitoring with wearable biosensors, + promising a smarter, more responsive healthcare ecosystem that could extend + life expectancy and quality of life globally. This topic is relevant and engaging + as it touches human well-being at a fundamental level and invites readers to + consider the intricate balance between technology and ethics in medicine.\\n\\n- + **Autonomous AI Agents in Creative Industries: Expanding Boundaries of Art, + Music, and Storytelling**\\n This idea delves into AI agents like OpenAI\u2019s + DALL\xB7E for visual art, Jukedeck and OpenAI\u2019s Jukebox for music composition, + and narrative generators such as AI Dungeon, transforming creative processes. + These AI tools challenge traditional notions of authorship and creativity by + collaborating with human artists or independently generating content. Real-world + case studies include Warner Music experimenting with AI-driven music production + and the Guardian publishing AI-generated poetry, sparking public debate. Thought + leaders like AI artist Refik Anadol discuss how AI enhances creative horizons, + while skeptics worry about the dilution of human emotional expression and potential + job displacement for artists. Ethical discussions focus on copyright, ownership, + and the authenticity of AI-produced works. Socially, AI agents democratize access + to creative tools but may also commodify art. The emotional dimension involves + audiences' reception\u2014wonder and fascination versus skepticism and emotional + disconnect. 
Future trends anticipate sophisticated AI collaborators that understand + cultural context and emotions, potentially redefining art itself. This idea + captivates readers interested in the fusion of technology and the human spirit, + offering a rich narrative on innovation and identity.\\n\\n- **Ethical AI Governance: + Building Transparent, Accountable Systems for a Trustworthy Future**\\n This + topic addresses the urgent need for frameworks ensuring AI development aligns + with human values, emphasizing transparency, accountability, and fairness. Companies + like Google DeepMind and Microsoft have established AI ethics boards, while + initiatives such as OpenAI commit to responsible AI deployment. Real-world scenarios + include controversies over biased facial recognition systems used by law enforcement, + exemplified by cases involving companies like Clearview AI, raising societal + alarm about surveillance and discrimination. Experts like Timnit Gebru and Kate + Crawford provide critical perspectives on bias and structural injustice embedded + in AI systems, advocating for inclusive design and regulation. Ethically, this + topic probes the moral responsibility of creators versus users and the consequences + of autonomous AI decisions. Socially, there's a call for inclusive governance + involving diverse stakeholders to prevent marginalization. Emotionally, public + trust hinges on transparent communication and mitigation of fears related to + AI misuse or job displacement. Looking ahead, the establishment of international + AI regulatory standards and ethical certifications may become pivotal, ensuring + AI benefits are shared broadly and risks minimized. This topic strongly resonates + with readers concerned about the socio-political impact of AI and invites active + discourse on shaping a future where technology empowers rather than undermines + humanity.\\n\\nYou MUST follow these instructions: \\n - Include specific examples + and real-world case studies to enhance the credibility and depth of the article + ideas.\\n - Incorporate mentions of notable companies, projects, or tools relevant + to each topic to provide concrete context.\\n - Add diverse viewpoints such + as interviews with experts, users, or thought leaders to enrich the narrative + and lend authority.\\n - Address ethical, social, and emotional considerations + explicitly to reflect a balanced and comprehensive analysis.\\n - Enhance the + descriptions by including implications for future developments and the potential + impact on society.\\n - Use more engaging and vivid language that draws the + reader into each topic's nuances and importance.\\n - Include notes or summaries + that contextualize each set of ideas in terms of relevance and potential reader + engagement.\\n - In future tasks, focus on elaborating initial outlines into + more detailed and nuanced article proposals with richer content and insights.\\n\\nBegin! 
+ This is VERY important to you, use the tools available and give your best Final + Answer, your job depends on it!\\n\\nThought:\"}],\"model\":\"gpt-4.1-mini\"}" + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '6552' + content-type: + - application/json + cookie: + - __cf_bm=ObqPLq12_9tJ06.V1RkHCM6FH_YGcLoC2ykIFBEawa8-1762819388-1.0.1.1-l7PJTVbZ1vCcKdeOe8GQVuFL59SCk0xhO_dMFY2wuH5Ybd1hhM_Xcv_QivXVhZlBGlRgRAgG631P99JOs_IYAYcNFJReE.3NpPl34VfPVeQ; + _cfuvid=kdn.HizdlSPG7cBu_zv1ZPcu0jMwDQIA4H9YvMXu6a0-1762819388587-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.109.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.109.1 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA1xXzY4bxxG++ykKexRIQlKcRN7b2pIcBrItSwIUIL4Ue4ozJfZ0tau6yR35oofw + JU+Sex5FTxJUz3BJ6bLAcqa66+f7qfnjG4Ab7m5u4SYMWMKY4/oHfP/sw1v99WM3vvzX8x9//3mr + rx4f3te85WflZuURsvtAoZyjNkHGHKmwpPlxUMJCfuqTv//t6bMn3/3l2XftwSgdRQ/rc1l/u3my + Hjnx+unjp39dP/52/eTbJXwQDmQ3t/DvbwAA/mh/PdHU0f3NLTxenX8ZyQx7url9eAngRiX6Lzdo + xlYwzUkvD4OkQqnl/m6Q2g/lFraQ5AQBE/R8JEDovQDAZCdSgN/SS04Y4a79f+s//JYePbrbrp8r + HynBa1KThJE/Ugf/IIxlCKh0C2/oKLF6Y/gjpx5eY2FKBX6pJchIBu8G9RzgtVLHofjtdwnjVDjY + o0d+07uBDbgjhECpkBpIgjIQFMVke9ERW1gWz1X2cLcFTjA8ZLGC00BKkC9X4PkKoIS7SDBMmXSd + r8so3oLRk80Rk0FBjqL+QNr1nIpywELQkT8zv5tTx0fuKkbIS6kdFtzAa5ZEpN4DBwsmJoPIB4Lt + 9z/Beywmaekc1MKeghdSBIz3BcrSpiNagdHrwAhKQbSzFfSUZPRq/C6jYivA1EGInNqLRRkj7D25 + 1NsKguK+eCo7siwH8skHUi9LMXtmZfDxR+6Tty2wUZzgxGUAPBf2+dN/DGri36u3VvYcaQNveeSI + GqcVPCfKP3Hq2nvYHf2KzmuyyQqNBgMevXejJCvqdIHIe1obHluXMOOOI5cJdhNgCNXfiRPsRSmg + tfwx1EJw4C7RBJw+VPXkcSDsfBo2jbnIaIA5E3rzV4AxysljsxKNucHh3FB2fB0pOV43sE3G/VD2 + 9fyA6WRzEy7ggqwYCnuEQ9PbvvTHYOB+iH4EIDRIJFRtYP386c/TwJFgxDQBjTvF4PNuvcoqIxt5 + ocBjVjlSB3ImjN8wSKZVmxaB52DcJ95zwFQgiI8yGeBO6ow+yMpHDFOLpTK0Wq3QCbWzgXNrFSXj + r5qxcMt70Uh4Di3+riSoqSO1IOppwYgfRMEkMBWM0NEOC902qmRxxXEMzvQsAjvlrqfrRmIIZAY9 + ZoMjqVVrscp2mOFYE/3uXezYivKuqcrDgRINRpzgJGqUgO55RkjHllG5MNkGXoziQRgdnw9jcoXs + wPEOQcVMBTv7/OnPHSXac2nH7FVG2FEppD6fPokts/hCM2a5mahAr5hz9NAGmJoIjUCOpIC1yNjw + flV95yRjSXZpVpzAanbxaSkMdcQEH2rXuyht4JXIoaUm6oNseABKx3YMdbCvparLVIi1I4O77bpJ + JHVwZC3eyfl+eLCJuaIToTZR3LE4LEQNOop8nOVLCeO68EgwSuIiM61cOWLXOAk2ohbSFYyi5IBM + FL4ql4LMMrCBtzUMwCnJsWHtgv8iQGlw2YA+yg5j0weg+0yhYFrw7JBwkZB9e7yCEVtbiiO2SObQ + 2JuagilFOjpLGhMuaAAlH6I/MOACRWoYaAag49tPn/t/ohjXO2qFusRijNaYipDqLHE7jC3pHZUT + UYJQG4bW5IAvFIYkUfo5+zPZGrVss/hqLZJklOpDg7u+oZQT/OCO5BTdpq46B8hu4cV9xqbr8L3U + 1GFTQGeFlhX8VI3DbAZvi+hUKDomr4y1yUUu3tXYiDzIqSH0kgDOCfjYlGzA3OT5nMueKboHhQFj + pNSfO4O14bSNQrHjudWQZB6y7M9H+PDmgDKIuh5t4F3jszky0OCXTGnRxud3r179778v3PHI/aCZ + 6VwUHNkc1Khlpmuh+4Zyx1N2T1wk13vS4sRaVu7vxbVuMeR/1gN1FA4tq6u7/fed3F91J05zEQSi + 3LcVyST1toGfz0oPrSVnr7/bwvOaepIEerUZEdjVdJrfnV2qmxKOPsNmQTivLzlKgY6OFCXPG8pC + 7N0Ed9sNvHGKnkRjB02FwsIsjrE2r30giA2+XSwaAe9RE+mMmlayc03Zr8AI1eii31mlq4F87Fr8 + rjJMS2uLYjjMUvJuIPixonaMqR24k9hBrrtzTvOB6/M4O8hCRacVuGo3HrvuxGmJcTkPUtXI7Xkf + 62Irx7YvP/QYtbAVeEN7Pvg62UkEGvOA5r12hN9tvbbG4DOQB1H+KMlW8054kdz5NGt7n6+cgTxM + 0rIoNF0a+QKw4HYTDALOFtXMSylyUwVJfrlbFal0dFGhRgA/MpwZ4d4VfTF4yNH3LDJr77ufLYY8 + e61T1CUAguRJfe9YgZx8LRk4rx449nDFLIu8n5xjXwzhJHqwB0bTxU09JEYKZ15l0jLBXnGkFrOB 
+ txJ49tcykNHFnH3RCw69j3TF/BVIptSALu4zrqQ7N2D3WW984DwDZVfLF9YYZByl4/3UBEcdxEVg + RLP1gs22NOVavjZ+rB1TcrwoenVNLGakUwrtuBP5PuDqn+RIcUnYKeQL+YGyJ2bj3NO2OzVqXM0y + MqVld3o5G/EVX50cS2nNoJXQOPVu+JIH73Voc7jbQpAYcSeKxdvTluJI85dGIc1KbS6hxlIVI7SP + u/uv/G1xptUX/bss0SfHJ/tnBc4FLpqGWpbFb7bRgL4tN6Ap+YRs/gbq66w83q59tYXVV0bXNiB/ + OvPJMiuXlS/gtjgEwh4tcJqV6WFNPg+h1WoUzopx2RZW/mWYSkOS1zzn0Pot+4W5HHy6OvNmsdgX + Zfg/AAAA//+MWctu5DYQvPsriDnLRmxsAiM3Z5E1jE1O8XUx4FAtDdcUKfMxGwfwvwfVTUmciQ85 + GQZHlNiP6qoid87Dk3oEKfMI0K/qt2KFxDyj0WcdyedOPRgTis8c+b+qeJGR/xxLyhX8JM3taGUc + AF8+0oRDnChlO3IA+VTTTPWc2C2GQ0m5aSZFPhWmXA9PZ2jP2ECzC2/yL8s0DjJzFYnySbtC2wzN + 64EMQrWciAWWhG7QNnpK6Ub9QZqDgBQqju4qVh9DGB2t2o4f/NOCNochi6KbRelK+QqvUYegWagK + QIY4am//qR3F+8qYbRQF97fN0i05gHnMwSeLHOCHmADNmEPdQyjFZCl1daDxLLU6gQxrwBIr5tHL + 1F9kqERSStjpH4qgewxJaEcgQmX6F7r9syPNmlA9PHU8WicUMHFueyBLZJqTSjyRdcIJ8YmRNHLi + +/oJdaxFy89zUY/anXS1TOrkq7oOHcmmjXK1B5cMP9vJ26we6RAL7/4VH/M56h9DiD3Q+mR7hhub + UHNcnq+okeAhQanvqVfajSHafMRXIXZrV6UcixGQgdBGW1GC+pkpF0YrJh8dpH4w0sisYJEKvLBT + 9FqsdFFPkKy8d6SxOKDbm5rIHLW3aWpGm0HSe+4TFAuzJgCDTDrIEk/ysrVCqmlQ2TcwFHgWqjon + 31+XtGj1C5mGt9FrkekAADkjwkvFVIVhp1kbtgdW8XYx/za60giFbazhzOOKPoq9wWq9WG9CnANT + 3B7KKyED+oWOwWE2VsLDRIxARNSkIzPQ2lf1rAlIiM5eDYQb9QXzTvtmPkDDQlRxliIdFhRkhaKt + z9r6phQzUA99Q74XN25DS+7b4iu96xQNg2ysJsvgt4n2yaaSqBKTvmeA9qP6Hg4r86lw97clEfCL + 5mWHpyrehJKy6ci/XQajNJIAfFNhLPUBRWdWiKGY2T6RGhOzKKnZngJ4bw5qLDpqn4kkO1UQVINA + pBGzFve2uRMk6Ih1eBhpC4V7U7B9J1gGZxO2J5pXMYoxIY7bylcqBmBnNnfqd6yeC/sRwdWxI/UJ + MDzZ6pYtikSPElrr1SLo9DI3xSxtxjdNLC+SDBb0VtTwnhCLagJNUh8283i9vr7Gn98Bc2zc2qQO + 2rwIRuAQkTKJkVDhW3N946Cpg0ZklFgBtxN6lhXgdg7WLw4nVCvI7CVMgItJcjuODv6eU6Ieqqb2 + LFQKJyAx3VqTVAmK0uoEU7dTU3HZDtoQm5Xa98nouYqixbobGJisiBMDWwue0pkd3dJfBqEVA5pk + NRQrqGjNcaNF3HMtBzqHPtm0ZQErB2XNIzaTCcXBJIqcSqokkyptDyU7lq3r61Ha7HOj+oBgjuXI + HJJm63sQdwglTEB99k6lzxZCb6dGiw6rWfh2015PRBpK0rgj8cW5ZkF71AU/jIuRb3Xlfb0KcWGc + Yziki0d3g/U2Hfcg2cHj2iPlMO949f1KqW985VLOblF24hnsc3ghft3t7e0n2XC33fU0yz+tyxmY + sa3c3d7ddx/sua+XBs3Fzc5oc6R+e3a75QEEhGbhqjn5fz/oo73l9NaP/2f7bcHAHKJ+v9ydtIfe + fhbpOzt8H/9sjTR/8C7BSTe0z5YistHToIur92oyYveDBX2ao5V7qmHefzJ39z/fDve/3O2u3q/+ + BQAA//8DAPcawNa2GwAA + headers: + CF-RAY: + - 99c9865b6af3cf4d-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 11 Nov 2025 00:03:32 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '22788' + openai-project: + - proj_xitITlrFeen7zjNSzML82h9x + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '22942' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-project-tokens: + - '150000000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-project-tokens: + - '149998392' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998392' + x-ratelimit-reset-project-tokens: + - 0s + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_48c359c72cdc47aeb89c6d6eeffdce7d + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/test_crew.py b/lib/crewai/tests/test_crew.py index 1a1db50af..d4cf1acbf 100644 --- a/lib/crewai/tests/test_crew.py +++ 
b/lib/crewai/tests/test_crew.py @@ -340,7 +340,7 @@ def test_sync_task_execution(researcher, writer): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -412,7 +412,7 @@ def test_manager_agent_delegating_to_assigned_task_agent(researcher, writer): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -513,7 +513,7 @@ def test_manager_agent_delegates_with_varied_role_cases(): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) task.output = mock_task_output @@ -611,7 +611,7 @@ def test_crew_with_delegating_agents_should_not_override_task_tools(ceo, writer) ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -669,7 +669,7 @@ def test_crew_with_delegating_agents_should_not_override_agent_tools(ceo, writer ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -788,7 +788,7 @@ def test_task_tools_override_agent_tools_with_allow_delegation(researcher, write ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # We mock execute_sync to verify which tools get used at runtime @@ -1225,7 +1225,7 @@ async def test_async_task_execution_call_count(researcher, writer): # Create a valid TaskOutput instance to mock the return value mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Create a MagicMock Future instance @@ -1784,7 +1784,7 @@ def test_hierarchical_kickoff_usage_metrics_include_manager(researcher): Task, "execute_sync", return_value=TaskOutput( - description="dummy", raw="Hello", agent=researcher.role + description="dummy", raw="Hello", agent=researcher.role, messages=[] ), ): crew.kickoff() @@ -1828,7 +1828,7 @@ def test_hierarchical_crew_creation_tasks_with_agents(researcher, writer): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -1881,7 +1881,7 @@ def test_hierarchical_crew_creation_tasks_with_async_execution(researcher, write ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", 
messages=[] ) # Create a mock Future that returns our TaskOutput @@ -2246,11 +2246,13 @@ def test_conditional_task_uses_last_output(researcher, writer): description="First task output", raw="First success output", # Will be used by third task's condition agent=researcher.role, + messages=[], ) mock_third = TaskOutput( description="Third task output", raw="Third task executed", # Output when condition succeeds using first task output agent=writer.role, + messages=[], ) # Set up mocks for task execution and conditional logic @@ -2318,11 +2320,13 @@ def test_conditional_tasks_result_collection(researcher, writer): description="Success output", raw="Success output", # Triggers third task's condition agent=researcher.role, + messages=[], ) mock_conditional = TaskOutput( description="Conditional output", raw="Conditional task executed", agent=writer.role, + messages=[], ) # Set up mocks for task execution and conditional logic @@ -2399,6 +2403,7 @@ def test_multiple_conditional_tasks(researcher, writer): description="Mock success", raw="Success and proceed output", agent=researcher.role, + messages=[], ) # Set up mocks for task execution @@ -2806,7 +2811,7 @@ def test_manager_agent(researcher, writer): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) # Because we are mocking execute_sync, we never hit the underlying _execute_core @@ -3001,6 +3006,7 @@ def test_replay_feature(researcher, writer): output_format=OutputFormat.RAW, pydantic=None, summary="Mocked output for list of ideas", + messages=[], ) crew.kickoff() @@ -3052,6 +3058,7 @@ def test_crew_task_db_init(): output_format=OutputFormat.RAW, pydantic=None, summary="Write about AI in healthcare...", + messages=[], ) crew.kickoff() @@ -3114,6 +3121,7 @@ def test_replay_task_with_context(): output_format=OutputFormat.RAW, pydantic=None, summary="Detailed report on AI advancements...", + messages=[], ) mock_task_output2 = TaskOutput( description="Summarize the AI advancements report.", @@ -3123,6 +3131,7 @@ def test_replay_task_with_context(): output_format=OutputFormat.RAW, pydantic=None, summary="Summary of the AI advancements report...", + messages=[], ) mock_task_output3 = TaskOutput( description="Write an article based on the AI advancements summary.", @@ -3132,6 +3141,7 @@ def test_replay_task_with_context(): output_format=OutputFormat.RAW, pydantic=None, summary="Article on AI advancements...", + messages=[], ) mock_task_output4 = TaskOutput( description="Create a presentation based on the AI advancements article.", @@ -3141,6 +3151,7 @@ def test_replay_task_with_context(): output_format=OutputFormat.RAW, pydantic=None, summary="Presentation on AI advancements...", + messages=[], ) with patch.object(Task, "execute_sync") as mock_execute_task: @@ -3164,6 +3175,70 @@ def test_replay_task_with_context(): db_handler.reset() +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_replay_preserves_messages(): + """Test that replay preserves messages from stored task outputs.""" + from crewai.utilities.types import LLMMessage + + agent = Agent( + role="Test Agent", + goal="Test goal", + backstory="Test backstory", + allow_delegation=False, + ) + + task = Task( + description="Say hello", + expected_output="A greeting", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task], process=Process.sequential) + + mock_messages: list[LLMMessage] = [ + {"role": "system", "content": "You are 
a helpful assistant."}, + {"role": "user", "content": "Say hello"}, + {"role": "assistant", "content": "Hello!"}, + ] + + mock_task_output = TaskOutput( + description="Say hello", + raw="Hello!", + agent="Test Agent", + messages=mock_messages, + ) + + with patch.object(Task, "execute_sync", return_value=mock_task_output): + crew.kickoff() + + # Verify the task output was stored with messages + db_handler = TaskOutputStorageHandler() + stored_outputs = db_handler.load() + assert stored_outputs is not None + assert len(stored_outputs) > 0 + + # Verify messages are in the stored output + stored_output = stored_outputs[0]["output"] + assert "messages" in stored_output + assert len(stored_output["messages"]) == 3 + assert stored_output["messages"][0]["role"] == "system" + assert stored_output["messages"][1]["role"] == "user" + assert stored_output["messages"][2]["role"] == "assistant" + + # Replay the task and verify messages are preserved + with patch.object(Task, "execute_sync", return_value=mock_task_output): + replayed_output = crew.replay(str(task.id)) + + # Verify the replayed task output has messages + assert len(replayed_output.tasks_output) > 0 + replayed_task_output = replayed_output.tasks_output[0] + assert hasattr(replayed_task_output, "messages") + assert isinstance(replayed_task_output.messages, list) + assert len(replayed_task_output.messages) == 3 + + db_handler.reset() + + @pytest.mark.vcr(filter_headers=["authorization"]) def test_replay_with_context(): agent = Agent(role="test_agent", backstory="Test Description", goal="Test Goal") @@ -3181,6 +3256,7 @@ def test_replay_with_context(): pydantic=None, json_dict={}, output_format=OutputFormat.RAW, + messages=[], ) task1.output = context_output @@ -3241,6 +3317,7 @@ def test_replay_with_context_set_to_nullable(): description="Test Task Output", raw="test raw output", agent="test_agent", + messages=[], ) crew.kickoff() @@ -3264,6 +3341,7 @@ def test_replay_with_invalid_task_id(): pydantic=None, json_dict={}, output_format=OutputFormat.RAW, + messages=[], ) task1.output = context_output @@ -3328,6 +3406,7 @@ def test_replay_interpolates_inputs_properly(mock_interpolate_inputs): pydantic=None, json_dict={}, output_format=OutputFormat.RAW, + messages=[], ) task1.output = context_output @@ -3386,6 +3465,7 @@ def test_replay_setup_context(): pydantic=None, json_dict={}, output_format=OutputFormat.RAW, + messages=[], ) task1.output = context_output crew = Crew(agents=[agent], tasks=[task1, task2], process=Process.sequential) @@ -3619,6 +3699,7 @@ def test_conditional_should_skip(researcher, writer): description="Task 1 description", raw="Task 1 output", agent="Researcher", + messages=[], ) result = crew_met.kickoff() @@ -3653,6 +3734,7 @@ def test_conditional_should_execute(researcher, writer): description="Task 1 description", raw="Task 1 output", agent="Researcher", + messages=[], ) crew_met.kickoff() @@ -3824,7 +3906,7 @@ def test_task_tools_preserve_code_execution_tools(): ) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) with patch.object( @@ -3878,7 +3960,7 @@ def test_multimodal_flag_adds_multimodal_tools(): crew = Crew(agents=[multimodal_agent], tasks=[task], process=Process.sequential) mock_task_output = TaskOutput( - description="Mock description", raw="mocked output", agent="mocked agent" + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) 
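
Aside, not part of the patch: the tests above exercise the new `messages` field on `TaskOutput`, which is populated from the agent's LLM exchange and preserved through storage and replay. Below is a minimal sketch of constructing and reading that field, reusing the `LLMMessage` shape from the replay test; the `assistant_turns` helper is purely illustrative and not something added by this PR.

```python
# Minimal sketch (not part of the patch): building a TaskOutput with the new
# `messages` field and reading it back. The helper below is hypothetical.
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.types import LLMMessage


def assistant_turns(output: TaskOutput) -> list[str]:
    """Collect the assistant-side contents from a task's message history."""
    return [m["content"] for m in output.messages if m["role"] == "assistant"]


messages: list[LLMMessage] = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello"},
    {"role": "assistant", "content": "Hello!"},
]

output = TaskOutput(
    description="Say hello",
    raw="Hello!",
    agent="Test Agent",
    messages=messages,
)

print(assistant_turns(output))  # ['Hello!']
```
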
# Mock execute_sync to verify the tools passed at runtime @@ -3942,6 +4024,7 @@ def test_multimodal_agent_image_tool_handling(): description="Mock description", raw="A detailed analysis of the image", agent="Image Analyst", + messages=[], ) with patch.object(Task, "execute_sync") as mock_execute_sync: diff --git a/lib/crewai/tests/test_task.py b/lib/crewai/tests/test_task.py index 73fedfc88..72fe23b4b 100644 --- a/lib/crewai/tests/test_task.py +++ b/lib/crewai/tests/test_task.py @@ -162,6 +162,7 @@ def test_task_callback_returns_task_output(): "name": task.name or task.description, "expected_output": "Bullet point list of 5 interesting ideas.", "output_format": OutputFormat.RAW, + "messages": [], } assert output_dict == expected_output @@ -1680,3 +1681,44 @@ def test_task_copy_with_list_context(): assert isinstance(copied_task2.context, list) assert len(copied_task2.context) == 1 assert copied_task2.context[0] is task1 + + +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_task_output_includes_messages(): + """Test that TaskOutput includes messages from agent execution.""" + researcher = Agent( + role="Researcher", + goal="Make the best research and analysis on content about AI and AI agents", + backstory="You're an expert researcher, specialized in technology, software engineering, AI and startups. You work as a freelancer and is now working on doing research and analysis for a new customer.", + allow_delegation=False, + ) + + task1 = Task( + description="Give me a list of 3 interesting ideas about AI.", + expected_output="Bullet point list of 3 ideas.", + agent=researcher, + ) + + task2 = Task( + description="Summarize the ideas from the previous task.", + expected_output="A summary of the ideas.", + agent=researcher, + ) + + crew = Crew(agents=[researcher], tasks=[task1, task2], process=Process.sequential) + result = crew.kickoff() + + # Verify both tasks have messages + assert len(result.tasks_output) == 2 + + # Check first task output has messages + task1_output = result.tasks_output[0] + assert hasattr(task1_output, "messages") + assert isinstance(task1_output.messages, list) + assert len(task1_output.messages) > 0 + + # Check second task output has messages + task2_output = result.tasks_output[1] + assert hasattr(task2_output, "messages") + assert isinstance(task2_output.messages, list) + assert len(task2_output.messages) > 0 diff --git a/lib/crewai/tests/test_task_guardrails.py b/lib/crewai/tests/test_task_guardrails.py index 22572bfd3..dd24458d3 100644 --- a/lib/crewai/tests/test_task_guardrails.py +++ b/lib/crewai/tests/test_task_guardrails.py @@ -38,6 +38,7 @@ def test_task_without_guardrail(): agent.role = "test_agent" agent.execute_task.return_value = "test result" agent.crew = None + agent.last_messages = [] task = create_smart_task(description="Test task", expected_output="Output") @@ -56,6 +57,7 @@ def test_task_with_successful_guardrail_func(): agent.role = "test_agent" agent.execute_task.return_value = "test result" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test task", expected_output="Output", guardrail=guardrail @@ -76,6 +78,7 @@ def test_task_with_failing_guardrail(): agent.role = "test_agent" agent.execute_task.side_effect = ["bad result", "good result"] agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test task", @@ -103,6 +106,7 @@ def test_task_with_guardrail_retries(): agent.role = "test_agent" agent.execute_task.return_value = "bad result" agent.crew = None + 
agent.last_messages = [] task = create_smart_task( description="Test task", @@ -128,6 +132,7 @@ def test_guardrail_error_in_context(): agent = Mock() agent.role = "test_agent" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test task", @@ -295,6 +300,7 @@ def test_hallucination_guardrail_integration(): agent.role = "test_agent" agent.execute_task.return_value = "test result" agent.crew = None + agent.last_messages = [] mock_llm = Mock(spec=LLM) guardrail = HallucinationGuardrail( @@ -342,6 +348,7 @@ def test_multiple_guardrails_sequential_processing(): agent.role = "sequential_agent" agent.execute_task.return_value = "original text" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test sequential guardrails", @@ -391,6 +398,7 @@ def test_multiple_guardrails_with_validation_failure(): agent.role = "validation_agent" agent.execute_task = mock_execute_task agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test guardrails with validation", @@ -432,6 +440,7 @@ def test_multiple_guardrails_with_mixed_string_and_taskoutput(): agent.role = "mixed_agent" agent.execute_task.return_value = "original" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test mixed return types", @@ -469,6 +478,7 @@ def test_multiple_guardrails_with_retry_on_middle_guardrail(): agent.role = "retry_agent" agent.execute_task.return_value = "base" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test retry in middle guardrail", @@ -500,6 +510,7 @@ def test_multiple_guardrails_with_max_retries_exceeded(): agent.role = "failing_agent" agent.execute_task.return_value = "test" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test max retries with multiple guardrails", @@ -523,6 +534,7 @@ def test_multiple_guardrails_empty_list(): agent.role = "empty_agent" agent.execute_task.return_value = "no guardrails" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test empty guardrails list", @@ -582,6 +594,7 @@ def test_multiple_guardrails_processing_order(): agent.role = "order_agent" agent.execute_task.return_value = "base" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test processing order", @@ -625,6 +638,7 @@ def test_multiple_guardrails_with_pydantic_output(): agent.role = "pydantic_agent" agent.execute_task.return_value = "test content" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test guardrails with Pydantic", @@ -658,6 +672,7 @@ def test_guardrails_vs_single_guardrail_mutual_exclusion(): agent.role = "exclusion_agent" agent.execute_task.return_value = "test" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test mutual exclusion", @@ -700,6 +715,7 @@ def test_per_guardrail_independent_retry_tracking(): agent.role = "independent_retry_agent" agent.execute_task.return_value = "base" agent.crew = None + agent.last_messages = [] task = create_smart_task( description="Test independent retry tracking", From 01f0111d52d6e09fb23207ca104a856aa3d970d8 Mon Sep 17 00:00:00 2001 From: Rip&Tear <84775494+theCyberTech@users.noreply.github.com> Date: Tue, 11 Nov 2025 12:14:16 +0800 Subject: [PATCH 3/4] dependabot.yml creation (#3868) * dependabot.yml creation * Configure dependabot for pip package updates Co-authored-by: matt * Fix Dependabot package ecosystem * Refactor: Use uv package-ecosystem 
in dependabot Co-authored-by: matt * fix: ensure dependabot uses uv ecosystem --------- Co-authored-by: Greyson LaLonde Co-authored-by: Cursor Agent Co-authored-by: matt --- .github/dependabot.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..624c00413 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: uv # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" From d8fe83f76c4bf48b18be1f3013084f18c0671ff2 Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Mon, 10 Nov 2025 16:05:23 -0500 Subject: [PATCH 4/4] chore: continue refactoring llms to base models --- lib/crewai/src/crewai/__init__.py | 2 +- lib/crewai/src/crewai/cli/crew_chat.py | 3 +- lib/crewai/src/crewai/crew.py | 2 +- .../src/crewai/events/event_listener.py | 2 +- .../experimental/evaluation/base_evaluator.py | 2 +- lib/crewai/src/crewai/lite_agent.py | 2 +- lib/crewai/src/crewai/llm/base_llm.py | 16 +- lib/crewai/src/crewai/llm/internal/meta.py | 31 ++- .../llm/providers/anthropic/completion.py | 137 +++++------- .../crewai/llm/providers/azure/completion.py | 152 +++++++------ .../llm/providers/bedrock/completion.py | 198 ++++++++--------- .../crewai/llm/providers/gemini/completion.py | 203 +++++++++--------- .../crewai/llm/providers/openai/completion.py | 20 +- .../crewai/tasks/hallucination_guardrail.py | 2 +- lib/crewai/src/crewai/tools/tool_usage.py | 2 +- lib/crewai/tests/test_llm.py | 14 +- logs.txt | 20 -- 17 files changed, 379 insertions(+), 429 deletions(-) delete mode 100644 logs.txt diff --git a/lib/crewai/src/crewai/__init__.py b/lib/crewai/src/crewai/__init__.py index ef2bcf78d..4e2365a2f 100644 --- a/lib/crewai/src/crewai/__init__.py +++ b/lib/crewai/src/crewai/__init__.py @@ -8,8 +8,8 @@ from crewai.crew import Crew from crewai.crews.crew_output import CrewOutput from crewai.flow.flow import Flow from crewai.knowledge.knowledge import Knowledge -from crewai.llm import LLM from crewai.llm.base_llm import BaseLLM +from crewai.llm.core import LLM from crewai.process import Process from crewai.task import Task from crewai.tasks.llm_guardrail import LLMGuardrail diff --git a/lib/crewai/src/crewai/cli/crew_chat.py b/lib/crewai/src/crewai/cli/crew_chat.py index feca9e4ca..f59318086 100644 --- a/lib/crewai/src/crewai/cli/crew_chat.py +++ b/lib/crewai/src/crewai/cli/crew_chat.py @@ -14,7 +14,8 @@ import tomli from crewai.cli.utils import read_toml from crewai.cli.version import get_crewai_version from crewai.crew import Crew -from crewai.llm import LLM, BaseLLM +from crewai.llm import LLM +from crewai.llm.base_llm import BaseLLM from crewai.types.crew_chat import ChatInputField, ChatInputs from crewai.utilities.llm_utils import create_llm from crewai.utilities.printer import Printer diff --git a/lib/crewai/src/crewai/crew.py b/lib/crewai/src/crewai/crew.py index 31eb7466c..b258f3eaa 100644 --- a/lib/crewai/src/crewai/crew.py +++ b/lib/crewai/src/crewai/crew.py @@ -56,8 +56,8 @@ from crewai.events.types.crew_events 
import ( from crewai.flow.flow_trackable import FlowTrackable from crewai.knowledge.knowledge import Knowledge from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource -from crewai.llm import LLM from crewai.llm.base_llm import BaseLLM +from crewai.llm.core import LLM from crewai.memory.entity.entity_memory import EntityMemory from crewai.memory.external.external_memory import ExternalMemory from crewai.memory.long_term.long_term_memory import LongTermMemory diff --git a/lib/crewai/src/crewai/events/event_listener.py b/lib/crewai/src/crewai/events/event_listener.py index e07ee193c..0a88e85f0 100644 --- a/lib/crewai/src/crewai/events/event_listener.py +++ b/lib/crewai/src/crewai/events/event_listener.py @@ -89,7 +89,7 @@ from crewai.events.types.tool_usage_events import ( ToolUsageStartedEvent, ) from crewai.events.utils.console_formatter import ConsoleFormatter -from crewai.llm import LLM +from crewai.llm.core import LLM from crewai.task import Task from crewai.telemetry.telemetry import Telemetry from crewai.utilities import Logger diff --git a/lib/crewai/src/crewai/experimental/evaluation/base_evaluator.py b/lib/crewai/src/crewai/experimental/evaluation/base_evaluator.py index 69d1bb5c3..8001074d3 100644 --- a/lib/crewai/src/crewai/experimental/evaluation/base_evaluator.py +++ b/lib/crewai/src/crewai/experimental/evaluation/base_evaluator.py @@ -7,7 +7,7 @@ from pydantic import BaseModel, Field from crewai.agent import Agent from crewai.agents.agent_builder.base_agent import BaseAgent -from crewai.llm import BaseLLM +from crewai.llm.base_llm import BaseLLM from crewai.task import Task from crewai.utilities.llm_utils import create_llm diff --git a/lib/crewai/src/crewai/lite_agent.py b/lib/crewai/src/crewai/lite_agent.py index ef877e01b..e91e6b98f 100644 --- a/lib/crewai/src/crewai/lite_agent.py +++ b/lib/crewai/src/crewai/lite_agent.py @@ -39,8 +39,8 @@ from crewai.events.types.agent_events import ( from crewai.events.types.logging_events import AgentLogsExecutionEvent from crewai.flow.flow_trackable import FlowTrackable from crewai.lite_agent_output import LiteAgentOutput -from crewai.llm import LLM from crewai.llm.base_llm import BaseLLM +from crewai.llm.core import LLM from crewai.tools.base_tool import BaseTool from crewai.tools.structured_tool import CrewStructuredTool from crewai.utilities.agent_utils import ( diff --git a/lib/crewai/src/crewai/llm/base_llm.py b/lib/crewai/src/crewai/llm/base_llm.py index 122276345..f60ce500e 100644 --- a/lib/crewai/src/crewai/llm/base_llm.py +++ b/lib/crewai/src/crewai/llm/base_llm.py @@ -66,7 +66,7 @@ class BaseLLM(BaseModel, ABC, metaclass=LLMMeta): """ model_config: ClassVar[ConfigDict] = ConfigDict( - arbitrary_types_allowed=True, extra="allow", validate_assignment=True + arbitrary_types_allowed=True, extra="allow" ) # Core fields @@ -80,7 +80,9 @@ class BaseLLM(BaseModel, ABC, metaclass=LLMMeta): default="openai", description="Provider name (openai, anthropic, etc.)" ) stop: list[str] = Field( - default_factory=list, description="Stop sequences for generation" + default_factory=list, + description="Stop sequences for generation", + validation_alias="stop_sequences", ) # Internal fields @@ -112,16 +114,18 @@ class BaseLLM(BaseModel, ABC, metaclass=LLMMeta): if not values.get("model"): raise ValueError("Model name is required and cannot be empty") - # Handle stop sequences - stop = values.get("stop") + stop = values.get("stop") or values.get("stop_sequences") if stop is None: values["stop"] = [] elif isinstance(stop, str): 
values["stop"] = [stop] - elif not isinstance(stop, list): + elif isinstance(stop, list): + values["stop"] = stop + else: values["stop"] = [] - # Set default provider if not specified + values.pop("stop_sequences", None) + if "provider" not in values or values["provider"] is None: values["provider"] = "openai" diff --git a/lib/crewai/src/crewai/llm/internal/meta.py b/lib/crewai/src/crewai/llm/internal/meta.py index 4bf83b655..c12b1e9cc 100644 --- a/lib/crewai/src/crewai/llm/internal/meta.py +++ b/lib/crewai/src/crewai/llm/internal/meta.py @@ -33,13 +33,12 @@ class LLMMeta(ModelMetaclass): native provider implementation based on the model parameter. """ - def __call__(cls, model: str, is_litellm: bool = False, **kwargs: Any) -> Any: # noqa: N805 + def __call__(cls, *args: Any, **kwargs: Any) -> Any: # noqa: N805 """Route to appropriate provider implementation at instantiation time. Args: - model: The model identifier (e.g., "gpt-4", "claude-3-opus") - is_litellm: Force use of LiteLLM instead of native provider - **kwargs: Additional parameters for the LLM + *args: Positional arguments (model should be first for LLM class) + **kwargs: Keyword arguments including model, is_litellm, etc. Returns: Instance of the appropriate provider class or LLM class @@ -47,18 +46,18 @@ class LLMMeta(ModelMetaclass): Raises: ValueError: If model is not a valid string """ + if cls.__name__ != "LLM": + return super().__call__(*args, **kwargs) + + model = kwargs.get("model") or (args[0] if args else None) + is_litellm = kwargs.get("is_litellm", False) + if not model or not isinstance(model, str): raise ValueError("Model must be a non-empty string") - # Only perform routing if called on the base LLM class - # Subclasses (OpenAICompletion, etc.) should create normally - from crewai.llm import LLM - - if cls is not LLM: - # Direct instantiation of provider class, skip routing - return super().__call__(model=model, **kwargs) - - # Extract provider information + if args and not kwargs.get("model"): + kwargs["model"] = args[0] + args = args[1:] explicit_provider = kwargs.get("provider") if explicit_provider: @@ -97,12 +96,10 @@ class LLMMeta(ModelMetaclass): use_native = True model_string = model - # Route to native provider if available native_class = cls._get_native_provider(provider) if use_native else None if native_class and not is_litellm and provider in SUPPORTED_NATIVE_PROVIDERS: try: - # Remove 'provider' from kwargs to avoid duplicate keyword argument - kwargs_copy = {k: v for k, v in kwargs.items() if k != "provider"} + kwargs_copy = {k: v for k, v in kwargs.items() if k not in ("provider", "model")} return native_class( model=model_string, provider=provider, **kwargs_copy ) @@ -111,14 +108,12 @@ class LLMMeta(ModelMetaclass): except Exception as e: raise ImportError(f"Error importing native provider: {e}") from e - # Fallback to LiteLLM try: import litellm # noqa: F401 except ImportError: logging.error("LiteLLM is not available, falling back to LiteLLM") raise ImportError("Fallback to LiteLLM is not available") from None - # Create actual LLM instance with is_litellm=True return super().__call__(model=model, is_litellm=True, **kwargs) @staticmethod diff --git a/lib/crewai/src/crewai/llm/providers/anthropic/completion.py b/lib/crewai/src/crewai/llm/providers/anthropic/completion.py index 8ebba1673..dd13c0f5e 100644 --- a/lib/crewai/src/crewai/llm/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llm/providers/anthropic/completion.py @@ -3,9 +3,10 @@ from __future__ import annotations import json 
import logging import os -from typing import TYPE_CHECKING, Any, ClassVar, cast +from typing import Any, ClassVar, cast -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator +from typing_extensions import Self from crewai.events.types.llm_events import LLMCallType from crewai.llm.base_llm import BaseLLM @@ -19,9 +20,6 @@ from crewai.utilities.exceptions.context_window_exceeding_exception import ( from crewai.utilities.types import LLMMessage -if TYPE_CHECKING: - from crewai.llm.hooks.base import BaseInterceptor - try: from anthropic import Anthropic from anthropic.types import Message @@ -38,90 +36,67 @@ class AnthropicCompletion(BaseLLM): This class provides direct integration with the Anthropic Python SDK, offering native tool use, streaming support, and proper message formatting. + + Attributes: + model: Anthropic model name (e.g., 'claude-3-5-sonnet-20241022') + base_url: Custom base URL for Anthropic API + timeout: Request timeout in seconds + max_retries: Maximum number of retries + max_tokens: Maximum tokens in response (required for Anthropic) + top_p: Nucleus sampling parameter + stream: Enable streaming responses + client_params: Additional parameters for the Anthropic client + interceptor: HTTP interceptor for modifying requests/responses at transport level """ - model_config: ClassVar[ConfigDict] = ConfigDict(ignored_types=(property,)) + model_config: ClassVar[ConfigDict] = ConfigDict( + ignored_types=(property,), arbitrary_types_allowed=True + ) - def __init__( - self, - model: str = "claude-3-5-sonnet-20241022", - api_key: str | None = None, - base_url: str | None = None, - timeout: float | None = None, - max_retries: int = 2, - temperature: float | None = None, - max_tokens: int = 4096, # Required for Anthropic - top_p: float | None = None, - stop_sequences: list[str] | None = None, - stream: bool = False, - client_params: dict[str, Any] | None = None, - interceptor: BaseInterceptor[httpx.Request, httpx.Response] | None = None, - **kwargs: Any, - ): - """Initialize Anthropic chat completion client. 
+ base_url: str | None = Field( + default=None, description="Custom base URL for Anthropic API" + ) + timeout: float | None = Field( + default=None, description="Request timeout in seconds" + ) + max_retries: int = Field(default=2, description="Maximum number of retries") + max_tokens: int = Field( + default=4096, description="Maximum tokens in response (required for Anthropic)" + ) + top_p: float | None = Field(default=None, description="Nucleus sampling parameter") + stream: bool = Field(default=False, description="Enable streaming responses") + client_params: dict[str, Any] | None = Field( + default=None, description="Additional Anthropic client parameters" + ) + interceptor: Any = Field( + default=None, description="HTTP interceptor for request/response modification" + ) + client: Any = Field( + default=None, exclude=True, description="Anthropic client instance" + ) - Args: - model: Anthropic model name (e.g., 'claude-3-5-sonnet-20241022') - api_key: Anthropic API key (defaults to ANTHROPIC_API_KEY env var) - base_url: Custom base URL for Anthropic API - timeout: Request timeout in seconds - max_retries: Maximum number of retries - temperature: Sampling temperature (0-1) - max_tokens: Maximum tokens in response (required for Anthropic) - top_p: Nucleus sampling parameter - stop_sequences: Stop sequences (Anthropic uses stop_sequences, not stop) - stream: Enable streaming responses - client_params: Additional parameters for the Anthropic client - interceptor: HTTP interceptor for modifying requests/responses at transport level. - **kwargs: Additional parameters - """ - super().__init__( - model=model, temperature=temperature, stop=stop_sequences or [], **kwargs - ) - - # Client params - self.interceptor = interceptor - self.client_params = client_params - self.base_url = base_url - self.timeout = timeout - self.max_retries = max_retries + _is_claude_3: bool = PrivateAttr(default=False) + _supports_tools: bool = PrivateAttr(default=False) + @model_validator(mode="after") + def setup_client(self) -> Self: + """Initialize the Anthropic client and model-specific settings.""" self.client = Anthropic(**self._get_client_params()) - # Store completion parameters - self.max_tokens = max_tokens - self.top_p = top_p - self.stream = stream - self.stop_sequences = stop_sequences or [] + self._is_claude_3 = "claude-3" in self.model.lower() + self._supports_tools = self._is_claude_3 - # Model-specific settings - self.is_claude_3 = "claude-3" in model.lower() - self.supports_tools = self.is_claude_3 # Claude 3+ supports tool use + return self - # - # @property - # def stop(self) -> list[str]: # type: ignore[misc] - # """Get stop sequences sent to the API.""" - # return self.stop_sequences + @property + def is_claude_3(self) -> bool: + """Check if model is Claude 3.""" + return self._is_claude_3 - # @stop.setter - # def stop(self, value: list[str] | str | None) -> None: - # """Set stop sequences. - # - # Synchronizes stop_sequences to ensure values set by CrewAgentExecutor - # are properly sent to the Anthropic API. 
- # - # Args: - # value: Stop sequences as a list, single string, or None - # """ - # if value is None: - # self.stop_sequences = [] - # elif isinstance(value, str): - # self.stop_sequences = [value] - # elif isinstance(value, list): - # self.stop_sequences = value - # else: - # self.stop_sequences = [] + @property + def supports_tools(self) -> bool: + """Check if model supports tools.""" + return self._supports_tools def _get_client_params(self) -> dict[str, Any]: """Get client parameters.""" @@ -250,8 +225,8 @@ class AnthropicCompletion(BaseLLM): params["temperature"] = self.temperature if self.top_p is not None: params["top_p"] = self.top_p - if self.stop_sequences: - params["stop_sequences"] = self.stop_sequences + if self.stop: + params["stop_sequences"] = self.stop # Handle tools for Claude 3+ if tools and self.supports_tools: diff --git a/lib/crewai/src/crewai/llm/providers/azure/completion.py b/lib/crewai/src/crewai/llm/providers/azure/completion.py index a389c1825..6076b9f4d 100644 --- a/lib/crewai/src/crewai/llm/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llm/providers/azure/completion.py @@ -3,9 +3,10 @@ from __future__ import annotations import json import logging import os -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, ClassVar -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator +from typing_extensions import Self from crewai.llm.core import CONTEXT_WINDOW_USAGE_RATIO, LLM_CONTEXT_WINDOW_SIZES from crewai.llm.providers.utils.common import safe_tool_conversion @@ -17,7 +18,6 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: - from crewai.llm.hooks.base import BaseInterceptor from crewai.tools.base_tool import BaseTool @@ -51,65 +51,77 @@ class AzureCompletion(BaseLLM): This class provides direct integration with the Azure AI Inference Python SDK, offering native function calling, streaming support, and proper Azure authentication. + + Attributes: + model: Azure deployment name or model name + endpoint: Azure endpoint URL + api_version: Azure API version + timeout: Request timeout in seconds + max_retries: Maximum number of retries + top_p: Nucleus sampling parameter + frequency_penalty: Frequency penalty (-2 to 2) + presence_penalty: Presence penalty (-2 to 2) + max_tokens: Maximum tokens in response + stream: Enable streaming responses + interceptor: HTTP interceptor (not yet supported for Azure) """ - def __init__( - self, - model: str, - api_key: str | None = None, - endpoint: str | None = None, - api_version: str | None = None, - timeout: float | None = None, - max_retries: int = 2, - temperature: float | None = None, - top_p: float | None = None, - frequency_penalty: float | None = None, - presence_penalty: float | None = None, - max_tokens: int | None = None, - stop: list[str] | None = None, - stream: bool = False, - interceptor: BaseInterceptor[Any, Any] | None = None, - **kwargs: Any, - ): - """Initialize Azure AI Inference chat completion client. 
+ model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True) - Args: - model: Azure deployment name or model name - api_key: Azure API key (defaults to AZURE_API_KEY env var) - endpoint: Azure endpoint URL (defaults to AZURE_ENDPOINT env var) - api_version: Azure API version (defaults to AZURE_API_VERSION env var) - timeout: Request timeout in seconds - max_retries: Maximum number of retries - temperature: Sampling temperature (0-2) - top_p: Nucleus sampling parameter - frequency_penalty: Frequency penalty (-2 to 2) - presence_penalty: Presence penalty (-2 to 2) - max_tokens: Maximum tokens in response - stop: Stop sequences - stream: Enable streaming responses - interceptor: HTTP interceptor (not yet supported for Azure). - **kwargs: Additional parameters - """ - if interceptor is not None: + endpoint: str | None = Field( + default=None, + description="Azure endpoint URL (defaults to AZURE_ENDPOINT env var)", + ) + api_version: str = Field( + default="2024-06-01", + description="Azure API version (defaults to AZURE_API_VERSION env var or 2024-06-01)", + ) + timeout: float | None = Field( + default=None, description="Request timeout in seconds" + ) + max_retries: int = Field(default=2, description="Maximum number of retries") + top_p: float | None = Field(default=None, description="Nucleus sampling parameter") + frequency_penalty: float | None = Field( + default=None, description="Frequency penalty (-2 to 2)" + ) + presence_penalty: float | None = Field( + default=None, description="Presence penalty (-2 to 2)" + ) + max_tokens: int | None = Field( + default=None, description="Maximum tokens in response" + ) + stream: bool = Field(default=False, description="Enable streaming responses") + interceptor: Any = Field( + default=None, description="HTTP interceptor (not yet supported for Azure)" + ) + client: Any = Field(default=None, exclude=True, description="Azure client instance") + + _is_openai_model: bool = PrivateAttr(default=False) + _is_azure_openai_endpoint: bool = PrivateAttr(default=False) + + @model_validator(mode="after") + def setup_client(self) -> Self: + """Initialize the Azure client and validate configuration.""" + if self.interceptor is not None: raise NotImplementedError( "HTTP interceptors are not yet supported for Azure AI Inference provider. " "Interceptors are currently supported for OpenAI and Anthropic providers only." ) - super().__init__( - model=model, temperature=temperature, stop=stop or [], **kwargs - ) + if self.api_key is None: + self.api_key = os.getenv("AZURE_API_KEY") - self.api_key = api_key or os.getenv("AZURE_API_KEY") - self.endpoint = ( - endpoint - or os.getenv("AZURE_ENDPOINT") - or os.getenv("AZURE_OPENAI_ENDPOINT") - or os.getenv("AZURE_API_BASE") - ) - self.api_version = api_version or os.getenv("AZURE_API_VERSION") or "2024-06-01" - self.timeout = timeout - self.max_retries = max_retries + if self.endpoint is None: + self.endpoint = ( + os.getenv("AZURE_ENDPOINT") + or os.getenv("AZURE_OPENAI_ENDPOINT") + or os.getenv("AZURE_API_BASE") + ) + + if self.api_version == "2024-06-01": + env_version = os.getenv("AZURE_API_VERSION") + if env_version: + self.api_version = env_version if not self.api_key: raise ValueError( @@ -120,36 +132,38 @@ class AzureCompletion(BaseLLM): "Azure endpoint is required. Set AZURE_ENDPOINT environment variable or pass endpoint parameter." 
) - # Validate and potentially fix Azure OpenAI endpoint URL - self.endpoint = self._validate_and_fix_endpoint(self.endpoint, model) + self.endpoint = self._validate_and_fix_endpoint(self.endpoint, self.model) - # Build client kwargs - client_kwargs = { + client_kwargs: dict[str, Any] = { "endpoint": self.endpoint, "credential": AzureKeyCredential(self.api_key), } - # Add api_version if specified (primarily for Azure OpenAI endpoints) if self.api_version: client_kwargs["api_version"] = self.api_version - self.client = ChatCompletionsClient(**client_kwargs) # type: ignore[arg-type] + self.client = ChatCompletionsClient(**client_kwargs) - self.top_p = top_p - self.frequency_penalty = frequency_penalty - self.presence_penalty = presence_penalty - self.max_tokens = max_tokens - self.stream = stream - - self.is_openai_model = any( - prefix in model.lower() for prefix in ["gpt-", "o1-", "text-"] + self._is_openai_model = any( + prefix in self.model.lower() for prefix in ["gpt-", "o1-", "text-"] ) - - self.is_azure_openai_endpoint = ( + self._is_azure_openai_endpoint = ( "openai.azure.com" in self.endpoint and "/openai/deployments/" in self.endpoint ) + return self + + @property + def is_openai_model(self) -> bool: + """Check if model is an OpenAI model.""" + return self._is_openai_model + + @property + def is_azure_openai_endpoint(self) -> bool: + """Check if endpoint is an Azure OpenAI endpoint.""" + return self._is_azure_openai_endpoint + def _validate_and_fix_endpoint(self, endpoint: str, model: str) -> str: """Validate and fix Azure endpoint URL format. diff --git a/lib/crewai/src/crewai/llm/providers/bedrock/completion.py b/lib/crewai/src/crewai/llm/providers/bedrock/completion.py index f67414c63..ed6738c4b 100644 --- a/lib/crewai/src/crewai/llm/providers/bedrock/completion.py +++ b/lib/crewai/src/crewai/llm/providers/bedrock/completion.py @@ -5,8 +5,8 @@ import logging import os from typing import TYPE_CHECKING, Any, ClassVar, TypedDict, cast -from pydantic import BaseModel, ConfigDict -from typing_extensions import Required +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator +from typing_extensions import Required, Self from crewai.events.types.llm_events import LLMCallType from crewai.llm.base_llm import BaseLLM @@ -32,8 +32,6 @@ if TYPE_CHECKING: ToolTypeDef, ) - from crewai.llm.hooks.base import BaseInterceptor - try: from boto3.session import Session @@ -143,76 +141,86 @@ class BedrockCompletion(BaseLLM): - Complete streaming event handling (messageStart, contentBlockStart, etc.) 
- Response metadata and trace information capture - Model-specific conversation format handling (e.g., Cohere requirements) + + Attributes: + model: The Bedrock model ID to use + aws_access_key_id: AWS access key (defaults to environment variable) + aws_secret_access_key: AWS secret key (defaults to environment variable) + aws_session_token: AWS session token for temporary credentials + region_name: AWS region name + max_tokens: Maximum tokens to generate + top_p: Nucleus sampling parameter + top_k: Top-k sampling parameter (Claude models only) + stop_sequences: List of sequences that stop generation + stream: Whether to use streaming responses + guardrail_config: Guardrail configuration for content filtering + additional_model_request_fields: Model-specific request parameters + additional_model_response_field_paths: Custom response field paths + interceptor: HTTP interceptor (not yet supported for Bedrock) """ - model_config: ClassVar[ConfigDict] = ConfigDict(ignored_types=(property,)) + model_config: ClassVar[ConfigDict] = ConfigDict( + ignored_types=(property,), arbitrary_types_allowed=True + ) - def __init__( - self, - model: str = "anthropic.claude-3-5-sonnet-20241022-v2:0", - aws_access_key_id: str | None = None, - aws_secret_access_key: str | None = None, - aws_session_token: str | None = None, - region_name: str = "us-east-1", - temperature: float | None = None, - max_tokens: int | None = None, - top_p: float | None = None, - top_k: int | None = None, - stop_sequences: Sequence[str] | None = None, - stream: bool = False, - guardrail_config: dict[str, Any] | None = None, - additional_model_request_fields: dict[str, Any] | None = None, - additional_model_response_field_paths: list[str] | None = None, - interceptor: BaseInterceptor[Any, Any] | None = None, - **kwargs: Any, - ) -> None: - """Initialize AWS Bedrock completion client. 
+ aws_access_key_id: str | None = Field( + default=None, description="AWS access key (defaults to environment variable)" + ) + aws_secret_access_key: str | None = Field( + default=None, description="AWS secret key (defaults to environment variable)" + ) + aws_session_token: str | None = Field( + default=None, description="AWS session token for temporary credentials" + ) + region_name: str = Field(default="us-east-1", description="AWS region name") + max_tokens: int | None = Field( + default=None, description="Maximum tokens to generate" + ) + top_p: float | None = Field(default=None, description="Nucleus sampling parameter") + top_k: int | None = Field( + default=None, description="Top-k sampling parameter (Claude models only)" + ) + stream: bool = Field( + default=False, description="Whether to use streaming responses" + ) + guardrail_config: dict[str, Any] | None = Field( + default=None, description="Guardrail configuration for content filtering" + ) + additional_model_request_fields: dict[str, Any] | None = Field( + default=None, description="Model-specific request parameters" + ) + additional_model_response_field_paths: list[str] | None = Field( + default=None, description="Custom response field paths" + ) + interceptor: Any = Field( + default=None, description="HTTP interceptor (not yet supported for Bedrock)" + ) + client: Any = Field( + default=None, exclude=True, description="Bedrock client instance" + ) - Args: - model: The Bedrock model ID to use - aws_access_key_id: AWS access key (defaults to environment variable) - aws_secret_access_key: AWS secret key (defaults to environment variable) - aws_session_token: AWS session token for temporary credentials - region_name: AWS region name - temperature: Sampling temperature for response generation - max_tokens: Maximum tokens to generate - top_p: Nucleus sampling parameter - top_k: Top-k sampling parameter (Claude models only) - stop_sequences: List of sequences that stop generation - stream: Whether to use streaming responses - guardrail_config: Guardrail configuration for content filtering - additional_model_request_fields: Model-specific request parameters - additional_model_response_field_paths: Custom response field paths - interceptor: HTTP interceptor (not yet supported for Bedrock). - **kwargs: Additional parameters - """ - if interceptor is not None: + _is_claude_model: bool = PrivateAttr(default=False) + _supports_tools: bool = PrivateAttr(default=True) + _supports_streaming: bool = PrivateAttr(default=True) + _model_id: str = PrivateAttr() + + @model_validator(mode="after") + def setup_client(self) -> Self: + """Initialize the Bedrock client and validate configuration.""" + if self.interceptor is not None: raise NotImplementedError( "HTTP interceptors are not yet supported for AWS Bedrock provider. " "Interceptors are currently supported for OpenAI and Anthropic providers only." 
) - # Extract provider from kwargs to avoid duplicate argument - kwargs.pop("provider", None) - - super().__init__( - model=model, - temperature=temperature, - stop=stop_sequences or [], - provider="bedrock", - **kwargs, - ) - - # Initialize Bedrock client with proper configuration session = Session( - aws_access_key_id=aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID"), - aws_secret_access_key=aws_secret_access_key + aws_access_key_id=self.aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=self.aws_secret_access_key or os.getenv("AWS_SECRET_ACCESS_KEY"), - aws_session_token=aws_session_token or os.getenv("AWS_SESSION_TOKEN"), - region_name=region_name, + aws_session_token=self.aws_session_token or os.getenv("AWS_SESSION_TOKEN"), + region_name=self.region_name, ) - # Configure client with timeouts and retries following AWS best practices config = Config( read_timeout=300, retries={ @@ -223,53 +231,33 @@ class BedrockCompletion(BaseLLM): ) self.client = session.client("bedrock-runtime", config=config) - self.region_name = region_name - # Store completion parameters - self.max_tokens = max_tokens - self.top_p = top_p - self.top_k = top_k - self.stream = stream - self.stop_sequences = stop_sequences or [] + self._is_claude_model = "claude" in self.model.lower() + self._supports_tools = True + self._supports_streaming = True + self._model_id = self.model - # Store advanced features (optional) - self.guardrail_config = guardrail_config - self.additional_model_request_fields = additional_model_request_fields - self.additional_model_response_field_paths = ( - additional_model_response_field_paths - ) + return self - # Model-specific settings - self.is_claude_model = "claude" in model.lower() - self.supports_tools = True # Converse API supports tools for most models - self.supports_streaming = True + @property + def is_claude_model(self) -> bool: + """Check if model is a Claude model.""" + return self._is_claude_model - # Handle inference profiles for newer models - self.model_id = model + @property + def supports_tools(self) -> bool: + """Check if model supports tools.""" + return self._supports_tools - # @property - # def stop(self) -> list[str]: # type: ignore[misc] - # """Get stop sequences sent to the API.""" - # return list(self.stop_sequences) + @property + def supports_streaming(self) -> bool: + """Check if model supports streaming.""" + return self._supports_streaming - # @stop.setter - # def stop(self, value: Sequence[str] | str | None) -> None: - # """Set stop sequences. - # - # Synchronizes stop_sequences to ensure values set by CrewAgentExecutor - # are properly sent to the Bedrock API. 
- # - # Args: - # value: Stop sequences as a Sequence, single string, or None - # """ - # if value is None: - # self.stop_sequences = [] - # elif isinstance(value, str): - # self.stop_sequences = [value] - # elif isinstance(value, Sequence): - # self.stop_sequences = list(value) - # else: - # self.stop_sequences = [] + @property + def model_id(self) -> str: + """Get the model ID.""" + return self._model_id def call( self, @@ -559,7 +547,7 @@ class BedrockCompletion(BaseLLM): "Sequence[MessageTypeDef | MessageOutputTypeDef]", cast(object, messages), ), - **body, # type: ignore[arg-type] + **body, ) stream = response.get("stream") @@ -821,8 +809,8 @@ class BedrockCompletion(BaseLLM): config["temperature"] = float(self.temperature) if self.top_p is not None: config["topP"] = float(self.top_p) - if self.stop_sequences: - config["stopSequences"] = self.stop_sequences + if self.stop: + config["stopSequences"] = self.stop if self.is_claude_model and self.top_k is not None: # top_k is supported by Claude models diff --git a/lib/crewai/src/crewai/llm/providers/gemini/completion.py b/lib/crewai/src/crewai/llm/providers/gemini/completion.py index 263309910..34bff2508 100644 --- a/lib/crewai/src/crewai/llm/providers/gemini/completion.py +++ b/lib/crewai/src/crewai/llm/providers/gemini/completion.py @@ -2,12 +2,12 @@ import logging import os from typing import Any, ClassVar, cast -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator +from typing_extensions import Self from crewai.events.types.llm_events import LLMCallType from crewai.llm.base_llm import BaseLLM from crewai.llm.core import CONTEXT_WINDOW_USAGE_RATIO, LLM_CONTEXT_WINDOW_SIZES -from crewai.llm.hooks.base import BaseInterceptor from crewai.llm.providers.utils.common import safe_tool_conversion from crewai.utilities.agent_utils import is_context_length_exceeded from crewai.utilities.exceptions.context_window_exceeding_exception import ( @@ -31,108 +31,124 @@ class GeminiCompletion(BaseLLM): This class provides direct integration with the Google Gen AI Python SDK, offering native function calling, streaming support, and proper Gemini formatting. 
+ + Attributes: + model: Gemini model name (e.g., 'gemini-2.0-flash-001', 'gemini-1.5-pro') + project: Google Cloud project ID (for Vertex AI) + location: Google Cloud location (for Vertex AI, defaults to 'us-central1') + top_p: Nucleus sampling parameter + top_k: Top-k sampling parameter + max_output_tokens: Maximum tokens in response + stop_sequences: Stop sequences + stream: Enable streaming responses + safety_settings: Safety filter settings + client_params: Additional parameters for Google Gen AI Client constructor + interceptor: HTTP interceptor (not yet supported for Gemini) """ - model_config: ClassVar[ConfigDict] = ConfigDict(ignored_types=(property,)) + model_config: ClassVar[ConfigDict] = ConfigDict( + ignored_types=(property,), arbitrary_types_allowed=True + ) - def __init__( - self, - model: str = "gemini-2.0-flash-001", - api_key: str | None = None, - project: str | None = None, - location: str | None = None, - temperature: float | None = None, - top_p: float | None = None, - top_k: int | None = None, - max_output_tokens: int | None = None, - stop_sequences: list[str] | None = None, - stream: bool = False, - safety_settings: dict[str, Any] | None = None, - client_params: dict[str, Any] | None = None, - interceptor: BaseInterceptor[Any, Any] | None = None, - **kwargs: Any, - ): - """Initialize Google Gemini chat completion client. + project: str | None = Field( + default=None, description="Google Cloud project ID (for Vertex AI)" + ) + location: str = Field( + default="us-central1", + description="Google Cloud location (for Vertex AI, defaults to 'us-central1')", + ) + top_p: float | None = Field(default=None, description="Nucleus sampling parameter") + top_k: int | None = Field(default=None, description="Top-k sampling parameter") + max_output_tokens: int | None = Field( + default=None, description="Maximum tokens in response" + ) + stream: bool = Field(default=False, description="Enable streaming responses") + safety_settings: dict[str, Any] = Field( + default_factory=dict, description="Safety filter settings" + ) + client_params: dict[str, Any] = Field( + default_factory=dict, + description="Additional parameters for Google Gen AI Client constructor", + ) + interceptor: Any = Field( + default=None, description="HTTP interceptor (not yet supported for Gemini)" + ) + client: Any = Field( + default=None, exclude=True, description="Gemini client instance" + ) + + _is_gemini_2: bool = PrivateAttr(default=False) + _is_gemini_1_5: bool = PrivateAttr(default=False) + _supports_tools: bool = PrivateAttr(default=False) + + @property + def stop_sequences(self) -> list[str]: + """Get stop sequences as a list. + + This property provides access to stop sequences in Gemini's native format + while maintaining synchronization with the base class's stop attribute. + """ + if self.stop is None: + return [] + if isinstance(self.stop, str): + return [self.stop] + return self.stop + + @stop_sequences.setter + def stop_sequences(self, value: list[str] | str | None) -> None: + """Set stop sequences, synchronizing with the stop attribute. 
Args: - model: Gemini model name (e.g., 'gemini-2.0-flash-001', 'gemini-1.5-pro') - api_key: Google API key (defaults to GOOGLE_API_KEY or GEMINI_API_KEY env var) - project: Google Cloud project ID (for Vertex AI) - location: Google Cloud location (for Vertex AI, defaults to 'us-central1') - temperature: Sampling temperature (0-2) - top_p: Nucleus sampling parameter - top_k: Top-k sampling parameter - max_output_tokens: Maximum tokens in response - stop_sequences: Stop sequences - stream: Enable streaming responses - safety_settings: Safety filter settings - client_params: Additional parameters to pass to the Google Gen AI Client constructor. - Supports parameters like http_options, credentials, debug_config, etc. - interceptor: HTTP interceptor (not yet supported for Gemini). - **kwargs: Additional parameters + value: Stop sequences as a list, string, or None """ - if interceptor is not None: + self.stop = value + + @model_validator(mode="after") + def setup_client(self) -> Self: + """Initialize the Gemini client and validate configuration.""" + if self.interceptor is not None: raise NotImplementedError( "HTTP interceptors are not yet supported for Google Gemini provider. " "Interceptors are currently supported for OpenAI and Anthropic providers only." ) - super().__init__( - model=model, temperature=temperature, stop=stop_sequences or [], **kwargs - ) + if self.api_key is None: + self.api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY") - # Store client params for later use - self.client_params = client_params or {} + if self.project is None: + self.project = os.getenv("GOOGLE_CLOUD_PROJECT") - # Get API configuration with environment variable fallbacks - self.api_key = ( - api_key or os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY") - ) - self.project = project or os.getenv("GOOGLE_CLOUD_PROJECT") - self.location = location or os.getenv("GOOGLE_CLOUD_LOCATION") or "us-central1" + if self.location == "us-central1": + env_location = os.getenv("GOOGLE_CLOUD_LOCATION") + if env_location: + self.location = env_location use_vertexai = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "").lower() == "true" self.client = self._initialize_client(use_vertexai) - # Store completion parameters - self.top_p = top_p - self.top_k = top_k - self.max_output_tokens = max_output_tokens - self.stream = stream - self.safety_settings = safety_settings or {} - self.stop_sequences = stop_sequences or [] + self._is_gemini_2 = "gemini-2" in self.model.lower() + self._is_gemini_1_5 = "gemini-1.5" in self.model.lower() + self._supports_tools = self._is_gemini_1_5 or self._is_gemini_2 - # Model-specific settings - self.is_gemini_2 = "gemini-2" in model.lower() - self.is_gemini_1_5 = "gemini-1.5" in model.lower() - self.supports_tools = self.is_gemini_1_5 or self.is_gemini_2 + return self - # @property - # def stop(self) -> list[str]: # type: ignore[misc] - # """Get stop sequences sent to the API.""" - # return self.stop_sequences + @property + def is_gemini_2(self) -> bool: + """Check if model is Gemini 2.""" + return self._is_gemini_2 - # @stop.setter - # def stop(self, value: list[str] | str | None) -> None: - # """Set stop sequences. - # - # Synchronizes stop_sequences to ensure values set by CrewAgentExecutor - # are properly sent to the Gemini API. 
- # - # Args: - # value: Stop sequences as a list, single string, or None - # """ - # if value is None: - # self.stop_sequences = [] - # elif isinstance(value, str): - # self.stop_sequences = [value] - # elif isinstance(value, list): - # self.stop_sequences = value - # else: - # self.stop_sequences = [] + @property + def is_gemini_1_5(self) -> bool: + """Check if model is Gemini 1.5.""" + return self._is_gemini_1_5 - def _initialize_client(self, use_vertexai: bool = False) -> genai.Client: # type: ignore[no-any-unimported] + @property + def supports_tools(self) -> bool: + """Check if model supports tools.""" + return self._supports_tools + + def _initialize_client(self, use_vertexai: bool = False) -> Any: """Initialize the Google Gen AI client with proper parameter handling. Args: @@ -154,12 +170,9 @@ class GeminiCompletion(BaseLLM): "location": self.location, } ) - client_params.pop("api_key", None) - elif self.api_key: client_params["api_key"] = self.api_key - client_params.pop("vertexai", None) client_params.pop("project", None) client_params.pop("location", None) @@ -188,7 +201,6 @@ class GeminiCompletion(BaseLLM): and hasattr(self.client, "vertexai") and self.client.vertexai ): - # Vertex AI configuration params.update( { "vertexai": True, @@ -300,15 +312,12 @@ class GeminiCompletion(BaseLLM): self.tools = tools config_params = {} - # Add system instruction if present if system_instruction: - # Convert system instruction to Content format system_content = types.Content( role="user", parts=[types.Part.from_text(text=system_instruction)] ) config_params["system_instruction"] = system_content - # Add generation config parameters if self.temperature is not None: config_params["temperature"] = self.temperature if self.top_p is not None: @@ -317,14 +326,13 @@ class GeminiCompletion(BaseLLM): config_params["top_k"] = self.top_k if self.max_output_tokens is not None: config_params["max_output_tokens"] = self.max_output_tokens - if self.stop_sequences: - config_params["stop_sequences"] = self.stop_sequences + if self.stop: + config_params["stop_sequences"] = self.stop if response_model: config_params["response_mime_type"] = "application/json" config_params["response_schema"] = response_model.model_json_schema() - # Handle tools for supported models if tools and self.supports_tools: config_params["tools"] = self._convert_tools_for_interference(tools) @@ -347,7 +355,6 @@ class GeminiCompletion(BaseLLM): description=description, ) - # Add parameters if present - ensure parameters is a dict if parameters and isinstance(parameters, dict): function_declaration.parameters = parameters @@ -383,16 +390,12 @@ class GeminiCompletion(BaseLLM): content = message.get("content", "") if role == "system": - # Extract system instruction - Gemini handles it separately if system_instruction: system_instruction += f"\n\n{content}" else: system_instruction = cast(str, content) else: - # Convert role for Gemini (assistant -> model) gemini_role = "model" if role == "assistant" else "user" - - # Create Content object gemini_content = types.Content( role=gemini_role, parts=[types.Part.from_text(text=content)] ) @@ -509,13 +512,11 @@ class GeminiCompletion(BaseLLM): else {}, } - # Handle completed function calls if function_calls and available_functions: for call_data in function_calls.values(): function_name = call_data["name"] function_args = call_data["args"] - # Execute tool result = self._handle_tool_execution( function_name=function_name, function_args=function_args, @@ -575,13 +576,11 @@ class 
GeminiCompletion(BaseLLM): "gemma-3-27b": 128000, } - # Find the best match for the model name for model_prefix, size in context_windows.items(): if self.model.startswith(model_prefix): return int(size * CONTEXT_WINDOW_USAGE_RATIO) - # Default context window size for Gemini models - return int(1048576 * CONTEXT_WINDOW_USAGE_RATIO) # 1M tokens + return int(1048576 * CONTEXT_WINDOW_USAGE_RATIO) def _extract_token_usage(self, response: dict[str, Any]) -> dict[str, Any]: """Extract token usage from Gemini response.""" diff --git a/lib/crewai/src/crewai/llm/providers/openai/completion.py b/lib/crewai/src/crewai/llm/providers/openai/completion.py index f9f65c8b1..b9fcc99c7 100644 --- a/lib/crewai/src/crewai/llm/providers/openai/completion.py +++ b/lib/crewai/src/crewai/llm/providers/openai/completion.py @@ -11,7 +11,8 @@ from openai import APIConnectionError, NotFoundError, OpenAI from openai.types.chat import ChatCompletion, ChatCompletionChunk from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import ChoiceDelta -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator +from typing_extensions import Self from crewai.events.types.llm_events import LLMCallType from crewai.llm.base_llm import BaseLLM @@ -73,26 +74,18 @@ class OpenAICompletion(BaseLLM): ) reasoning_effort: str | None = Field(None, description="Reasoning effort level") - # Internal state client: OpenAI = Field( default_factory=OpenAI, exclude=True, description="OpenAI client instance" ) is_o1_model: bool = Field(False, description="Whether this is an O1 model") is_gpt4_model: bool = Field(False, description="Whether this is a GPT-4 model") - def model_post_init(self, __context: Any) -> None: - """Initialize OpenAI client after model initialization. 
- - Args: - __context: Pydantic context - """ - super().model_post_init(__context) - - # Set API key from environment if not provided + @model_validator(mode="after") + def setup_client(self) -> Self: + """Initialize OpenAI client after model validation.""" if self.api_key is None: self.api_key = os.getenv("OPENAI_API_KEY") - # Initialize client client_config = self._get_client_params() if self.interceptor: transport = HTTPTransport(interceptor=self.interceptor) @@ -101,10 +94,11 @@ class OpenAICompletion(BaseLLM): self.client = OpenAI(**client_config) - # Set model flags self.is_o1_model = "o1" in self.model.lower() self.is_gpt4_model = "gpt-4" in self.model.lower() + return self + def _get_client_params(self) -> dict[str, Any]: """Get OpenAI client parameters.""" diff --git a/lib/crewai/src/crewai/tasks/hallucination_guardrail.py b/lib/crewai/src/crewai/tasks/hallucination_guardrail.py index dd000a83c..07b465366 100644 --- a/lib/crewai/src/crewai/tasks/hallucination_guardrail.py +++ b/lib/crewai/src/crewai/tasks/hallucination_guardrail.py @@ -8,7 +8,7 @@ Classes: from typing import Any -from crewai.llm import LLM +from crewai.llm.core import LLM from crewai.tasks.task_output import TaskOutput from crewai.utilities.logger import Logger diff --git a/lib/crewai/src/crewai/tools/tool_usage.py b/lib/crewai/src/crewai/tools/tool_usage.py index 6f0e92cb8..69791291a 100644 --- a/lib/crewai/src/crewai/tools/tool_usage.py +++ b/lib/crewai/src/crewai/tools/tool_usage.py @@ -36,7 +36,7 @@ if TYPE_CHECKING: from crewai.agents.agent_builder.base_agent import BaseAgent from crewai.agents.tools_handler import ToolsHandler from crewai.lite_agent import LiteAgent - from crewai.llm import LLM + from crewai.llm.core import LLM from crewai.task import Task diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py index 3d8a1282e..16175bf0f 100644 --- a/lib/crewai/tests/test_llm.py +++ b/lib/crewai/tests/test_llm.py @@ -11,7 +11,7 @@ from crewai.events.event_types import ( ToolUsageFinishedEvent, ToolUsageStartedEvent, ) -from crewai.llm import CONTEXT_WINDOW_USAGE_RATIO, LLM +from crewai.llm.core import CONTEXT_WINDOW_USAGE_RATIO, LLM from crewai.utilities.token_counter_callback import TokenCalcHandler from pydantic import BaseModel import pytest @@ -229,7 +229,7 @@ def test_validate_call_params_supported(): a: int # Patch supports_response_schema to simulate a supported model. - with patch("crewai.llm.supports_response_schema", return_value=True): + with patch("crewai.llm.core.supports_response_schema", return_value=True): llm = LLM( model="openrouter/deepseek/deepseek-chat", response_format=DummyResponse ) @@ -242,7 +242,7 @@ def test_validate_call_params_not_supported(): a: int # Patch supports_response_schema to simulate an unsupported model. 
- with patch("crewai.llm.supports_response_schema", return_value=False): + with patch("crewai.llm.core.supports_response_schema", return_value=False): llm = LLM(model="gemini/gemini-1.5-pro", response_format=DummyResponse, is_litellm=True) with pytest.raises(ValueError) as excinfo: llm._validate_call_params() @@ -342,7 +342,7 @@ def test_context_window_validation(): # Test invalid window size with pytest.raises(ValueError) as excinfo: with patch.dict( - "crewai.llm.LLM_CONTEXT_WINDOW_SIZES", + "crewai.llm.core.LLM_CONTEXT_WINDOW_SIZES", {"test-model": 500}, # Below minimum clear=True, ): @@ -702,8 +702,8 @@ def test_ollama_does_not_modify_when_last_is_user(ollama_llm): def test_native_provider_raises_error_when_supported_but_fails(): """Test that when a native provider is in SUPPORTED_NATIVE_PROVIDERS but fails to instantiate, we raise the error.""" - with patch("crewai.llm.SUPPORTED_NATIVE_PROVIDERS", ["openai"]): - with patch("crewai.llm.LLM._get_native_provider") as mock_get_native: + with patch("crewai.llm.internal.meta.SUPPORTED_NATIVE_PROVIDERS", ["openai"]): + with patch("crewai.llm.internal.meta.LLMMeta._get_native_provider") as mock_get_native: # Mock that provider exists but throws an error when instantiated mock_provider = MagicMock() mock_provider.side_effect = ValueError("Native provider initialization failed") @@ -718,7 +718,7 @@ def test_native_provider_raises_error_when_supported_but_fails(): def test_native_provider_falls_back_to_litellm_when_not_in_supported_list(): """Test that when a provider is not in SUPPORTED_NATIVE_PROVIDERS, we fall back to LiteLLM.""" - with patch("crewai.llm.SUPPORTED_NATIVE_PROVIDERS", ["openai", "anthropic"]): + with patch("crewai.llm.internal.meta.SUPPORTED_NATIVE_PROVIDERS", ["openai", "anthropic"]): # Using a provider not in the supported list llm = LLM(model="groq/llama-3.1-70b-versatile", is_litellm=False) diff --git a/logs.txt b/logs.txt deleted file mode 100644 index 6aaa37c3a..000000000 --- a/logs.txt +++ /dev/null @@ -1,20 +0,0 @@ -lib/crewai/src/crewai/agent/core.py:901: error: Argument 1 has incompatible type "ToolFilterContext"; expected "dict[str, Any]" [arg-type] -lib/crewai/src/crewai/agent/core.py:901: note: Error code "arg-type" not covered by "type: ignore" comment -lib/crewai/src/crewai/agent/core.py:905: error: Argument 1 has incompatible type "dict[str, Any]"; expected "ToolFilterContext" [arg-type] -lib/crewai/src/crewai/agent/core.py:905: note: Error code "arg-type" not covered by "type: ignore" comment -lib/crewai/src/crewai/agent/core.py:996: error: Returning Any from function declared to return "dict[str, dict[str, Any]]" [no-any-return] -lib/crewai/src/crewai/agent/core.py:1157: error: Incompatible types in assignment (expression has type "tuple[UnionType, None]", target has type "tuple[type, Any]") [assignment] -lib/crewai/src/crewai/agent/core.py:1183: error: Argument 1 to "append" of "list" has incompatible type "type"; expected "type[str]" [arg-type] -lib/crewai/src/crewai/agent/core.py:1188: error: Incompatible types in assignment (expression has type "UnionType", variable has type "type[str]") [assignment] -lib/crewai/src/crewai/agent/core.py:1201: error: Argument 1 to "get" of "dict" has incompatible type "Any | None"; expected "str" [arg-type] -Found 7 errors in 1 file (checked 4 source files) -Success: no issues found in 4 source files -lib/crewai/src/crewai/llm/providers/gemini/completion.py:111: error: BaseModel field may only be overridden by another field [misc] -Found 1 error in 1 file (checked 4 source 
files) -Success: no issues found in 4 source files -lib/crewai/src/crewai/llm/providers/anthropic/completion.py:101: error: BaseModel field may only be overridden by another field [misc] -Found 1 error in 1 file (checked 4 source files) -lib/crewai/src/crewai/llm/providers/bedrock/completion.py:250: error: BaseModel field may only be overridden by another field [misc] -Found 1 error in 1 file (checked 4 source files) - -uv-lock..............................................(no files to check)Skipped