diff --git a/.gitignore b/.gitignore index d7e89fcaa..977aa9536 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ chromadb-*.lock blogs/* secrets/* UNKNOWN.egg-info/ +demos/* +.crewai/* diff --git a/docs/en/concepts/memory.mdx b/docs/en/concepts/memory.mdx index 954d5efe6..349546d1c 100644 --- a/docs/en/concepts/memory.mdx +++ b/docs/en/concepts/memory.mdx @@ -101,7 +101,7 @@ crew = Crew( ) ``` -When `memory=True`, the crew creates a default `Memory()` and passes the crew's `embedder` configuration through automatically. All agents in the crew share the crew's memory unless an agent has its own. +When `memory=True`, the crew creates a default `Memory()` and passes the crew's `embedder` configuration through automatically. All agents in the crew share the crew's memory unless an agent has its own. Without a custom `embedder`, memory uses OpenAI `text-embedding-3-large` embeddings. After each task, the crew automatically extracts discrete facts from the task output and stores them. Before each task, the agent recalls relevant context from memory and injects it into the task prompt. @@ -515,7 +515,11 @@ memory = Memory( ## Embedder Configuration -Memory needs an embedding model to convert text into vectors for semantic search. You can configure this in three ways. +Memory needs an embedding model to convert text into vectors for semantic search. By default, `Memory()` uses OpenAI `text-embedding-3-large` embeddings, which produce 3072-dimensional vectors. Set `OPENAI_API_KEY` to use this default, or configure a custom embedder in one of three ways. + + +Existing local memory stores created with 1536-dimensional embeddings, such as `text-embedding-3-small` or `text-embedding-ada-002`, may not be compatible with the `text-embedding-3-large` default. This applies to both the OpenAI and Azure OpenAI providers — Azure's default embedding model also changed from `text-embedding-ada-002` to `text-embedding-3-large`. 
If local testing fails with an embedding dimension mismatch, reset memory with `crewai reset-memories -m`, delete the local memory storage directory, or explicitly configure the older embedder model until you migrate. + ### Passing to Memory Directly @@ -523,7 +527,7 @@ Memory needs an embedding model to convert text into vectors for semantic search from crewai import Memory # As a config dict -memory = Memory(embedder={"provider": "openai", "config": {"model_name": "text-embedding-3-small"}}) +memory = Memory(embedder={"provider": "openai", "config": {"model_name": "text-embedding-3-large"}}) # As a pre-built callable from crewai.rag.embeddings.factory import build_embedder @@ -542,7 +546,7 @@ crew = Crew( agents=[...], tasks=[...], memory=True, - embedder={"provider": "openai", "config": {"model_name": "text-embedding-3-small"}}, + embedder={"provider": "openai", "config": {"model_name": "text-embedding-3-large"}}, ) ``` @@ -554,7 +558,7 @@ crew = Crew( memory = Memory(embedder={ "provider": "openai", "config": { - "model_name": "text-embedding-3-small", + "model_name": "text-embedding-3-large", # "api_key": "sk-...", # or set OPENAI_API_KEY env var }, }) @@ -701,9 +705,9 @@ memory = Memory(embedder=my_embedder) | Provider | Key | Typical Model | Notes | | :--- | :--- | :--- | :--- | -| OpenAI | `openai` | `text-embedding-3-small` | Default. Set `OPENAI_API_KEY`. | +| OpenAI | `openai` | `text-embedding-3-large` | Default. Set `OPENAI_API_KEY`. | | Ollama | `ollama` | `mxbai-embed-large` | Local, no API key needed. | -| Azure OpenAI | `azure` | `text-embedding-ada-002` | Requires `deployment_id`. | +| Azure OpenAI | `azure` | `text-embedding-3-large` | Default model for this provider. Requires `deployment_id`. | | Google AI | `google-generativeai` | `gemini-embedding-001` | Set `GOOGLE_API_KEY`. | | Google Vertex | `google-vertex` | `gemini-embedding-001` | Requires `project_id`. | | Cohere | `cohere` | `embed-english-v3.0` | Strong multilingual support. 
| @@ -836,6 +840,9 @@ class MemoryMonitor(BaseEventListener): **Background save errors in logs?** - Memory saves run in a background thread. Errors are emitted as `MemorySaveFailedEvent` but don't crash the agent. Check logs for the root cause (usually LLM or embedder connection issues). +**Embedding dimension mismatch?** +- Existing local memory stores may have been created with a different embedding model. The default OpenAI memory embedder is now `text-embedding-3-large` (3072 dimensions), while older stores commonly used 1536-dimensional embeddings. For local testing, run `crewai reset-memories -m`, delete the local memory storage directory, or configure the previous embedder model explicitly. + **Concurrent write conflicts?** - LanceDB operations are serialized with a shared lock and retried automatically on conflict. This handles multiple `Memory` instances pointing at the same database (e.g. agent memory + crew memory). No action needed. @@ -862,7 +869,7 @@ All configuration is passed as keyword arguments to `Memory(...)`. Every paramet | :--- | :--- | :--- | | `llm` | `"gpt-4o-mini"` | LLM for analysis (model name or `BaseLLM` instance). | | `storage` | `"lancedb"` | Storage backend (`"lancedb"`, a path string, or a `StorageBackend` instance). | -| `embedder` | `None` (OpenAI default) | Embedder (config dict, callable, or `None` for default OpenAI). | +| `embedder` | `None` (OpenAI `text-embedding-3-large`) | Embedder (config dict, callable, or `None` for default OpenAI). | | `recency_weight` | `0.3` | Weight for recency in composite score. | | `semantic_weight` | `0.5` | Weight for semantic similarity in composite score. | | `importance_weight` | `0.2` | Weight for importance in composite score. 
| diff --git a/docs/en/guides/migration/upgrading-crewai.mdx b/docs/en/guides/migration/upgrading-crewai.mdx index f158bc2c1..48895a254 100644 --- a/docs/en/guides/migration/upgrading-crewai.mdx +++ b/docs/en/guides/migration/upgrading-crewai.mdx @@ -141,7 +141,7 @@ crew = Crew( process=Process.sequential, # or Process.hierarchical memory=True, cache=True, - embedder={"provider": "openai", "config": {"model": "text-embedding-3-small"}}, + embedder={"provider": "openai", "config": {"model": "text-embedding-3-large"}}, ) ``` @@ -173,7 +173,7 @@ write = Task( ### Memory & embedder config {#memory-embedder-config} -If `memory=True` and you're not using the default OpenAI embeddings, you must pass an `embedder`: +If `memory=True` and you're not using the default OpenAI `text-embedding-3-large` embeddings, you must pass an `embedder`: ```python crew = Crew( @@ -187,4 +187,4 @@ crew = Crew( ) ``` -Set the relevant provider credentials (`OPENAI_API_KEY`, `OLLAMA_HOST`, etc.) in your `.env` file. Memory storage paths are project-local by default — delete the project's memory directory if you change embedders, since dimensions don't mix. +Set the relevant provider credentials (`OPENAI_API_KEY`, `OLLAMA_HOST`, etc.) in your `.env` file. Memory storage paths are project-local by default. Existing local memory stores created with 1536-dimensional embeddings may not be compatible with the default OpenAI `text-embedding-3-large` embedder, which uses 3072 dimensions. If you hit a dimension mismatch, delete the project's memory directory, run `crewai reset-memories -m`, or explicitly configure the older embedder model until you migrate. The same applies whenever you switch embedders: stores built with different vector dimensions don't mix. 
diff --git a/lib/cli/src/crewai_cli/cli.py b/lib/cli/src/crewai_cli/cli.py index f2ebd9a84..4257771dd 100644 --- a/lib/cli/src/crewai_cli/cli.py +++ b/lib/cli/src/crewai_cli/cli.py @@ -3,42 +3,94 @@ from __future__ import annotations from importlib.metadata import version as get_version import os import subprocess -from typing import Any +from typing import TYPE_CHECKING, Any import click from crewai_core.token_manager import TokenManager -from crewai_cli.add_crew_to_flow import add_crew_to_flow -from crewai_cli.authentication.main import AuthenticationCommand from crewai_cli.config import Settings -from crewai_cli.create_crew import create_crew -from crewai_cli.create_flow import create_flow -from crewai_cli.crew_chat import run_chat -from crewai_cli.deploy.main import DeployCommand -from crewai_cli.enterprise.main import EnterpriseConfigureCommand -from crewai_cli.evaluate_crew import evaluate_crew -from crewai_cli.experimental.skills.main import SkillCommand -from crewai_cli.install_crew import install_crew -from crewai_cli.kickoff_flow import kickoff_flow -from crewai_cli.organization.main import OrganizationCommand -from crewai_cli.plot_flow import plot_flow -from crewai_cli.remote_template.main import TemplateCommand -from crewai_cli.replay_from_task import replay_task_command -from crewai_cli.reset_memories_command import reset_memories_command -from crewai_cli.run_crew import run_crew -from crewai_cli.run_flow_definition import run_flow_definition -from crewai_cli.settings.main import SettingsCommand -from crewai_cli.task_outputs import load_task_outputs -from crewai_cli.tools.main import ToolCommand -from crewai_cli.train_crew import train_crew -from crewai_cli.triggers.main import TriggersCommand -from crewai_cli.update_crew import update_crew from crewai_cli.user_data import ( _load_user_data, is_tracing_enabled, update_user_data, ) -from crewai_cli.utils import build_env_with_all_tool_credentials, read_toml +from crewai_cli.utils import ( + 
build_env_with_all_tool_credentials, + enable_prompt_line_editing, + read_toml, +) + + +def train_crew(*args: Any, **kwargs: Any) -> Any: + from crewai_cli.train_crew import train_crew as _train_crew + + return _train_crew(*args, **kwargs) + + +def evaluate_crew(*args: Any, **kwargs: Any) -> Any: + from crewai_cli.evaluate_crew import evaluate_crew as _evaluate_crew + + return _evaluate_crew(*args, **kwargs) + + +def replay_task_command(*args: Any, **kwargs: Any) -> Any: + from crewai_cli.replay_from_task import replay_task_command as _replay_task_command + + return _replay_task_command(*args, **kwargs) + + +def run_flow_definition(*args: Any, **kwargs: Any) -> Any: + from crewai_cli.run_flow_definition import ( + run_flow_definition as _run_flow_definition, + ) + + return _run_flow_definition(*args, **kwargs) + + +def run_crew(*args: Any, **kwargs: Any) -> Any: + from crewai_cli.run_crew import run_crew as _run_crew + + return _run_crew(*args, **kwargs) + + +if TYPE_CHECKING: + # mypy sees the real classes; at runtime the shims below defer the + # heavy imports until a command actually instantiates them. 
+ from crewai_cli.authentication.main import AuthenticationCommand + from crewai_cli.deploy.main import DeployCommand + from crewai_cli.organization.main import OrganizationCommand + from crewai_cli.remote_template.main import TemplateCommand +else: + + class AuthenticationCommand: + def __new__(cls, *args: Any, **kwargs: Any) -> Any: + from crewai_cli.authentication.main import ( + AuthenticationCommand as _AuthenticationCommand, + ) + + return _AuthenticationCommand(*args, **kwargs) + + class DeployCommand: + def __new__(cls, *args: Any, **kwargs: Any) -> Any: + from crewai_cli.deploy.main import DeployCommand as _DeployCommand + + return _DeployCommand(*args, **kwargs) + + class TemplateCommand: + def __new__(cls, *args: Any, **kwargs: Any) -> Any: + from crewai_cli.remote_template.main import ( + TemplateCommand as _TemplateCommand, + ) + + return _TemplateCommand(*args, **kwargs) + + class OrganizationCommand: + def __new__(cls, *args: Any, **kwargs: Any) -> Any: + from crewai_cli.organization.main import ( + OrganizationCommand as _OrganizationCommand, + ) + + return _OrganizationCommand(*args, **kwargs) def _get_cli_version() -> str: @@ -91,17 +143,57 @@ def uv(uv_args: tuple[str, ...]) -> None: @crewai.command() -@click.argument("type", type=click.Choice(["crew", "flow"])) -@click.argument("name") +@click.argument( + "type", required=False, default=None, type=click.Choice(["crew", "flow"]) +) +@click.argument("name", required=False, default=None) @click.option("--provider", type=str, help="The provider to use for the crew") @click.option("--skip_provider", is_flag=True, help="Skip provider validation") +@click.option( + "--classic", + is_flag=True, + help="Use classic Python/YAML project structure instead of JSON", +) def create( - type: str, name: str, provider: str | None, skip_provider: bool = False + type: str | None, + name: str | None, + provider: str | None, + skip_provider: bool = False, + classic: bool = False, ) -> None: """Create a new crew, or 
flow.""" + if not type: + from crewai_cli.tui_picker import pick + + options = [ + ("crew", "A team of AI agents working together"), + ( + "flow", + "A deterministic workflow with full control over agents and crews", + ), + ] + type = pick("What would you like to create?", options) + if type is None: + raise SystemExit(0) + click.echo() + if not name: + enable_prompt_line_editing() + name = click.prompt( + click.style(f" Name of your {type}", fg="cyan", bold=True), + prompt_suffix=click.style(" › ", fg="bright_white"), # noqa: RUF001 + ) if type == "crew": - create_crew(name, provider, skip_provider) + if classic: + from crewai_cli.create_crew import create_crew + + create_crew(name, provider, skip_provider) + else: + from crewai_cli.create_json_crew import create_json_crew + + create_json_crew(name, provider, skip_provider) elif type == "flow": + from crewai_cli.create_flow import create_flow + create_flow(name) else: click.secho("Error: Invalid type. Must be 'crew' or 'flow'.", fg="red") @@ -186,6 +278,8 @@ def replay(task_id: str, trained_agents_file: str | None) -> None: def log_tasks_outputs() -> None: """Retrieve your latest crew.kickoff() task outputs.""" try: + from crewai_cli.task_outputs import load_task_outputs + tasks = load_task_outputs() if not tasks: @@ -274,6 +368,8 @@ def reset_memories( "Please specify at least one memory type to reset using the appropriate flags." ) return + from crewai_cli.reset_memories_command import reset_memories_command + reset_memories_command(memory, knowledge, agent_knowledge, kickoff_outputs, all) except Exception as e: click.echo(f"An error occurred while resetting memories: {e}", err=True) @@ -296,7 +392,7 @@ def reset_memories( "--embedder-model", type=str, default=None, - help="Embedder model name (e.g. text-embedding-3-small, gemini-embedding-001).", + help="Embedder model name (e.g. 
text-embedding-3-large, gemini-embedding-001).", ) @click.option( "--embedder-config", @@ -351,7 +447,7 @@ def memory( "-m", "--model", type=str, - default="gpt-4o-mini", + default="gpt-5.4-mini", help="LLM Model to run the tests on the Crew. For now only accepting only OpenAI models.", ) @click.option( @@ -382,6 +478,8 @@ def test(n_iterations: int, model: str, trained_agents_file: str | None) -> None @click.pass_context def install(context: click.Context) -> None: """Install the Crew.""" + from crewai_cli.install_crew import install_crew + install_crew(context.args) @@ -415,7 +513,9 @@ def install(context: click.Context) -> None: help='Experimental: JSON object passed to flow.kickoff(), e.g. \'{"topic":"AI"}\'.', ) def run( - trained_agents_file: str | None, definition: str | None, inputs: str | None + trained_agents_file: str | None, + definition: str | None, + inputs: str | None, ) -> None: """Run the Crew or Flow.""" if inputs is not None and definition is None: @@ -435,6 +535,8 @@ def run( @crewai.command() def update() -> None: """Update the pyproject.toml of the Crew project to use uv.""" + from crewai_cli.update_crew import update_crew + update_crew() @@ -544,6 +646,8 @@ def tool() -> None: @tool.command(name="create") @click.argument("handle") def tool_create(handle: str) -> None: + from crewai_cli.tools.main import ToolCommand + tool_cmd = ToolCommand() tool_cmd.create(handle) @@ -551,6 +655,8 @@ def tool_create(handle: str) -> None: @tool.command(name="install") @click.argument("handle") def tool_install(handle: str) -> None: + from crewai_cli.tools.main import ToolCommand + tool_cmd = ToolCommand() tool_cmd.login() tool_cmd.install(handle) @@ -567,6 +673,8 @@ def tool_install(handle: str) -> None: @click.option("--public", "is_public", flag_value=True, default=False) @click.option("--private", "is_public", flag_value=False) def tool_publish(is_public: bool, force: bool) -> None: + from crewai_cli.tools.main import ToolCommand + tool_cmd = ToolCommand() 
tool_cmd.login() tool_cmd.publish(is_public, force) @@ -599,6 +707,8 @@ def skill() -> None: help="Create skill in current dir instead of ./skills/", ) def skill_create(name: str, in_project: bool) -> None: + from crewai_cli.experimental.skills.main import SkillCommand + skill_cmd = SkillCommand() skill_cmd.create(name, in_project=in_project) @@ -606,6 +716,8 @@ def skill_create(name: str, in_project: bool) -> None: @skill.command(name="install") @click.argument("ref") def skill_install(ref: str) -> None: + from crewai_cli.experimental.skills.main import SkillCommand + skill_cmd = SkillCommand() skill_cmd.install(ref) @@ -622,6 +734,8 @@ def skill_install(ref: str) -> None: @click.option("--private", "is_public", flag_value=False) @click.option("--org", default=None, help="Organisation slug (overrides settings).") def skill_publish(is_public: bool, org: str | None, force: bool) -> None: + from crewai_cli.experimental.skills.main import SkillCommand + skill_cmd = SkillCommand() skill_cmd.publish(is_public, org=org, force=force) @@ -629,6 +743,8 @@ def skill_publish(is_public: bool, org: str | None, force: bool) -> None: @skill.command(name="list") def skill_list() -> None: """List locally installed skills.""" + from crewai_cli.experimental.skills.main import SkillCommand + skill_cmd = SkillCommand() skill_cmd.list_cached() @@ -668,6 +784,8 @@ def flow() -> None: @flow.command(name="kickoff") def flow_run() -> None: """Kickoff the Flow.""" + from crewai_cli.kickoff_flow import kickoff_flow + click.echo("Running the Flow") kickoff_flow() @@ -675,6 +793,8 @@ def flow_run() -> None: @flow.command(name="plot") def flow_plot() -> None: """Plot the Flow.""" + from crewai_cli.plot_flow import plot_flow + click.echo("Plotting the Flow") plot_flow() @@ -683,6 +803,8 @@ def flow_plot() -> None: @click.argument("crew_name") def flow_add_crew(crew_name: str) -> None: """Add a crew to an existing flow.""" + from crewai_cli.add_crew_to_flow import add_crew_to_flow + 
click.echo(f"Adding crew {crew_name} to the flow") add_crew_to_flow(crew_name) @@ -695,6 +817,8 @@ def triggers() -> None: @triggers.command(name="list") def triggers_list() -> None: """List all available triggers from integrations.""" + from crewai_cli.triggers.main import TriggersCommand + triggers_cmd = TriggersCommand() triggers_cmd.list_triggers() @@ -703,6 +827,8 @@ def triggers_list() -> None: @click.argument("trigger_path") def triggers_run(trigger_path: str) -> None: """Execute crew with trigger payload. Format: app_slug/trigger_slug""" + from crewai_cli.triggers.main import TriggersCommand + triggers_cmd = TriggersCommand() triggers_cmd.execute_with_trigger(trigger_path) @@ -715,6 +841,8 @@ def chat() -> None: click.secho( "\nStarting a conversation with the Crew\nType 'exit' or Ctrl+C to quit.\n", ) + from crewai_cli.crew_chat import run_chat + run_chat() @@ -754,6 +882,8 @@ def enterprise() -> None: @click.argument("enterprise_url") def enterprise_configure(enterprise_url: str) -> None: """Configure CrewAI AMP OAuth2 settings from the provided Enterprise URL.""" + from crewai_cli.enterprise.main import EnterpriseConfigureCommand + enterprise_command = EnterpriseConfigureCommand() enterprise_command.configure(enterprise_url) @@ -766,6 +896,8 @@ def config() -> None: @config.command("list") def config_list() -> None: """List all CLI configuration parameters.""" + from crewai_cli.settings.main import SettingsCommand + config_command = SettingsCommand() config_command.list() @@ -775,6 +907,8 @@ def config_list() -> None: @click.argument("value") def config_set(key: str, value: str) -> None: """Set a CLI configuration parameter.""" + from crewai_cli.settings.main import SettingsCommand + config_command = SettingsCommand() config_command.set(key, value) @@ -782,6 +916,8 @@ def config_set(key: str, value: str) -> None: @config.command("reset") def config_reset() -> None: """Reset all CLI configuration parameters to default values.""" + from 
crewai_cli.settings.main import SettingsCommand + config_command = SettingsCommand() config_command.reset_all_settings() diff --git a/lib/cli/src/crewai_cli/create_json_crew.py b/lib/cli/src/crewai_cli/create_json_crew.py new file mode 100644 index 000000000..e30ac77b9 --- /dev/null +++ b/lib/cli/src/crewai_cli/create_json_crew.py @@ -0,0 +1,1108 @@ +"""Scaffold a new JSON-first crew project.""" + +from __future__ import annotations + +import json +from pathlib import Path +import re +import sys +from typing import Any + +import click +from rich.console import Console +from rich.text import Text + +from crewai_cli.constants import ENV_VARS +from crewai_cli.tui_picker import pick_many, pick_one +from crewai_cli.utils import enable_prompt_line_editing, load_env_vars, write_env_file + + +# ── Provider / model data ─────────────────────────────────────── + +_PROVIDERS: list[tuple[str, str]] = [ + ("openai", "OpenAI"), + ("anthropic", "Anthropic"), + ("gemini", "Google Gemini"), + ("groq", "Groq"), + ("ollama", "Ollama"), + ("bedrock", "AWS Bedrock"), + ("azure", "Azure OpenAI"), + ("nvidia_nim", "NVIDIA NIM"), + ("huggingface", "Hugging Face"), + ("cerebras", "Cerebras"), + ("sambanova", "SambaNova"), + ("watson", "IBM watsonx"), +] + +_PROVIDER_MODELS: dict[str, list[tuple[str, str]]] = { + "openai": [ + ("gpt-5.5", "GPT-5.5"), + ("gpt-5.5-pro", "GPT-5.5 Pro"), + ("gpt-5.4", "GPT-5.4"), + ("o4-mini", "o4-mini"), + ("gpt-4.1", "GPT-4.1"), + ("gpt-4.1-mini", "GPT-4.1 Mini"), + ], + "anthropic": [ + ("claude-opus-4-6", "Claude Opus 4.6"), + ("claude-sonnet-4-6", "Claude Sonnet 4.6"), + ("claude-haiku-4-5-20251001", "Claude Haiku 4.5"), + ("claude-3-7-sonnet-20250219", "Claude 3.7 Sonnet"), + ("claude-3-5-sonnet-20241022", "Claude 3.5 Sonnet"), + ], + "gemini": [ + ("gemini-3-pro-preview", "Gemini 3 Pro (preview)"), + ("gemini-2.5-pro-exp-03-25", "Gemini 2.5 Pro"), + ("gemini-2.5-flash-preview-04-17", "Gemini 2.5 Flash"), + ("gemini-2.0-flash-001", "Gemini 2.0 Flash"), + 
("gemini-1.5-pro", "Gemini 1.5 Pro"), + ], + "groq": [ + ("llama-3.3-70b-versatile", "Llama 3.3 70B"), + ("llama-3.1-70b-versatile", "Llama 3.1 70B"), + ("llama-3.1-8b-instant", "Llama 3.1 8B"), + ("deepseek-r1-distill-llama-70b", "DeepSeek R1 70B"), + ("mixtral-8x7b-32768", "Mixtral 8x7B"), + ], + "ollama": [ + ("llama3.3", "Llama 3.3"), + ("llama3.1", "Llama 3.1"), + ("deepseek-r1", "DeepSeek R1"), + ("qwen2.5", "Qwen 2.5"), + ("mistral", "Mistral"), + ], +} + + +# ── Static project files ─────────────────────────────────────── + +_PYPROJECT_TOML = """\ +[project] +name = "{folder_name}" +version = "0.1.0" +description = "{name} using crewAI" +authors = [{{ name = "Your Name", email = "you@example.com" }}] +requires-python = ">=3.10,<3.14" +dependencies = [ + "crewai[tools]>=1.15" +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.crewai] +type = "crew" +""" + +_GITIGNORE = """\ +.env +__pycache__/ +.DS_Store +report.md +""" + +_README = """\ +# {name} + +A crewAI project using JSON-first configuration. + +## Running + +```bash +crewai run +``` + +## Project Structure + +- `agents/` - Agent definitions (JSONC) +- `crew.jsonc` - Crew definition with tasks and configuration +- `tools/` - Custom tools (Python) +- `knowledge/` - Knowledge files for agents + +> **Note:** `custom:` tool references execute `tools/.py` as local +> Python code when the crew loads. Only run crew projects from sources you +> trust. 
+""" + + +# ── Common tools for picker ──────────────────────────────────── + +_TOOL_CATEGORIES: list[tuple[str, list[tuple[str, str]]]] = [ + ( + "Search & Research", + [ + ("SerperDevTool", "Google search via Serper API"), + ("BraveSearchTool", "Web search via Brave Search"), + ("BraveWebSearchTool", "Focused Brave web search"), + ("BraveNewsSearchTool", "Search current news with Brave"), + ("BraveImageSearchTool", "Search images with Brave"), + ("BraveVideoSearchTool", "Search videos with Brave"), + ("BraveLocalPOIsTool", "Find local places with Brave"), + ("BraveLocalPOIsDescriptionTool", "Describe local places with Brave"), + ("BraveLLMContextTool", "Fetch Brave search context"), + ("TavilySearchTool", "Web search via Tavily"), + ("TavilyResearchTool", "Run Tavily research"), + ("TavilyGetResearchTool", "Retrieve Tavily research results"), + ("TavilyExtractorTool", "Extract content with Tavily"), + ("EXASearchTool", "Semantic web search via Exa"), + ("ExaSearchTool", "Semantic web search via Exa"), + ("LinkupSearchTool", "Web search via Linkup"), + ("SerpApiGoogleSearchTool", "Google search via SerpApi"), + ("SerpApiGoogleShoppingTool", "Google Shopping via SerpApi"), + ("SerplyWebSearchTool", "Web search via Serply"), + ("SerplyNewsSearchTool", "News search via Serply"), + ("SerplyScholarSearchTool", "Scholar search via Serply"), + ("SerplyJobSearchTool", "Job search via Serply"), + ("SerplyWebpageToMarkdownTool", "Convert webpages with Serply"), + ("ParallelSearchTool", "Run parallel web searches"), + ("BrightDataSearchTool", "Search with Bright Data"), + ("GithubSearchTool", "Search GitHub repositories"), + ("ArxivPaperTool", "Search arXiv academic papers"), + ], + ), + ( + "Web Scraping", + [ + ("ScrapeWebsiteTool", "Extract content from a URL"), + ("ScrapeElementFromWebsiteTool", "Extract page elements from a URL"), + ("FirecrawlScrapeWebsiteTool", "Scrape with Firecrawl"), + ("FirecrawlCrawlWebsiteTool", "Crawl a website with Firecrawl"), + 
("FirecrawlSearchTool", "Search with Firecrawl"), + ("SeleniumScrapingTool", "Browser-based scraping"), + ("JinaScrapeWebsiteTool", "Scrape with Jina"), + ("ScrapegraphScrapeTool", "AI-powered page scraping"), + ("SerperScrapeWebsiteTool", "Scrape pages with Serper"), + ("BrowserbaseLoadTool", "Load web pages with Browserbase"), + ("HyperbrowserLoadTool", "Load web pages with Hyperbrowser"), + ("MultiOnTool", "Control web workflows with MultiOn"), + ("SpiderTool", "Crawl websites with Spider"), + ("StagehandTool", "Browser automation with Stagehand"), + ("BrightDataWebUnlockerTool", "Unlock websites with Bright Data"), + ("BrightDataDatasetTool", "Fetch Bright Data datasets"), + ("WebsiteSearchTool", "RAG search on a website"), + ], + ), + ( + "File & Document", + [ + ("DirectoryReadTool", "List directory contents"), + ("DirectorySearchTool", "Search directory contents"), + ("FileReadTool", "Read local files"), + ("FileWriterTool", "Write to local files"), + ("FileCompressorTool", "Compress local files"), + ("CSVSearchTool", "Search within CSV files"), + ("PDFSearchTool", "Search within PDF files"), + ("DOCXSearchTool", "Search within DOCX files"), + ("MDXSearchTool", "Search within MDX files"), + ("JSONSearchTool", "Search within JSON files"), + ("TXTSearchTool", "Search within text files"), + ("XMLSearchTool", "Search within XML files"), + ("OCRTool", "Extract text with OCR"), + ("YoutubeVideoSearchTool", "Search within YouTube videos"), + ("YoutubeChannelSearchTool", "Search within YouTube channels"), + ], + ), + ( + "Code & Data", + [ + ("CodeDocsSearchTool", "Search code documentation"), + ("RagTool", "RAG over custom data sources"), + ("NL2SQLTool", "Natural language to SQL queries"), + ("DatabricksQueryTool", "Query Databricks data"), + ("SingleStoreSearchTool", "Search SingleStore data"), + ], + ), + ( + "Cloud & Storage", + [ + ("S3ReaderTool", "Read objects from Amazon S3"), + ("S3WriterTool", "Write objects to Amazon S3"), + ("BedrockInvokeAgentTool", 
"Invoke an Amazon Bedrock agent"), + ("BedrockKBRetrieverTool", "Retrieve from Bedrock knowledge bases"), + ], + ), + ( + "Sandbox & Automation", + [ + ("E2BExecTool", "Run commands in E2B"), + ("E2BFileTool", "Manage files in E2B"), + ("E2BPythonTool", "Run Python in E2B"), + ("DaytonaExecTool", "Run commands in Daytona"), + ("DaytonaFileTool", "Manage files in Daytona"), + ("DaytonaPythonTool", "Run Python in Daytona"), + ("GenerateCrewaiAutomationTool", "Generate CrewAI automations"), + ], + ), + ( + "AI & Vision", + [ + ("DallETool", "Generate images with DALL-E"), + ("VisionTool", "Analyze images with vision models"), + ("AIMindTool", "Connect to MindStudio agents"), + ("PatronusEvalTool", "Evaluate output with Patronus"), + ("PatronusLocalEvaluatorTool", "Run local Patronus evaluations"), + ], + ), +] + +_FLAT_TOOLS: list[tuple[str, str]] = [ + tool for _cat, tools in _TOOL_CATEGORIES for tool in tools +] + +_COMMON_TOOL_ORDER = [ + "SerperDevTool", + "ScrapeWebsiteTool", + "DirectoryReadTool", + "FileReadTool", + "FileWriterTool", +] + +_ANSI_SEQUENCE_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]") + + +# ── Interactive wizard ───────────────────────────────────────── + + +def _prompt_text( + label: str, + default: str = "", + *, + spacing_before: bool = True, +) -> str: + if spacing_before: + click.echo() + + prompt = click.style(f" {label}", fg="cyan") + if default: + prompt += f" [{default}]" + prompt += click.style(" > ", fg="bright_white") + + try: + value = input(_readline_safe_prompt(prompt)) + except (KeyboardInterrupt, EOFError): + raise click.Abort() from None + + if not value and default: + value = default + return value.strip() + + +def _readline_safe_prompt(prompt: str) -> str: + if not sys.stdin.isatty(): + return prompt + + try: + import readline # noqa: F401 + except ImportError: + return prompt + + return _ANSI_SEQUENCE_RE.sub(lambda match: f"\001{match.group(0)}\002", prompt) + + +def _confirm(label: str, default: bool = False) -> bool: + 
click.echo() + return click.confirm( + click.style(f" {label}", fg="cyan"), + default=default, + prompt_suffix=click.style(" > ", fg="bright_white"), + ) + + +def _success(message: str, *, bold: bool = False, dim: bool = False) -> None: + click.echo() + click.secho(f" ✔ {message}", fg="green", bold=bold, dim=dim) + + +def _highlight_placeholders(text: str) -> Text: + highlighted = Text(text, style="dim") + highlighted.highlight_regex(r"\{[A-Za-z_][A-Za-z0-9_]*\}", style="bold cyan") + return highlighted + + +def _show_interpolation_hint(kind: str) -> None: + console = Console() + console.print( + _highlight_placeholders( + " Tip: Use {placeholder} for dynamic values you want to change later." + ) + ) + + +def _tool_label(name: str, description: str) -> str: + return f"{description:<48s} {name}" + + +def _tool_category_label(category: str) -> str: + return f"── {category} ──" + + +def _category_row_label( + category: str, tools: list[tuple[str, str]], selected: set[str], expanded: bool +) -> str: + """Render an accordion category row with tool/selection counts.""" + marker = "▾" if expanded else "▸" + sel_count = sum(1 for name, _desc in tools if name in selected) + suffix = f"{len(tools)} tools" + if sel_count: + suffix += f", {sel_count} selected" + return f"{marker} {category} ({suffix})" + + +def _select_tools() -> list[str]: + """Accordion tool picker. + + Common tools are always visible at the top; every other category shows + as a single expandable row. Expanding one category collapses the others. + Selections persist while expanding/collapsing. 
+ """ + tools_by_name = {name: desc for name, desc in _FLAT_TOOLS} + common_tools = [ + (name, tools_by_name[name]) + for name in _COMMON_TOOL_ORDER + if name in tools_by_name + ] + common_tool_names = {name for name, _desc in common_tools} + + categories: list[tuple[str, list[tuple[str, str]]]] = [] + for category, category_tools in _TOOL_CATEGORIES: + remaining_tools = [ + (name, desc) + for name, desc in category_tools + if name not in common_tool_names + ] + if remaining_tools: + categories.append((category, remaining_tools)) + + selected: set[str] = set() + expanded: str | None = None + focus_category: str | None = None + + while True: + labels: list[str] = [] + tool_by_index: dict[int, str] = {} + separator_indices: set[int] = set() + action_indices: set[int] = set() + category_by_index: dict[int, str] = {} + preselected: set[int] = set() + initial_cursor: int | None = None + + separator_indices.add(len(labels)) + labels.append(_tool_category_label("Common tools")) + for name, desc in common_tools: + if name in selected: + preselected.add(len(labels)) + tool_by_index[len(labels)] = name + labels.append(_tool_label(name, desc)) + + for category, category_tools in categories: + row = len(labels) + action_indices.add(row) + category_by_index[row] = category + is_expanded = category == expanded + if category == focus_category: + initial_cursor = row + labels.append( + _category_row_label(category, category_tools, selected, is_expanded) + ) + if is_expanded: + for name, desc in category_tools: + if name in selected: + preselected.add(len(labels)) + tool_by_index[len(labels)] = name + labels.append(_tool_label(name, desc)) + + indices, action = pick_many( + "Tools (space to toggle, enter to confirm):", + labels, + action_indices=action_indices, + separator_indices=separator_indices, + preselected=preselected, + initial_cursor=initial_cursor, + ) + + # Carry over toggles made on this screen; tools not visible in this + # render keep their previous state. 
def _wizard_agent(
    agent_num: int,
    existing_names: list[str],
    skip_provider: bool = False,
    last_llm: str | None = None,
    preset_llm: str | None = None,
) -> dict[str, Any] | None:
    """Run the interactive wizard for one agent.

    Args:
        agent_num: 1-based position of the agent; shown in the heading and
            used for the fallback name ``agent_<n>``.
        existing_names: Names already taken by earlier agents. The generated
            name gets ``_2`` appended until it no longer collides.
        skip_provider: When true (and ``preset_llm`` is unset), no model
            prompt is shown; ``last_llm`` or ``"openai/gpt-4o"`` is used.
        last_llm: Model picked for the previous agent, offered for reuse.
        preset_llm: Model to use as-is, bypassing every model prompt.

    Returns:
        A dict with ``name``, ``role``, ``goal``, ``backstory``, ``llm``,
        ``tools``, ``planning`` and ``allow_delegation``, or ``None`` when
        the role prompt was left empty.
    """
    click.echo()
    click.secho(f" Agent {agent_num}", fg="cyan", bold=True)

    role = _prompt_text("Role", spacing_before=False)
    if not role:
        return None

    name_default = role.lower().replace(" ", "_")[:30]
    name_default = re.sub(r"[^a-z0-9_]", "", name_default)
    if not name_default.strip("_"):
        # Roles made only of symbols would otherwise produce an empty slug
        # (or, with spaces between the symbols, a slug of bare underscores)
        # and an unusable agents/<name>.jsonc file name.
        name_default = f"agent_{agent_num}"
    while name_default in existing_names:
        name_default += "_2"

    goal = _prompt_text("Goal", spacing_before=False)

    backstory = _prompt_text("Backstory", spacing_before=False)

    # LLM model: preset wins, then the skip flag, then reuse-or-pick.
    if preset_llm:
        llm = preset_llm
        _success(llm)
    elif skip_provider:
        llm = last_llm or "openai/gpt-4o"
    elif last_llm:
        reuse_labels = [
            f"Same as before ({last_llm})",
            "Choose a different model",
        ]
        r_idx = pick_one("LLM:", reuse_labels)
        if r_idx == 1:
            llm = _select_model()
        else:
            # Any other result (including a cancelled pick) reuses the model.
            llm = last_llm
            _success(llm)
    else:
        llm = _select_model()

    tools = _select_tools()
    if tools:
        _success(f"{len(tools)} tool{'s' if len(tools) != 1 else ''}")
    else:
        _success("No tools", dim=True)

    # Planning
    planning = _confirm("Enable step-by-step planning?", default=False)

    # Allow delegation
    allow_delegation = _confirm("Allow delegation to other agents?", default=False)

    return {
        "name": name_default,
        "role": role,
        "goal": goal,
        "backstory": backstory,
        "llm": llm,
        "tools": tools,
        "planning": planning,
        "allow_delegation": allow_delegation,
    }
def _wizard_agents_and_tasks(
    skip_provider: bool = False,
    default_llm: str | None = None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]], dict[str, Any]]:
    """Drive the three wizard steps and collect their results.

    Step 1 gathers agents, step 2 gathers tasks, step 3 asks for crew
    settings. Each of the first two steps repeats until at least one entry
    exists and the user declines to add another.

    Returns:
        ``(agents, tasks, crew_settings)`` where ``crew_settings`` holds
        ``process``, ``memory`` and an empty ``inputs`` mapping.
    """
    agents: list[dict[str, Any]] = []
    tasks: list[dict[str, Any]] = []

    # ── Step 1: Agents ──
    click.echo()
    click.secho(" Step 1/3 — Agents", fg="cyan", bold=True)
    click.secho(" Define the AI agents in your crew.", dim=True)
    _show_interpolation_hint("agents")

    collecting = True
    while collecting:
        is_first = not agents
        new_agent = _wizard_agent(
            agent_num=len(agents) + 1,
            existing_names=[entry["name"] for entry in agents],
            skip_provider=skip_provider,
            # Offer the previous agent's model for reuse ...
            last_llm=None if is_first else agents[-1]["llm"],
            # ... and apply the preset model to the first agent only.
            preset_llm=default_llm if is_first else None,
        )
        if new_agent is not None:
            agents.append(new_agent)
            _success(f"{new_agent['role']} added", bold=True)
        elif is_first:
            click.secho(" Need at least one agent.", fg="yellow")
            continue
        collecting = _confirm("Add another agent?", default=False)

    # ── Step 2: Tasks ──
    click.echo()
    click.secho(" Step 2/3 — Tasks", fg="cyan", bold=True)
    click.secho(" Define what your agents should do.", dim=True)
    _show_interpolation_hint("tasks")

    agent_names = [entry["name"] for entry in agents]
    known_task_names: list[str] = []

    collecting = True
    while collecting:
        new_task = _wizard_task(
            task_num=len(tasks) + 1,
            agent_names=agent_names,
            prior_task_names=known_task_names,
        )
        if new_task is not None:
            tasks.append(new_task)
            known_task_names.append(new_task["name"])
            _success(f"Task {len(tasks)} added", bold=True)
        elif not tasks:
            click.secho(" Need at least one task.", fg="yellow")
            continue
        collecting = _confirm("Add another task?", default=False)

    # ── Step 3: Settings ──
    click.echo()
    click.secho(" Step 3/3 — Settings", fg="cyan", bold=True)

    memory_enabled = _confirm("Enable crew memory?", default=True)

    # The wizard always scaffolds a sequential crew; inputs start empty.
    crew_settings = {
        "process": "sequential",
        "memory": memory_enabled,
        "inputs": {},
    }

    return agents, tasks, crew_settings
wizard data to JSONC string with comments.""" + has_planning = agent["planning"] + delegation_val = "true" if agent["allow_delegation"] else "false" + delegation_comma = "," if has_planning else "" + + settings_lines = [] + settings_lines.append(" // Show detailed execution logs") + settings_lines.append(' "verbose": false,') + settings_lines.append("") + settings_lines.append( + " // Allow this agent to delegate tasks to other agents in the crew" + ) + settings_lines.append(f' "allow_delegation": {delegation_val}{delegation_comma}') + settings_lines.append("") + settings_lines.append( + " // Maximum reasoning iterations per task (prevents infinite loops)" + ) + settings_lines.append(' // "max_iter": 25,') + settings_lines.append("") + settings_lines.append(" // Maximum tokens for agent's response generation") + settings_lines.append(' // "max_tokens": null,') + settings_lines.append("") + settings_lines.append(" // Maximum execution time in seconds") + settings_lines.append(' // "max_execution_time": null,') + settings_lines.append("") + settings_lines.append(" // Maximum LLM requests per minute (rate limiting)") + settings_lines.append(' // "max_rpm": null,') + settings_lines.append("") + settings_lines.append(" // Enable agent-level memory (persists across tasks)") + settings_lines.append(' // "memory": false,') + settings_lines.append("") + settings_lines.append(" // Cache tool results to avoid duplicate calls") + settings_lines.append(' // "cache": true,') + settings_lines.append("") + settings_lines.append( + " // Auto-summarize context when it exceeds the LLM's context window" + ) + settings_lines.append(' // "respect_context_window": true,') + settings_lines.append("") + settings_lines.append(" // Maximum retries on execution errors") + settings_lines.append(' // "max_retry_limit": 2,') + settings_lines.append("") + settings_lines.append(" // Enable step-by-step planning before task execution") + if has_planning: + settings_lines.append(' "planning": true') 
+ else: + settings_lines.append(' // "planning": false') + settings_lines.append("") + settings_lines.append(" // Include system prompt in LLM calls") + settings_lines.append(' // "use_system_prompt": true') + + settings_block = "\n".join(settings_lines) + + return f"""\ +{{ + // Agent's role title — appears in prompts and logs. + // You can use {{placeholder}} inputs in role, goal, or backstory. + // Example: "role": "Senior {{industry}} Researcher" + "role": {json.dumps(agent["role"])}, + + // The agent's primary objective + "goal": {json.dumps(agent["goal"])}, + + // Background story that shapes the agent's personality and approach + "backstory": {json.dumps(agent["backstory"])}, + + // LLM model in provider/model format + // Examples: "openai/gpt-4o", "anthropic/claude-sonnet-4-6", "ollama/llama3.3" + // For custom endpoints or deployment-based providers, replace with: + // "llm": {{"model": "llama3", "provider": "ollama", "base_url": "http://localhost:11434"}}, + // "llm": {{"deployment_name": "my-deployment", "provider": "azure", "api_version": "2024-10-21"}}, + "llm": {json.dumps(agent["llm"])}, + + // Override LLM used specifically for tool/function calling + // "function_calling_llm": "openai/gpt-5.4-mini", + + // Tools available to this agent + // Built-in: "SerperDevTool", "ScrapeWebsiteTool", "FileReadTool", etc. + // Custom: "custom:my_tool" loads from tools/my_tool.py + "tools": {json.dumps(agent["tools"])}, + + // Optional agent-level guardrail — validates this agent's final output. + // String guardrails are checked by an LLM and can reject/retry output. 
+ // "guardrail": "Only answer with information supported by retrieved evidence.", + // "guardrail_max_retries": 2, + + // Advanced agent options: + // Docs: https://docs.crewai.com/concepts/agents + // "reasoning": true, + // "max_reasoning_attempts": 3, + // "planning_config": {{ + // "reasoning_effort": "medium", + // "llm": {{"model": "deepseek-chat", "provider": "deepseek"}} + // }}, + // "multimodal": false, + // "allow_code_execution": false, + // "code_execution_mode": "safe", + // "knowledge_sources": [], + // "knowledge_config": {{}}, + // "inject_date": true, + // "date_format": "%Y-%m-%d", + // "security_config": {{}}, + + // Agent behavior settings + "settings": {{ +{settings_block} + }} +}} +""" + + +def _task_to_json_fragment(task: dict[str, Any]) -> str: + """Convert task wizard data to a JSON-like fragment for embedding in crew JSONC.""" + lines = [] + lines.append(" {") + lines.append(" // Task identifier") + lines.append(f' "name": {json.dumps(task["name"])},') + lines.append("") + lines.append(" // What the task should accomplish") + lines.append( + " // Use {placeholder} inputs here; crewai run prompts for missing values" + ) + lines.append(f' "description": {json.dumps(task["description"])},') + lines.append("") + lines.append(" // Clear definition of what the output should look like") + lines.append(f' "expected_output": {json.dumps(task["expected_output"])},') + lines.append("") + lines.append( + " // Optional task guardrail(s) validate output before completion" + ) + lines.append(' // Use "guardrail" for one rule or "guardrails" for many') + lines.append(" // Failed guardrails retry up to guardrail_max_retries times") + lines.append(' // "guardrail": "Every factual claim needs context support.",') + lines.append(' // "guardrails": [') + lines.append(' // "Every factual claim must be supported by context.",') + lines.append(' // "The answer must match the expected output format."') + lines.append(" // ],") + lines.append(' // 
"guardrail_max_retries": 2,') + lines.append("") + lines.append(" // Advanced task options:") + lines.append(" // Docs: https://docs.crewai.com/concepts/tasks") + lines.append(' // "output_json": null,') + lines.append(' // "output_pydantic": null,') + lines.append(' // "response_model": null,') + lines.append(' // "markdown": false,') + lines.append(' // "input_files": [],') + lines.append(' // "security_config": {},') + lines.append("") + lines.append(" // Which agent handles this task") + lines.append(f' "agent": {json.dumps(task["agent"])}') + + if task.get("context"): + lines[-1] += "," # add comma to agent line + lines.append("") + lines.append(" // Task outputs used as context") + lines.append(f' "context": {json.dumps(task["context"])}') + + if task.get("output_file"): + lines[-1] += "," + lines.append("") + lines.append(" // Save output to a file") + lines.append(f' "output_file": {json.dumps(task["output_file"])}') + + lines.append("") + lines.append(' // "tools": [],') + lines.append(' // "human_input": false,') + lines.append(' // "async_execution": false') + lines.append(" }") + return "\n".join(lines) + + +def _crew_to_jsonc( + name: str, + agents: list[dict[str, Any]], + tasks: list[dict[str, Any]], + settings: dict[str, Any], +) -> str: + """Generate the full crew.jsonc from wizard data.""" + agent_names_json = json.dumps([a["name"] for a in agents]) + tasks_fragments = ",\n".join(_task_to_json_fragment(t) for t in tasks) + inputs_json = json.dumps(settings.get("inputs", {}), indent=4) + # Re-indent inputs to 4-space + inputs_lines = inputs_json.split("\n") + if len(inputs_lines) > 1: + inputs_json = ( + inputs_lines[0] + "\n" + "\n".join(" " + line for line in inputs_lines[1:]) + ) + + process = settings.get("process", "sequential") + memory = "true" if settings.get("memory") else "false" + + return f"""\ +{{ + // Display name for this crew + "name": {json.dumps(name)}, + + // Agents to include — each must have a matching agents/.jsonc file + 
"agents": {agent_names_json}, + + // Task definitions — executed in order for sequential process + "tasks": [ +{tasks_fragments} + ], + + // Execution process + // "sequential" — tasks run in order, each receiving prior task outputs + // "hierarchical" — a manager agent delegates tasks (requires manager_llm) + "process": "{process}", + + // Enable verbose logging during execution + "verbose": true, + + // Enable crew memory — persists context and learnings across tasks + "memory": {memory}, + + // Automatically plan the execution strategy before running tasks + // "planning": false, + + // LLM for the planning step (used when planning is true) + // "planning_llm": "openai/gpt-4o", + + // LLM for the manager agent (required when process is "hierarchical") + // "manager_llm": "openai/gpt-4o", + + // Crew-level LLM fields also accept object form for custom endpoints + // "chat_llm": {{"model": "llama3", "provider": "ollama", "base_url": "http://localhost:11434"}}, + + // Advanced crew options: + // Docs: https://docs.crewai.com/concepts/crews + // "manager_agent": "{agents[0]["name"]}", + // "function_calling_llm": "openai/gpt-4o-mini", + // "max_rpm": null, + // "cache": true, + // "knowledge_sources": [], + // "embedder": {{}}, + // "output_log_file": "crew.log", + // "stream": false, + // "tracing": false, + // "security_config": {{}}, + + // Optional runtime input defaults. + // Use {{placeholder}} in agent or task text, for example: + // "description": "Research {{topic}} and write a brief" + // `crewai run` prompts for any placeholders missing from this object. 
+ "inputs": {inputs_json} +}} +""" + + +# ── Model selection ───────────────────────────────────────────── + + +def _select_model() -> str: + """Two-step arrow-key selection: provider, then model.""" + provider_labels = [label for _, label in _PROVIDERS] + provider_labels.append("Other (enter manually)") + + p_idx = pick_one("LLM Provider:", provider_labels) + if p_idx < 0: + return "openai/gpt-4o" + + if p_idx == len(_PROVIDERS): + custom: str = click.prompt( + click.style(" Enter model (provider/model)", fg="cyan"), + type=str, + prompt_suffix=click.style(" > ", fg="bright_white"), + ) + return custom.strip() + + provider_key, provider_name = _PROVIDERS[p_idx] + click.secho(f" → {provider_name}", fg="green") + + models = _PROVIDER_MODELS.get(provider_key, []) + if not models: + custom = click.prompt( + click.style(f" Enter model name for {provider_key}/", fg="cyan"), + type=str, + prompt_suffix=click.style(" > ", fg="bright_white"), + ) + return f"{provider_key}/{custom.strip()}" + + model_labels = [f"{label} ({model_id})" for model_id, label in models] + model_labels.append("Other (enter model name)") + + m_idx = pick_one(f"{provider_name} Model:", model_labels) + if m_idx < 0: + return f"{provider_key}/{models[0][0]}" + + if m_idx == len(models): + custom = click.prompt( + click.style(f" Enter model name for {provider_key}/", fg="cyan"), + type=str, + prompt_suffix=click.style(" > ", fg="bright_white"), + ) + result = f"{provider_key}/{custom.strip()}" + else: + model_id = models[m_idx][0] + result = f"{provider_key}/{model_id}" + + click.secho(f" → {result}", fg="green") + return result + + +def _default_model_for_provider(provider: str | None) -> str | None: + """Return the default provider/model string for a ``--provider`` value.""" + if not provider: + return None + normalized = provider.strip().lower() + if not normalized: + return None + if "/" in normalized: + return normalized + models = _PROVIDER_MODELS.get(normalized) + if not models: + return None + 
return f"{normalized}/{models[0][0]}" + + +# ── Helpers ───────────────────────────────────────────────────── + + +def _write_jsonc(path: Path, content: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + + +def _setup_env(folder_path: Path, llm_model: str) -> None: + """Prompt for API keys based on the selected provider.""" + click.echo() + env_vars = load_env_vars(folder_path) + env_vars["MODEL"] = llm_model + + provider = llm_model.split("/")[0] if "/" in llm_model else llm_model + if provider in ENV_VARS: + for details in ENV_VARS[provider]: + if details.get("default", False): + for key, value in details.items(): + if key not in ["prompt", "key_name", "default"]: + env_vars[key] = value + elif "key_name" in details: + api_key_value = click.prompt( + click.style(f" {details['prompt']}", fg="cyan"), + default="", + show_default=False, + prompt_suffix=click.style(" > ", fg="bright_white"), + ) + if api_key_value.strip(): + env_vars[details["key_name"]] = api_key_value + + if env_vars: + write_env_file(folder_path, env_vars) + click.secho(" API keys and model saved to .env file", fg="green") + + +# ── Main ──────────────────────────────────────────────────────── + + +def create_json_crew( + name: str, + provider: str | None = None, + skip_provider: bool = False, +) -> None: + """Scaffold a new JSON-first crew project.""" + import keyword + import shutil + + enable_prompt_line_editing() + + name = name.rstrip("/") + if not name.strip(): + raise ValueError("Project name cannot be empty") + + folder_name = name.replace(" ", "_").replace("-", "_").lower() + folder_name = re.sub(r"[^a-zA-Z0-9_]", "", folder_name) + + if not folder_name or folder_name[0].isdigit(): + raise ValueError( + f"Project name '{name}' produces invalid folder name '{folder_name}'" + ) + + if keyword.iskeyword(folder_name): + raise ValueError(f"'{folder_name}' is a reserved Python keyword") + + folder_path = Path(folder_name) + if 
folder_path.exists(): + if not click.confirm(f"Folder {folder_name} already exists. Override?"): + click.secho("Cancelled.", fg="yellow") + sys.exit(0) + shutil.rmtree(folder_path) + + click.echo() + click.secho(f" Creating crew: {name}", fg="green", bold=True) + + agents, tasks, crew_settings = _wizard_agents_and_tasks( + skip_provider=skip_provider, + default_llm=_default_model_for_provider(provider), + ) + + # Create directories + folder_path.mkdir(parents=True) + (folder_path / "agents").mkdir() + (folder_path / "tools").mkdir() + (folder_path / "skills").mkdir() + (folder_path / "knowledge").mkdir() + + for agent in agents: + _write_jsonc( + folder_path / "agents" / f"{agent['name']}.jsonc", + _agent_to_jsonc(agent), + ) + + _write_jsonc( + folder_path / "crew.jsonc", + _crew_to_jsonc(name, agents, tasks, crew_settings), + ) + + # Write pyproject.toml + (folder_path / "pyproject.toml").write_text( + _PYPROJECT_TOML.format(folder_name=folder_name, name=name), + encoding="utf-8", + ) + + # Write .gitignore + (folder_path / ".gitignore").write_text(_GITIGNORE, encoding="utf-8") + + # Write README + (folder_path / "README.md").write_text( + _README.format(name=name), + encoding="utf-8", + ) + + # Write knowledge placeholder + (folder_path / "knowledge" / "user_preference.txt").write_text( + "# Add your knowledge files here\n", + encoding="utf-8", + ) + + # Keep skills dir tracked by git + (folder_path / "skills" / ".gitkeep").write_text("", encoding="utf-8") + + # Setup .env with API keys + if not skip_provider: + models = list({a["llm"] for a in agents}) + for model in models: + _setup_env(folder_path, model) + + click.echo() + click.secho(f" ✔ Crew {name} created successfully!", fg="green", bold=True) + click.echo() + click.secho(" Next steps:", bold=True) + click.echo() + click.echo(f" cd {folder_name}") + click.echo() + click.secho(" Run your crew:", fg="cyan") + click.echo(" crewai run") + click.echo() + click.secho(" Customize your crew:", fg="cyan") + 
click.echo(" agents/*.jsonc Define agent roles, goals, and LLMs") + click.echo(" crew.jsonc Configure tasks and optional input defaults") + click.echo(" tools/ Add custom tools (Python)") + click.echo() diff --git a/lib/cli/src/crewai_cli/crew_run_tui.py b/lib/cli/src/crewai_cli/crew_run_tui.py new file mode 100644 index 000000000..ce60c2e93 --- /dev/null +++ b/lib/cli/src/crewai_cli/crew_run_tui.py @@ -0,0 +1,2098 @@ +"""Full-screen Textual TUI for crew execution. + +Two-column layout: left sidebar (tasks/agents/tokens) + main content +(task header, plan checklist, activity timeline, streaming output). +""" + +import json as _json +import re +import threading +import time +from typing import Any, ClassVar, cast + +from rich.text import Text +from textual import work +from textual.app import App, ComposeResult +from textual.binding import Binding, BindingType +from textual.containers import Horizontal, Vertical, VerticalScroll +from textual.css.query import NoMatches +from textual.screen import ModalScreen +from textual.widgets import Button, Footer, Header, Static + + +_SPINNER = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏" + +# CrewAI brand palette +_C_PRIMARY = "#FF5A50" # crewai.primary (coral red) +_C_TEAL = "#1F7982" # crewai.secondary / tertiary +_C_GREEN = "#4aba6a" # success green (warm, not neon) +_C_RED = "#FF5A50" # error (same as primary) +_C_TEXT = "#e0e0e0" # light text on dark bg +_C_DIM = "#AAAAAA" # crewai.background-grey +_C_MUTED = "#666666" # dimmer than _C_DIM for past timeline + +_STEP_NUMBER_RE = re.compile(r"\bstep\s+(\d+)\b", re.IGNORECASE) +_REFINEMENT_RE = re.compile(r"^\s*step\s+(\d+)\s*:\s*(.+)\s*$", re.IGNORECASE) +_INTERNAL_TOOL_NAMES = {"create_reasoning_plan"} + + +def _enable_tracing_in_dotenv() -> None: + """Append CREWAI_TRACING_ENABLED=true to .env if not already set.""" + from pathlib import Path + + env_file = Path.cwd() / ".env" + key = "CREWAI_TRACING_ENABLED" + try: + if env_file.exists(): + content = env_file.read_text() + if key in content: + return + 
sep = "" if content.endswith("\n") or not content else "\n" + env_file.write_text(f"{content}{sep}{key}=true\n") + else: + env_file.write_text(f"{key}=true\n") + except OSError: + # Persisting the tracing flag is best-effort; an unwritable .env + # must not block the run (tracing stays enabled for this session). + pass + + +def _unescape_text(s: str) -> str: + """Replace literal backslash-n sequences with real newlines.""" + return s.replace("\\n", "\n").replace("\\t", " ") + + +def _try_parse_structured(text: str) -> Any | None: + """Try JSON first, then Python repr (single-quoted dicts/lists).""" + try: + return _json.loads(text) + except (ValueError, TypeError): + pass + try: + import ast + + obj = ast.literal_eval(text) + if isinstance(obj, (dict, list)): + return obj + except Exception: # noqa: S110 + pass + return None + + +def _format_json_in_text(text: str) -> str: + """Find JSON objects/arrays in text and pretty-print them.""" + if not text or ("{" not in text and "[" not in text): + return text + + result: list[str] = [] + i = 0 + while i < len(text): + if text[i] in ("{", "["): + close = "}" if text[i] == "{" else "]" + depth = 0 + for j in range(i, len(text)): + if text[j] == text[i]: + depth += 1 + elif text[j] == close: + depth -= 1 + if depth == 0: + candidate = text[i : j + 1] + parsed = _try_parse_structured(candidate) + if parsed is not None: + formatted = _json.dumps( + parsed, indent=2, ensure_ascii=False + ) + result.append(formatted) + i = j + 1 + else: + result.append(text[i]) + i += 1 + break + else: + remaining = text[i:] + parsed = _try_parse_structured(remaining) + if parsed is not None: + result.append(_json.dumps(parsed, indent=2, ensure_ascii=False)) + else: + result.append(remaining) + break + else: + result.append(text[i]) + i += 1 + + return "".join(result) + + +def _colorize_json_line(t: Text, line: str) -> None: + """Append a single line with soft JSON syntax highlighting.""" + stripped = line.lstrip() + leading = line[: len(line) 
def _append_highlighted(t: Text, content: str, indent: str, max_lines: int = 50) -> int:
    """Append ``content`` to ``t`` line by line, colourising JSON-like text.

    Content whose first non-blank character is ``{``, ``[`` or ``"`` is
    rendered through ``_colorize_json_line``; anything else is appended in
    the dim colour. Only the first ``max_lines`` lines are written.

    Returns:
        The total number of lines in ``content``, including lines that were
        not written because of ``max_lines``.
    """
    rows = content.split("\n")
    looks_like_json = content.lstrip().startswith(("{", "[", '"'))

    for row in rows[:max_lines]:
        # Every rendered line starts with the caller's indent, unstyled.
        t.append(f"{indent} ", style="")
        if not looks_like_json:
            t.append(row, style=_C_DIM)
        else:
            _colorize_json_line(t, row)
        t.append("\n")

    return len(rows)
#btn-consent-yes:hover { + background: #28969f; + } + #btn-consent-yes:disabled { + background: #1F7982; + color: #e0e0e0; + text-opacity: 100%; + opacity: 100%; + } + #btn-consent-no { + background: #333333; + color: #AAAAAA; + border: none; + } + #btn-consent-no:hover { + background: #444444; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("y", "consent_yes", "Yes", show=False), + Binding("n", "consent_no", "No", show=False), + Binding("escape", "consent_no", "Cancel", show=False), + ] + + def __init__(self) -> None: + super().__init__() + self._sending = False + self._frame = 0 + self._spin_timer: Any = None + + def compose(self) -> ComposeResult: + with Vertical(id="consent-dialog"): + yield Static(self._build_content(), id="consent-text") + with Horizontal(id="consent-buttons"): + yield Button("View Traces", id="btn-consent-yes", classes="consent-btn") + yield Button("Cancel", id="btn-consent-no", classes="consent-btn") + + def _build_content(self) -> Text: + t = Text() + t.append(" View execution traces on CrewAI AMP\n\n", style=f"bold {_C_TEXT}") + t.append(" Sends agent decisions, tool calls, and\n", style=_C_DIM) + t.append(" timing data. 
Link expires in 24h.\n\n", style=_C_DIM) + t.append(" Traces will be enabled for future runs.\n", style=_C_MUTED) + return t + + def _start_sending(self) -> None: + self._sending = True + btn_yes = self.query_one("#btn-consent-yes", Button) + btn_no = self.query_one("#btn-consent-no", Button) + btn_yes.disabled = True + btn_yes.label = f"{_SPINNER[0]} Loading…" + btn_no.display = False + self._spin_timer = self.set_interval(1 / 8, self._spin_tick) + cast("CrewRunApp", self.app)._on_trace_consent_accepted() + + def _spin_tick(self) -> None: + self._frame += 1 + try: + btn = self.query_one("#btn-consent-yes", Button) + btn.label = f"{_SPINNER[self._frame % len(_SPINNER)]} Loading…" + except Exception: # noqa: S110 + pass + + def on_button_pressed(self, event: Button.Pressed) -> None: + if self._sending: + return + if event.button.id == "btn-consent-yes": + self._start_sending() + else: + self.dismiss(False) + + def action_consent_yes(self) -> None: + if self._sending: + return + self._start_sending() + + def action_consent_no(self) -> None: + if self._sending: + return + self.dismiss(False) + + +class CrewRunApp(App[Any]): + TITLE = "CrewAI" + + CSS = """ +Screen { + background: #131313; +} + +#body { + height: 1fr; +} + +#sidebar { + width: 34; + background: #1c1c1c; + border-right: vkey #333333; + scrollbar-size-vertical: 1; + scrollbar-color: #666666; + scrollbar-color-hover: #FF5A50; + scrollbar-background: #1c1c1c; + overflow-y: auto; + overflow-x: hidden; +} + +#sidebar-content { + width: 100%; + height: auto; + padding: 1 0; +} + +#main-panel { + width: 1fr; +} + +#task-header { + height: auto; + max-height: 6; + padding: 1 2; + background: #1c1c1c; + border-bottom: hkey #333333; +} + +#scroll-area { + height: 3fr; + min-height: 6; + scrollbar-size-vertical: 1; + scrollbar-color: #666666; + scrollbar-color-hover: #FF5A50; + scrollbar-background: #131313; +} + +#main-content { + padding: 1 2; + height: auto; +} + +Header { + background: #1c1c1c; + color: 
#FF5A50; +} + +Footer { + background: #1c1c1c; +} + +FooterKey { + background: #1c1c1c; + color: #AAAAAA; +} + +FooterKey .footer-key--key { + background: #262626; + color: #FF5A50; +} + +#log-panel { + height: 2fr; + min-height: 6; + background: #1c1c1c; + border-top: hkey #333333; + scrollbar-size-vertical: 1; + scrollbar-color: #666666; + scrollbar-color-hover: #FF5A50; + scrollbar-background: #1c1c1c; +} + +#log-content { + padding: 1 2; + height: auto; +} + +#sidebar-actions { + display: none; + height: auto; + padding: 0 1; + margin-top: 1; + border-top: hkey #333333; +} + +.action-btn { + width: 100%; + min-width: 20; + height: 3; + margin: 1 1 0 1; + text-style: bold; +} + +#btn-traces { + background: #1F7982; + color: #e0e0e0; + border: none; +} +#btn-traces:hover { + background: #28969f; +} +#btn-traces:disabled { + background: #1a4a50; + color: #888888; +} + +#btn-deploy { + background: #333333; + color: #e0e0e0; + border: none; +} +#btn-deploy:hover { + background: #444444; +} + +#btn-traces-done { + background: #1a3a3a; + color: #1F7982; + border: none; +} +#btn-traces-done:hover { + background: #1F7982; + color: #e0e0e0; +} +""" + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("q", "quit", "Quit"), + Binding("s", "toggle_sidebar", "Sidebar"), + Binding("l", "toggle_logs", "Logs"), + Binding("t", "view_traces", "Traces", show=False), + Binding("d", "deploy_crew", "Deploy", show=False), + Binding("down", "log_down", "Log ↓", show=False), + Binding("up", "log_up", "Log ↑", show=False), + Binding("enter", "log_toggle", "Expand", show=False), + ] + + def __init__( + self, + crew_name: str = "Crew", + total_tasks: int = 0, + agent_names: list[str] | None = None, + task_names: list[str] | None = None, + ): + super().__init__() + self.title = f"CrewAI — {crew_name}" + self.sub_title = "0:00" + self._crew_name = crew_name + self._lock = threading.RLock() + + self._total_tasks = total_tasks + self._current_task_idx = 0 + self._current_task_desc = "" + 
self._current_agent = "" + self._task_names = task_names or [] + self._agent_names = agent_names or [] + self._task_statuses: dict[int, str] = { + i: "pending" for i in range(1, total_tasks + 1) + } + # Maps a task's identity to state captured when it started (sidebar + # index, description, agent, start time) so completion/failure events + # build their log entry from the right task even when tasks run + # async/overlapping. + self._task_state_by_key: dict[str, dict[str, Any]] = {} + + self._timeline: list[tuple[str, str, str]] = [] + self._current_step: tuple[str, str, str] | None = None + + self._input_tokens = 0 + self._output_tokens = 0 + self._live_out_tokens = 0 + self._pending_input_estimate = 0 + self._llm_calls = 0 + + self._streaming_text = "" + self._is_streaming = False + self._current_llm_text = "" + self._task_full_output = "" + + self._plan: dict[str, Any] | None = None + self._plan_step_status: dict[int, str] = {} + self._awaiting_replan = False + + self._status = "starting" + self._start_time = time.time() + self._task_start_time = time.time() + self._final_output: str | None = None + self._error: str | None = None + self._frame = 0 + + self._task_logs: list[dict[str, Any]] = [] + self._current_task_steps: list[dict[str, Any]] = [] + + self._log_entries: list[dict[str, Any]] = [] + self._log_cursor: int = 0 + self._log_expanded: set[int] = set() + self._log_scroll_needed: bool = False + self._log_line_map: list[tuple[int, int, int]] = [] + + self._event_handlers: list[tuple[type, Any]] = [] + + self._crew: Any = None + self._default_inputs: dict[str, Any] | None = None + self._crew_result: Any = None + self._crew_json_path: Any = None + self._elapsed_frozen: float | None = None + self._want_deploy: bool = False + self._trace_url: str | None = None + self._consent_screen: TraceConsentScreen | None = None + + # ── Layout ────────────────────────────────────────────── + + def compose(self) -> ComposeResult: + yield Header(show_clock=False) + with 
Horizontal(id="body"): + with VerticalScroll(id="sidebar"): + yield Static(id="sidebar-content") + with Vertical(id="sidebar-actions"): + yield Button("View Traces", id="btn-traces", classes="action-btn") + yield Button("Deploy", id="btn-deploy", classes="action-btn") + with Vertical(id="main-panel"): + yield Static(id="task-header") + with VerticalScroll(id="scroll-area"): + yield Static(id="main-content") + with VerticalScroll(id="log-panel"): + yield Static(id="log-content") + yield Footer() + + def on_mount(self) -> None: + self._start_time = time.time() + self._subscribe() + self._tick_timer = self.set_interval(1 / 8, self._tick) + if self._crew: + self._run_crew_worker() + elif self._crew_json_path: + self._load_and_run_worker() + + # ── Crew execution ────────────────────────────────────── + + @work(thread=True, exclusive=True, group="crew") + def _load_and_run_worker(self) -> None: + from crewai.events.listeners.tracing.utils import ( + set_suppress_tracing_messages, + set_tui_mode, + ) + + set_tui_mode(True) + set_suppress_tracing_messages(True) + try: + from crewai.project.crew_loader import load_crew + + crew, default_inputs = load_crew(self._crew_json_path) + crew.verbose = False + for agent in crew.agents: + agent.verbose = False + if hasattr(agent, "llm") and hasattr(agent.llm, "stream"): + agent.llm.stream = True + + task_names = [] + for task in crew.tasks: + name = getattr(task, "name", "") or "" + if not name: + desc = getattr(task, "description", "") or "Task" + name = desc[:40] + task_names.append(name) + + agent_names = [] + for agent in crew.agents: + name = ( + getattr(agent, "role", "") or getattr(agent, "name", "") or "Agent" + ) + agent_names.append(name) + + self._crew = crew + self._default_inputs = default_inputs + + def _apply_crew_info() -> None: + with self._lock: + self._total_tasks = len(crew.tasks) + self._task_names = task_names + self._agent_names = agent_names + self._task_statuses = { + i: "pending" for i in range(1, 
len(crew.tasks) + 1) + } + self.title = f"CrewAI — {crew.name or 'Crew'}" + self._crew_name = crew.name or "Crew" + self._start_time = time.time() + self._run_crew_worker() + + self.call_from_thread(_apply_crew_info) + except Exception as e: + self.call_from_thread(self._on_crew_failed, str(e)) + + @work(thread=True, exclusive=True, group="crew") + def _run_crew_worker(self) -> None: + from crewai.events.listeners.tracing.utils import ( + set_suppress_tracing_messages, + set_tui_mode, + ) + + set_tui_mode(True) + set_suppress_tracing_messages(True) + try: + result = self._crew.kickoff(inputs=self._default_inputs) + output = result.raw if result and hasattr(result, "raw") else None + with self._lock: + self._crew_result = result + self.call_from_thread(self._on_crew_done, output) + except Exception as e: + self.call_from_thread(self._on_crew_failed, str(e)) + + def _on_crew_done(self, output: str | None) -> None: + self._unsubscribe() + with self._lock: + self._status = "completed" + self._final_output = output + self._is_streaming = False + self._streaming_text = "" + self._current_step = None + self._timeline = [] + self._elapsed_frozen = time.time() - self._start_time + self._collapse_plan_on_task_done() + for k in self._task_statuses: + if self._task_statuses[k] == "active": + self._task_statuses[k] = "done" + now = time.time() + for entry in self._log_entries: + if entry["status"] == "running": + entry["status"] = "timeout" + entry["error"] = "No result received before crew completed" + entry["duration"] = now - entry["start_time"] + try: + from crewai.events.listeners.tracing.trace_listener import ( + TraceCollectionListener, + ) + + listener: TraceCollectionListener | None = getattr( + TraceCollectionListener, "_instance", None + ) + if listener and listener.batch_manager: + bm = listener.batch_manager + self._trace_url = ( + getattr(bm, "trace_url", None) or bm.ephemeral_trace_url + ) + except Exception: # noqa: S110 + pass + try: + 
def action_log_down(self) -> None:
    """Move the activity-log cursor one entry down and redraw the panel.

    Does nothing when the ``#log-panel`` widget cannot be queried, when it
    is hidden, or when there are no log entries.  The cursor is clamped to
    the last entry and the scroll-needed flag is raised before the panel is
    refreshed.
    """
    try:
        panel_visible = bool(self.query_one("#log-panel").display)
    except Exception:
        # Any lookup failure means there is no panel to navigate.
        return
    if not panel_visible:
        return

    with self._lock:
        has_entries = bool(self._log_entries)
        if has_entries:
            last_index = len(self._log_entries) - 1
            self._log_cursor = min(self._log_cursor + 1, last_index)
            self._log_scroll_needed = True

    # Redraw after leaving the lock block, and only when the cursor moved.
    if has_entries:
        self._refresh_log_panel()
action_log_toggle(self) -> None: + try: + if not self.query_one("#log-panel").display: + return + except Exception: + return + should_refresh = False + with self._lock: + if self._log_entries: + if self._log_cursor in self._log_expanded: + self._log_expanded.discard(self._log_cursor) + else: + self._log_expanded.add(self._log_cursor) + should_refresh = True + if should_refresh: + self._refresh_log_panel() + + async def action_quit(self) -> None: + self._unsubscribe() + self.exit(self._crew_result) + + def action_view_traces(self) -> None: + if self._status != "completed": + return + if self._trace_url: + import webbrowser + + try: + webbrowser.open(self._trace_url) + except Exception: # noqa: S110 + pass + return + self._consent_screen = TraceConsentScreen() + self.push_screen(self._consent_screen) + + def _on_trace_consent_accepted(self) -> None: + self._send_traces_worker() + + @work(thread=True) + def _send_traces_worker(self) -> None: + import webbrowser + + try: + from crewai.events.listeners.tracing.utils import ( + set_suppress_tracing_messages, + set_tui_mode, + ) + + set_tui_mode(True) + set_suppress_tracing_messages(True) + + from crewai.events.listeners.tracing.trace_listener import ( + TraceCollectionListener, + ) + from crewai.events.listeners.tracing.utils import ( + mark_first_execution_completed, + ) + + listener: TraceCollectionListener | None = getattr( + TraceCollectionListener, "_instance", None + ) + if not listener: + self.call_from_thread(self._dismiss_consent_modal) + return + + bm = listener.batch_manager + url = getattr(bm, "trace_url", None) or bm.ephemeral_trace_url + + if not url: + handler = listener.first_time_handler + handler.set_batch_manager(bm) + handler._initialize_backend_and_send_events() + url = handler.ephemeral_url or bm.ephemeral_trace_url + + if listener.first_time_handler.is_first_time: + mark_first_execution_completed(user_consented=True) + + _enable_tracing_in_dotenv() + + if url: + self._trace_url = url + + def 
def _dismiss_consent_modal(self) -> None:
    """Close the trace-consent screen if one is stored and still attached.

    Best-effort: any exception raised while inspecting or dismissing the
    screen is swallowed.
    """
    try:
        consent = self._consent_screen
        if not consent:
            return
        if not consent.is_attached:
            return
        consent.dismiss(False)
    except Exception:  # noqa: S110
        pass
def _tick(self) -> None:
    """Advance the animation frame, refresh the clock, and redraw the panels.

    The sub-title shows elapsed time as ``M:SS``.  When ``_elapsed_frozen``
    holds a value it is displayed instead of the live clock.  Rendering
    runs under ``self._lock``; the log panel is redrawn only while it is
    displayed.  If a widget lookup raises ``NoMatches`` the redraw is
    abandoned for this tick.
    """
    self._frame += 1

    frozen = getattr(self, "_elapsed_frozen", None)
    # Compare against None explicitly: a frozen duration of 0.0 is falsy but
    # still means the clock must stop, so ``frozen or live`` would be wrong.
    elapsed = frozen if frozen is not None else time.time() - self._start_time
    minutes, seconds = divmod(int(elapsed), 60)
    self.sub_title = f"{minutes}:{seconds:02d}"

    try:
        with self._lock:
            self._render_sidebar()
            self._render_task_header()
            self._render_main_content()
            if self.query_one("#log-panel").display:
                self._render_log_panel()
    except NoMatches:
        return
t.append(f" {disp}\n", style=_C_DIM) + + t.append("\n") + t.append(" TOKENS\n", style=f"bold {_C_PRIMARY}") + t.append("\n") + + out = self._output_tokens + self._live_out_tokens + t.append(f" ↑ {self._input_tokens:,}\n", style=_C_DIM) + t.append(f" ↓ {out:,}\n", style=_C_DIM) + + widget.update(t) + + # ── Task header rendering ─────────────────────────────── + + def _render_task_header(self) -> None: + widget = self.query_one("#task-header", Static) + t = Text() + + if self._status == "completed": + elapsed = self._elapsed_frozen or (time.time() - self._start_time) + t.append("✔ ", style=f"bold {_C_GREEN}") + t.append(f"Completed {self._total_tasks} tasks", style=f"bold {_C_GREEN}") + t.append(f" {elapsed:.1f}s", style=_C_DIM) + + out = self._output_tokens + self._live_out_tokens + parts = [] + if self._input_tokens: + parts.append(f"↑{self._input_tokens:,}") + if out: + parts.append(f"↓{out:,}") + if parts: + t.append(f" {' '.join(parts)} tokens", style=_C_DIM) + + elif self._status == "failed": + t.append("✘ ", style=f"bold {_C_RED}") + t.append("Failed", style=f"bold {_C_RED}") + if self._error: + t.append(f"\n{self._error[:120]}", style=_C_RED) + + elif self._current_task_idx > 0: + t.append( + f"Task {self._current_task_idx}/{self._total_tasks}", + style=f"bold {_C_PRIMARY}", + ) + if self._current_task_desc: + desc = self._current_task_desc + if len(desc) > 80: + desc = desc[:79] + "…" + t.append(f" — {desc}", style=_C_TEXT) + if self._current_agent: + t.append("\nAgent: ", style=_C_DIM) + t.append(self._current_agent, style=f"bold {_C_TEXT}") + + else: + t.append(f"{self._spinner()} ", style=_C_PRIMARY) + if not self._crew: + t.append("Loading crew…", style=_C_DIM) + else: + t.append("Starting crew…", style=_C_DIM) + + widget.update(t) + + # ── Main content rendering ────────────────────────────── + + def _render_main_content(self) -> None: + widget = self.query_one("#main-content", Static) + t = Text() + should_scroll = False + + # Plan section + if 
self._plan and self._plan.get("steps"): + plan_title = self._plan.get("plan", "Plan") + completed = self._status == "completed" and all( + self._plan_step_status.get(step.get("step_number")) == "done" + for step in self._plan["steps"] + ) + if completed: + total = len(self._plan["steps"]) + t.append(" PLAN ", style=f"bold {_C_MUTED}") + t.append(f"✔ {total} steps completed\n\n", style=_C_MUTED) + else: + t.append(" PLAN\n", style=f"bold {_C_MUTED}") + t.append(" ▸ ", style=f"bold {_C_TEAL}") + t.append(f"{plan_title[:80]}\n", style=f"bold {_C_TEAL}") + t.append("\n") + + for step in self._plan["steps"]: + sn = step.get("step_number", 0) + desc = step.get("description", "") + short = desc[:90] + if len(desc) > 90: + short += "…" + + st = self._plan_step_status.get(sn, "pending") + if st == "done": + t.append(" ✔ ", style=_C_GREEN) + t.append(f"{sn}. {short}\n", style=_C_MUTED) + elif st == "failed": + t.append(" ✘ ", style=_C_RED) + t.append(f"{sn}. {short}\n", style=_C_RED) + elif st == "active": + t.append(f" {self._spinner()} ", style=_C_PRIMARY) + t.append(f"{sn}. {short}\n", style=_C_TEXT) + else: + t.append(" ○ ", style=_C_MUTED) + t.append(f"{sn}. 
{short}\n", style=_C_MUTED) + t.append("\n") + + # Current activity indicator + if self._current_step: + sty, msg, _detail = self._current_step + if sty == "yellow": + t.append(f" {self._spinner()} ", style=_C_PRIMARY) + t.append(f"{msg}\n\n", style=_C_DIM) + elif sty == "teal": + t.append(f" {self._spinner()} ", style=_C_TEAL) + t.append(f"{msg}\n\n", style=_C_TEAL) + + # Streaming output + if self._is_streaming and self._streaming_text: + text = self._filtered_streaming_text() + text = _unescape_text(text) + if text.strip(): + lines = text.rstrip().split("\n") + for line in lines[-40:]: + t.append(f" {line}\n", style=_C_TEXT) + should_scroll = True + + # Final output + if self._status == "completed" and self._final_output: + t.append("\n") + t.append(" ━━━ Result ━━━\n\n", style=f"bold {_C_TEAL}") + output = _unescape_text(self._final_output) + output = _format_json_in_text(output) + is_json = output.lstrip()[:1] in ("{", "[", '"') + for line in output.split("\n"): + t.append(" ") + if is_json: + _colorize_json_line(t, line) + else: + t.append(line, style=_C_TEXT) + t.append("\n") + + widget.update(t) + + if should_scroll: + try: + scroll = self.query_one("#scroll-area", VerticalScroll) + if ( + scroll.max_scroll_y <= 0 + or scroll.scroll_y >= scroll.max_scroll_y - 50 + ): + scroll.scroll_end(animate=False) + except Exception: # noqa: S110 + pass + + # ── Log panel rendering ────────────────────────────────── + + def _render_log_panel(self) -> None: + widget = self.query_one("#log-content", Static) + t = Text() + t.append(" ACTIVITY LOG", style=f"bold {_C_PRIMARY}") + t.append(" ↑↓ navigate enter expand/collapse\n", style=_C_MUTED) + + if not self._log_entries: + t.append("\n No activity yet.\n", style=_C_MUTED) + widget.update(t) + return + + if self._log_cursor >= len(self._log_entries): + self._log_cursor = len(self._log_entries) - 1 + + cursor_line = 0 + line_map: list[tuple[int, int, int]] = [] + now = time.time() + for i, entry in 
enumerate(self._log_entries): + entry_start_line = t.plain.count("\n") + name = entry["tool_name"] + status = entry["status"] + focused = i == self._log_cursor + expanded = i in self._log_expanded + if focused: + cursor_line = entry_start_line + + if status == "running" and (now - entry["start_time"]) > 120: + entry["status"] = "timeout" + entry["error"] = "No response received (timeout)" + entry["duration"] = now - entry["start_time"] + status = "timeout" + self._log_expanded.add(i) + + arrow = "▾" if expanded else "▸" + + if focused: + t.append("\n") + t.append(" > ", style=_C_PRIMARY) + else: + t.append("\n ", style="") + + if status == "running": + elapsed = now - entry["start_time"] + t.append(f"{arrow} ", style=_C_MUTED) + t.append(f"{self._spinner()} ", style=_C_PRIMARY) + t.append(f"{name}", style=f"bold {_C_TEXT}" if focused else _C_TEXT) + t.append(f" {elapsed:.0f}s\n", style=_C_MUTED) + elif status == "success": + t.append(f"{arrow} ", style=_C_MUTED) + t.append("✔ ", style=_C_GREEN) + t.append(f"{name}", style=f"bold {_C_TEXT}" if focused else _C_DIM) + if entry.get("from_cache"): + t.append(" cached\n", style=_C_TEAL) + else: + t.append(f" {entry['duration']:.1f}s\n", style=_C_MUTED) + elif status in ("error", "timeout"): + t.append(f"{arrow} ", style=_C_MUTED) + t.append("✘ ", style=_C_RED) + t.append(f"{name}", style=f"bold {_C_RED}") + dur = f" {entry['duration']:.1f}s" if entry.get("duration") else "" + t.append(f"{dur}\n", style=_C_MUTED) + + if not expanded: + continue + + indent = " " + if entry.get("args"): + t.append(f"{indent}Args:\n", style=_C_MUTED) + try: + parsed = _json.loads(entry["args"]) + formatted = _json.dumps(parsed, indent=2, ensure_ascii=False) + except (ValueError, TypeError): + formatted = entry["args"] + _append_highlighted(t, formatted, indent) + + if status in ("error", "timeout") and entry.get("error"): + t.append(f"{indent}Error:\n", style=_C_RED) + for line in str(entry["error"]).split("\n"): + if line.strip(): + 
def _filtered_streaming_text(self) -> str:
    """Return the live streaming text with machine-readable JSON hidden.

    A planner ``{"plan": ...}`` object is removed from the text: when the
    object is complete it is cut out and the text around it is kept; when
    it is still being streamed, everything from its start onward is hidden.
    The result then goes through ``_strip_step_observation_json`` and
    ``_format_json_in_text``.

    Returns:
        The filtered text, or ``""`` when nothing has been streamed.
    """
    if not self._streaming_text:
        return ""
    text = self._streaming_text

    # Strip plan JSON — both complete (already parsed) and in-progress.
    plan_start = text.find('{"plan"')
    if plan_start >= 0:
        plan_end = -1
        depth = 0
        in_string = False
        escaped = False
        for pos in range(plan_start, len(text)):
            char = text[pos]
            if in_string:
                # Inside a JSON string only an unescaped quote matters.
                # Braces there are ordinary characters and must not change
                # the nesting depth, otherwise a value such as "use }" would
                # end the object early and leak the rest of the blob.
                if escaped:
                    escaped = False
                elif char == "\\":
                    escaped = True
                elif char == '"':
                    in_string = False
            elif char == '"':
                in_string = True
            elif char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    plan_end = pos
                    break

        if plan_end >= 0:
            text = (text[:plan_start] + text[plan_end + 1 :]).strip()
        else:
            # Incomplete JSON — hide the partial blob.
            text = text[:plan_start].strip()

    text = self._strip_step_observation_json(text)
    return _format_json_in_text(text)
def _complete_step(self, style: str, message: str, detail: str = "") -> None:
    """Make ``(style, message, detail)`` the current step, archiving the old one.

    The step being replaced is appended to the timeline unless it was a
    transient placeholder ("Thinking…", "Generating response…", or any
    message starting with "⚡").  The timeline keeps only its 20 most recent
    entries.
    """
    placeholders = ("Thinking…", "Generating response…")
    with self._lock:
        outgoing = self._current_step
        if outgoing:
            old_style, old_message, old_detail = outgoing
            is_placeholder = old_message in placeholders or old_message.startswith(
                "⚡"
            )
            if not is_placeholder:
                self._timeline.append((old_style, old_message, old_detail))

        self._current_step = (style, message, detail)

        # Drop the oldest entries once the timeline grows past 20 items.
        excess = len(self._timeline) - 20
        if excess > 0:
            self._timeline = self._timeline[excess:]
def _try_parse_plan(self, text: str) -> None:
    """Detect a planner ``{"plan": ...}`` JSON object in *text* and adopt it.

    The first ``{"plan"`` marker is located and the JSON object starting
    there is decoded.  When that object has a list under ``"steps"`` it
    becomes the active plan: every step dict carrying a ``step_number``
    starts as ``"pending"`` and the awaiting-replan flag is cleared.
    Partial, malformed, or non-plan JSON is ignored, because streamed
    planner output is parsed on a best-effort basis.

    Args:
        text: Accumulated LLM output that may contain a plan object.
    """
    stripped = text.strip()
    start = stripped.find('{"plan"')
    if start < 0:
        return

    try:
        # raw_decode follows JSON string syntax, so a "{" or "}" inside a
        # string value no longer cuts the object short the way a bare brace
        # counter does.  Text after the object is simply ignored.
        data, _end = _json.JSONDecoder().raw_decode(stripped, start)
    except ValueError:
        # Incomplete or invalid JSON: nothing to adopt yet.
        return

    steps = data.get("steps") if isinstance(data, dict) else None
    if not isinstance(steps, list):
        return

    # Keep only dict steps: the plan renderer calls ``step.get(...)`` on
    # every entry, and a stray string or number would raise there.
    data["steps"] = [step for step in steps if isinstance(step, dict)]

    statuses = {}
    for step in data["steps"]:
        if "step_number" not in step:
            continue
        try:
            statuses[step["step_number"]] = "pending"
        except TypeError:
            # Unhashable step_number (e.g. a list): skip that step's status.
            continue

    self._plan = data
    self._plan_step_status = statuses
    self._awaiting_replan = False
-> dict[str, Any]: + """Return the start-time state for a completion/failure event's task. + + Tasks can run async/overlapping, so the event's task identity is + matched against the state registered when the task started rather + than assuming the most recently started task. Falls back to the + current shared state for unmatched events. Caller must hold + ``self._lock``. + """ + task = getattr(event, "task", None) + candidates: list[str] = [] + if task is not None: + task_id = str(getattr(task, "id", "") or "") + if task_id: + candidates.append(task_id) + desc = getattr(task, "name", "") or getattr(task, "description", "") or "" + if desc: + candidates.append(desc) + event_task_name = getattr(event, "task_name", "") or "" + if event_task_name: + candidates.append(event_task_name) + for key in candidates: + state = self._task_state_by_key.pop(key, None) + if state is not None: + return state + return { + "idx": self._current_task_idx, + "desc": self._current_task_desc, + "agent": self._current_agent, + "start_time": self._task_start_time, + } + + def _prepare_for_replan(self) -> None: + """Keep current statuses visible while allowing the next plan to replace it.""" + self._awaiting_replan = True + + def _apply_plan_refinements(self, refinements: list[str] | None) -> None: + """Apply refined descriptions while leaving statuses as pending/done/failed.""" + if not self._plan or not refinements: + return + + steps = self._plan.get("steps") + if not isinstance(steps, list): + return + + steps_by_number = { + step.get("step_number"): step for step in steps if isinstance(step, dict) + } + for refinement in refinements: + match = _REFINEMENT_RE.match(refinement) + if not match: + continue + step_number = int(match.group(1)) + description = match.group(2).strip() + step = steps_by_number.get(step_number) + if step is not None and description: + step["description"] = description + + def _try_parse_step_observation(self, text: str) -> bool: + """Parse streamed observation JSON 
and update the exact step it names.""" + if "step_completed_successfully" not in text: + return False + + decoder = _json.JSONDecoder() + updated = False + i = 0 + while i < len(text): + start = text.find("{", i) + if start < 0: + break + try: + payload, offset = decoder.raw_decode(text[start:]) + except ValueError: + i = start + 1 + continue + + if self._is_step_observation_payload(payload): + step_number = self._observation_step_number(payload) + if step_number is not None: + status = ( + "done" + if payload.get("step_completed_successfully") is True + else "failed" + ) + self._set_plan_step_status(step_number, status) + if payload.get("goal_already_achieved") is True: + self._mark_plan_goal_achieved(step_number) + updated = True + i = start + max(offset, 1) + + return updated + + def _observation_step_number(self, payload: dict[str, Any]) -> int | None: + raw_step_number = payload.get("step_number") + if isinstance(raw_step_number, int): + return raw_step_number + + searchable = " ".join( + str(payload.get(field) or "") + for field in ("key_information_learned", "replan_reason") + ) + match = _STEP_NUMBER_RE.search(searchable) + if not match: + return None + + return int(match.group(1)) + + # ── Event subscription ────────────────────────────────── + + def _register_handler(self, event_type: type, handler: Any) -> None: + self._event_handlers.append((event_type, handler)) + + def _unsubscribe(self) -> None: + if not self._event_handlers: + return + try: + from crewai.events.event_bus import crewai_event_bus + + for event_type, handler in self._event_handlers: + crewai_event_bus.off(event_type, handler) + except Exception: # noqa: S110 + pass + self._event_handlers.clear() + + def _subscribe(self) -> None: + from crewai.events.event_bus import crewai_event_bus + from crewai.events.types.crew_events import CrewKickoffStartedEvent + from crewai.events.types.llm_events import ( + LLMCallCompletedEvent, + LLMCallStartedEvent, + LLMStreamChunkEvent, + ) + from 
crewai.events.types.logging_events import ( + AgentLogsExecutionEvent, + AgentLogsStartedEvent, + ) + from crewai.events.types.observation_events import ( + GoalAchievedEarlyEvent, + PlanRefinementEvent, + PlanReplanTriggeredEvent, + PlanStepCompletedEvent, + PlanStepStartedEvent, + StepObservationCompletedEvent, + StepObservationFailedEvent, + StepObservationStartedEvent, + ) + from crewai.events.types.task_events import ( + TaskCompletedEvent, + TaskFailedEvent, + TaskStartedEvent, + ) + from crewai.events.types.tool_usage_events import ( + ToolUsageErrorEvent, + ToolUsageFinishedEvent, + ToolUsageStartedEvent, + ) + + @crewai_event_bus.on(CrewKickoffStartedEvent) + def on_crew_started(source: Any, event: CrewKickoffStartedEvent) -> None: + with self._lock: + if event.crew_name: + self._crew_name = event.crew_name + self.title = f"CrewAI — {event.crew_name}" + self._status = "working" + + self._register_handler(CrewKickoffStartedEvent, on_crew_started) + + @crewai_event_bus.on(TaskStartedEvent) + def on_task_started(source: Any, event: TaskStartedEvent) -> None: + with self._lock: + self._current_task_idx += 1 + idx = self._current_task_idx + self._task_start_time = time.time() + self._streaming_text = "" + self._task_full_output = "" + self._is_streaming = False + self._plan = None + self._plan_step_status = {} + self._awaiting_replan = False + + # Tasks may run async/overlapping, so earlier active rows are + # only marked done by their own completion events (with a + # final sweep in _on_crew_done). 
+ if idx in self._task_statuses: + self._task_statuses[idx] = "active" + + desc = "" + if event.task: + desc = getattr(event.task, "name", "") or "" + if not desc: + desc = getattr(event.task, "description", "") or "" + if not desc and event.task_name: + desc = event.task_name + self._current_task_desc = desc + + agent = getattr(source, "agent", None) if source else None + agent_role = (getattr(agent, "role", "") or "") if agent else "" + if agent_role: + self._current_agent = agent_role + + key = str(getattr(event.task, "id", "") or "") or desc + if key: + self._task_state_by_key[key] = { + "idx": idx, + "desc": desc, + "agent": agent_role, + "start_time": self._task_start_time, + } + + self._timeline = [] + self._current_step = None + self._set_step("yellow", "Thinking…") + + self._register_handler(TaskStartedEvent, on_task_started) + + @crewai_event_bus.on(AgentLogsStartedEvent) + def on_agent_started(source: Any, event: AgentLogsStartedEvent) -> None: + with self._lock: + role = event.agent_role.split("\n")[0] if event.agent_role else "" + if role: + self._current_agent = role + + self._register_handler(AgentLogsStartedEvent, on_agent_started) + + @crewai_event_bus.on(LLMCallStartedEvent) + def on_llm_started(source: Any, event: LLMCallStartedEvent) -> None: + with self._lock: + self._is_streaming = False + self._streaming_text = "" + self._live_out_tokens = 0 + self._current_llm_text = "" + if event.messages: + estimate = len(str(event.messages)) // 4 + self._input_tokens += estimate + self._pending_input_estimate = estimate + self._complete_step("yellow", "Thinking…") + + self._register_handler(LLMCallStartedEvent, on_llm_started) + + @crewai_event_bus.on(LLMCallCompletedEvent) + def on_llm_completed(source: Any, event: LLMCallCompletedEvent) -> None: + with self._lock: + self._llm_calls += 1 + self._is_streaming = False + self._streaming_text = "" + self._live_out_tokens = 0 + self._input_tokens -= self._pending_input_estimate + self._pending_input_estimate 
= 0 + if event.usage: + u = event.usage + inp = next( + ( + u[k] + for k in ( + "prompt_tokens", + "input_tokens", + "prompt_token_count", + ) + if u.get(k) + ), + 0, + ) + out = next( + ( + u[k] + for k in ( + "completion_tokens", + "output_tokens", + "candidates_token_count", + ) + if u.get(k) + ), + 0, + ) + self._input_tokens += inp + self._output_tokens += out + if self._current_llm_text.strip(): + self._current_task_steps.append( + { + "type": "llm", + "summary": f"LLM response (call {self._llm_calls})", + "detail": self._current_llm_text.strip(), + "style": "dim", + } + ) + self._current_llm_text = "" + + self._register_handler(LLMCallCompletedEvent, on_llm_completed) + + @crewai_event_bus.on(LLMStreamChunkEvent) + def on_stream_chunk(source: Any, event: LLMStreamChunkEvent) -> None: + with self._lock: + if not self._is_streaming: + self._current_step = ("yellow", "Generating response…", "") + self._is_streaming = True + self._streaming_text += event.chunk + self._task_full_output += event.chunk + self._current_llm_text += event.chunk + self._live_out_tokens += 1 + if ( + not self._plan or self._awaiting_replan + ) and '{"plan"' in self._streaming_text: + self._try_parse_plan(self._streaming_text) + if self._plan and "step_completed_successfully" in self._streaming_text: + self._try_parse_step_observation(self._streaming_text) + + self._register_handler(LLMStreamChunkEvent, on_stream_chunk) + + @crewai_event_bus.on(StepObservationStartedEvent) + def on_step_observation_started( + source: Any, event: StepObservationStartedEvent + ) -> None: + with self._lock: + self._set_plan_step_status(event.step_number, "active") + + self._register_handler(StepObservationStartedEvent, on_step_observation_started) + + @crewai_event_bus.on(StepObservationCompletedEvent) + def on_step_observation_completed( + source: Any, event: StepObservationCompletedEvent + ) -> None: + with self._lock: + status = "done" if event.step_completed_successfully else "failed" + 
self._set_plan_step_status(event.step_number, status) + + self._register_handler( + StepObservationCompletedEvent, on_step_observation_completed + ) + + @crewai_event_bus.on(StepObservationFailedEvent) + def on_step_observation_failed( + source: Any, event: StepObservationFailedEvent + ) -> None: + with self._lock: + # Intentionally "done", not "failed": this event means the + # step OBSERVER failed (e.g. timeout), not the step itself, + # and the executor continues past it. A red ✘ would wrongly + # suggest the plan step failed. + self._set_plan_step_status(event.step_number, "done") + + self._register_handler(StepObservationFailedEvent, on_step_observation_failed) + + @crewai_event_bus.on(PlanRefinementEvent) + def on_plan_refinement(source: Any, event: PlanRefinementEvent) -> None: + with self._lock: + if event.step_number: + self._set_plan_step_status(event.step_number, "done") + self._apply_plan_refinements(event.refinements) + + self._register_handler(PlanRefinementEvent, on_plan_refinement) + + @crewai_event_bus.on(PlanStepStartedEvent) + def on_plan_step_started(source: Any, event: PlanStepStartedEvent) -> None: + with self._lock: + self._set_plan_step_status(event.step_number, "active") + + self._register_handler(PlanStepStartedEvent, on_plan_step_started) + + @crewai_event_bus.on(PlanStepCompletedEvent) + def on_plan_step_completed(source: Any, event: PlanStepCompletedEvent) -> None: + with self._lock: + self._set_plan_step_status( + event.step_number, + "done" if event.success else "failed", + ) + + self._register_handler(PlanStepCompletedEvent, on_plan_step_completed) + + @crewai_event_bus.on(PlanReplanTriggeredEvent) + def on_plan_replan_triggered( + source: Any, event: PlanReplanTriggeredEvent + ) -> None: + with self._lock: + self._prepare_for_replan() + self._current_step = ("yellow", "Replanning…", event.replan_reason) + + self._register_handler(PlanReplanTriggeredEvent, on_plan_replan_triggered) + + @crewai_event_bus.on(GoalAchievedEarlyEvent) + 
def on_goal_achieved_early(source: Any, event: GoalAchievedEarlyEvent) -> None: + with self._lock: + self._mark_plan_goal_achieved(event.step_number or None) + + self._register_handler(GoalAchievedEarlyEvent, on_goal_achieved_early) + + @crewai_event_bus.on(ToolUsageStartedEvent) + def on_tool_started(source: Any, event: ToolUsageStartedEvent) -> None: + if event.tool_name in _INTERNAL_TOOL_NAMES: + return + + with self._lock: + self._is_streaming = False + self._streaming_text = "" + now = time.time() + args_str = "" + if event.tool_args: + try: + args_str = _json.dumps(event.tool_args, indent=2, default=str) + except Exception: + args_str = str(event.tool_args) + for entry in self._log_entries: + if ( + entry["status"] == "running" + and entry["tool_name"] == event.tool_name + and entry["args"] == (args_str or None) + ): + return + for entry in self._log_entries: + if ( + entry["status"] == "running" + and entry["tool_name"] != event.tool_name + ): + entry["status"] = "timeout" + entry["error"] = ( + "No result received before the next tool started" + ) + entry["duration"] = now - entry["start_time"] + plan_step_number = getattr(event, "plan_step_number", None) + if not isinstance(plan_step_number, int): + plan_step_number = None + self._current_task_steps.append( + { + "type": "tool", + "summary": f"Using {event.tool_name}…", + "detail": f"Args:\n{args_str}" if args_str else None, + "style": "yellow", + "_tool_name": event.tool_name, + } + ) + self._log_entries.append( + { + "tool_name": event.tool_name, + "status": "running", + "args": args_str or None, + "result": None, + "error": None, + "start_time": time.time(), + "duration": None, + "task_idx": self._current_task_idx, + "plan_step_number": plan_step_number, + } + ) + self._complete_step("teal", f"⚡ {event.tool_name}…") + + self._register_handler(ToolUsageStartedEvent, on_tool_started) + + @crewai_event_bus.on(ToolUsageFinishedEvent) + def on_tool_finished(source: Any, event: ToolUsageFinishedEvent) -> 
None: + if event.tool_name in _INTERNAL_TOOL_NAMES: + return + + with self._lock: + if event.output is not None: + out = event.output + if isinstance(out, (dict, list)): + try: + result_str = _json.dumps(out, indent=2, ensure_ascii=False)[ + :5000 + ] + except (TypeError, ValueError): + result_str = str(out)[:5000] + else: + result_str = str(out)[:5000] + else: + result_str = "No output" + for step in reversed(self._current_task_steps): + if ( + step.get("_tool_name") == event.tool_name + and step["type"] == "tool" + ): + existing = step.get("detail") or "" + step["detail"] = ( + f"{existing}\n\nResult:\n{result_str}" + if existing + else f"Result:\n{result_str}" + ) + step["summary"] = f"✔ {event.tool_name}" + step["style"] = "green" + break + from_cache = getattr(event, "from_cache", False) + for entry in reversed(self._log_entries): + if entry["tool_name"] == event.tool_name and ( + entry["status"] == "running" + or (entry["status"] == "success" and entry["result"] is None) + ): + entry["status"] = "success" + entry["result"] = result_str + entry["duration"] = time.time() - entry["start_time"] + entry["from_cache"] = from_cache + break + self._replace_step("green", f"✔ {event.tool_name}") + + self._register_handler(ToolUsageFinishedEvent, on_tool_finished) + + @crewai_event_bus.on(ToolUsageErrorEvent) + def on_tool_error(source: Any, event: ToolUsageErrorEvent) -> None: + if event.tool_name in _INTERNAL_TOOL_NAMES: + return + + error_text = str(event.error)[:200] if event.error else "" + with self._lock: + for step in reversed(self._current_task_steps): + if ( + step.get("_tool_name") == event.tool_name + and step["type"] == "tool" + ): + existing = step.get("detail") or "" + step["detail"] = ( + f"{existing}\n\nError:\n{event.error}" + if existing + else f"Error:\n{event.error}" + ) + step["summary"] = f"✘ {event.tool_name}" + step["style"] = "red" + break + for idx, entry in reversed(list(enumerate(self._log_entries))): + if entry["tool_name"] == 
event.tool_name and ( + entry["status"] == "running" + or (entry["status"] == "success" and entry["result"] is None) + ): + entry["status"] = "error" + entry["error"] = str(event.error) if event.error else None + entry["duration"] = time.time() - entry["start_time"] + self._log_expanded.add(idx) + break + self._replace_step("red", f"✘ {event.tool_name}", error_text) + + self._register_handler(ToolUsageErrorEvent, on_tool_error) + + from crewai.events.types.memory_events import ( + MemoryRetrievalCompletedEvent, + MemoryRetrievalFailedEvent, + MemoryRetrievalStartedEvent, + ) + + @crewai_event_bus.on(MemoryRetrievalStartedEvent) + def on_memory_retrieval_started( + source: Any, event: MemoryRetrievalStartedEvent + ) -> None: + with self._lock: + self._log_entries.append( + { + "tool_name": "memory_recall", + "status": "running", + "args": None, + "result": None, + "error": None, + "start_time": time.time(), + "duration": None, + "task_idx": self._current_task_idx, + } + ) + + self._register_handler(MemoryRetrievalStartedEvent, on_memory_retrieval_started) + + @crewai_event_bus.on(MemoryRetrievalCompletedEvent) + def on_memory_retrieval_completed( + source: Any, event: MemoryRetrievalCompletedEvent + ) -> None: + with self._lock: + for entry in reversed(self._log_entries): + if ( + entry["tool_name"] == "memory_recall" + and entry["status"] == "running" + ): + entry["status"] = "success" + entry["duration"] = event.retrieval_time_ms / 1000 + content = event.memory_content or "" + if content: + entry["result"] = content[:3000] + break + + self._register_handler( + MemoryRetrievalCompletedEvent, on_memory_retrieval_completed + ) + + @crewai_event_bus.on(MemoryRetrievalFailedEvent) + def on_memory_retrieval_failed( + source: Any, event: MemoryRetrievalFailedEvent + ) -> None: + with self._lock: + for idx, entry in enumerate(self._log_entries): + if ( + entry["tool_name"] == "memory_recall" + and entry["status"] == "running" + ): + entry["status"] = "error" + 
entry["error"] = event.error + entry["duration"] = 0 + self._log_expanded.add(idx) + break + + self._register_handler(MemoryRetrievalFailedEvent, on_memory_retrieval_failed) + + @crewai_event_bus.on(AgentLogsExecutionEvent) + def on_agent_execution(source: Any, event: AgentLogsExecutionEvent) -> None: + from crewai.agents.parser import AgentAction, AgentFinish + + if isinstance(event.formatted_answer, AgentAction): + self._complete_step("cyan", f"→ {event.formatted_answer.tool}") + elif isinstance(event.formatted_answer, AgentFinish): + self._complete_step("green", "✔ Agent finished") + + self._register_handler(AgentLogsExecutionEvent, on_agent_execution) + + @crewai_event_bus.on(TaskCompletedEvent) + def on_task_completed(source: Any, event: TaskCompletedEvent) -> None: + now = time.time() + with self._lock: + state = self._pop_task_state(event) + idx = state["idx"] + self._task_statuses[idx] = "done" + elapsed = now - state["start_time"] + # The shared stream fields (steps, timeline, streamed output) + # belong to the most recently started task. Only consume and + # reset them when that is the task completing — an earlier + # task finishing out of order must not steal or clear the + # current task's stream. 
+ is_current = idx == self._current_task_idx + output = getattr(event.output, "raw", "") or "" + + if is_current: + self._collapse_plan_on_task_done() + + if self._current_llm_text.strip(): + self._current_task_steps.append( + { + "type": "llm", + "summary": "Final response", + "detail": self._current_llm_text.strip(), + "style": "green", + } + ) + self._current_llm_text = "" + + steps = list(self._current_task_steps) + self._current_task_steps = [] + timeline = list(self._timeline) + output = self._task_full_output or output + + self._is_streaming = False + self._streaming_text = "" + self._task_full_output = "" + self._timeline = [] + self._current_step = None + else: + steps = [] + timeline = [] + + self._task_logs.append( + { + "idx": idx, + "desc": state["desc"] or "Task", + "agent": state["agent"], + "elapsed": elapsed, + "timeline": timeline, + "steps": steps, + "output": output, + } + ) + + self._register_handler(TaskCompletedEvent, on_task_completed) + + @crewai_event_bus.on(TaskFailedEvent) + def on_task_failed(source: Any, event: TaskFailedEvent) -> None: + now = time.time() + with self._lock: + state = self._pop_task_state(event) + idx = state["idx"] + self._task_statuses[idx] = "failed" + is_current = idx == self._current_task_idx + + error_step = { + "type": "error", + "summary": f"✘ Failed: {event.error[:100]}", + "detail": event.error, + "style": "red", + } + if is_current: + self._current_task_steps.append(error_step) + steps = list(self._current_task_steps) + self._current_task_steps = [] + timeline = list(self._timeline) + output = self._task_full_output + else: + steps = [error_step] + timeline = [] + output = "" + + self._task_logs.append( + { + "idx": idx, + "desc": state["desc"] or "Task", + "agent": state["agent"], + "elapsed": now - state["start_time"], + "timeline": timeline, + "steps": steps, + "output": output, + "error": event.error, + } + ) + self._complete_step( + "red", f"✘ Failed: {event.error[:50]}", event.error[:200] + ) + + 
self._register_handler(TaskFailedEvent, on_task_failed) diff --git a/lib/cli/src/crewai_cli/deploy/main.py b/lib/cli/src/crewai_cli/deploy/main.py index 2e44f87a6..2d3e737b9 100644 --- a/lib/cli/src/crewai_cli/deploy/main.py +++ b/lib/cli/src/crewai_cli/deploy/main.py @@ -34,6 +34,39 @@ def _run_predeploy_validation(skip_validate: bool) -> bool: return True +def _display_git_repository_help() -> None: + """Explain how to prepare a new project for deployment.""" + console.print( + "Deployment requires a Git repository with an origin remote.", + style="bold red", + ) + console.print( + "CrewAI AMP deploys from the remote repository URL, so commit and push " + "this project first, then run deploy again.", + style="yellow", + ) + console.print("\nSuggested setup:") + console.print(" git init") + console.print(" git add .") + console.print(' git commit -m "Initial crew"') + console.print(" git branch -M main") + console.print(" git remote add origin ") + console.print(" git push -u origin main") + + +def _display_git_remote_help() -> None: + """Explain how to add a remote to an existing Git repository.""" + console.print("No remote repository URL found.", style="bold red") + console.print( + "CrewAI AMP deploys from the origin remote. Add a remote, push your " + "latest commit, then run deploy again.", + style="yellow", + ) + console.print("\nSuggested setup:") + console.print(" git remote add origin ") + console.print(" git push -u origin HEAD") + + class DeployCommand(BaseCommand, PlusAPIMixin): """ A class to handle deployment-related operations for CrewAI projects. 
@@ -124,14 +157,11 @@ class DeployCommand(BaseCommand, PlusAPIMixin): try: remote_repo_url = git.Repository().origin_url() except ValueError: - remote_repo_url = None + _display_git_repository_help() + return if remote_repo_url is None: - console.print("No remote repository URL found.", style="bold red") - console.print( - "Please ensure your project has a valid remote repository.", - style="yellow", - ) + _display_git_remote_help() return self._confirm_input(env_vars, remote_repo_url, confirm) diff --git a/lib/cli/src/crewai_cli/deploy/validate.py b/lib/cli/src/crewai_cli/deploy/validate.py index 5316879aa..544ff20cf 100644 --- a/lib/cli/src/crewai_cli/deploy/validate.py +++ b/lib/cli/src/crewai_cli/deploy/validate.py @@ -38,6 +38,12 @@ import subprocess import sys from typing import Any +from crewai.project.json_loader import ( + JSONProjectValidationError, + find_crew_json_file, + find_json_project_file, + validate_crew_project, +) from rich.console import Console from crewai_cli.utils import parse_toml @@ -151,9 +157,33 @@ class DeployValidator: def ok(self) -> bool: return not self.errors + @property + def _is_json_crew(self) -> bool: + """True for JSON crew projects, deferring to the declared type. + + A flow project that also contains a crew.json(c) file validates as + the flow it declares in pyproject.toml, not as a JSON crew. + """ + if find_crew_json_file(self.project_root) is None: + return False + pyproject_path = self.project_root / "pyproject.toml" + if not pyproject_path.exists(): + return True + try: + data = parse_toml(pyproject_path.read_text()) + except Exception: + return True + declared_type: str | None = ( + (data.get("tool") or {}).get("crewai", {}).get("type") + ) + return declared_type != "flow" + def run(self) -> list[ValidationResult]: """Run all checks. Later checks are skipped when earlier ones make them impossible (e.g. 
no pyproject.toml → no lockfile check).""" + if self._is_json_crew: + return self._run_json_checks() + if not self._check_pyproject(): return self.results @@ -176,6 +206,110 @@ class DeployValidator: return self.results + def _run_json_checks(self) -> list[ValidationResult]: + """Validation suite for JSON-defined crew projects.""" + crew_path = find_crew_json_file(self.project_root) + if crew_path is None: + return self.results + + try: + project = validate_crew_project(crew_path, self.project_root / "agents") + except JSONProjectValidationError as e: + self._add( + Severity.ERROR, + "invalid_crew_json", + f"{crew_path.name} has invalid JSON crew configuration", + detail="\n".join(e.errors), + hint="Fix the JSON crew, agent, and task references before deploying.", + ) + return self.results + except Exception as e: + self._add( + Severity.ERROR, + "invalid_crew_json", + f"Cannot parse {crew_path.name}", + detail=str(e), + ) + return self.results + + agents_dir = self.project_root / "agents" + + self._check_pyproject() + self._check_lockfile() + self._check_env_vars_json(crew_path, agents_dir, project.agent_names) + self._check_version_vs_lockfile() + + return self.results + + def _check_env_vars_json( + self, crew_path: Path, agents_dir: Path, agent_names: list[str] + ) -> None: + """Check for env var references in JSON crew files.""" + referenced: set[str] = set() + pattern = re.compile(r"\$\{?([A-Z][A-Z0-9_]+)\}?") + + try: + referenced.update(pattern.findall(crew_path.read_text(errors="ignore"))) + except OSError as exc: + logger.debug("Skipping unreadable crew file %s: %s", crew_path, exc) + + for name in agent_names: + agent_path = find_json_project_file(agents_dir, name) + if agent_path is None: + continue + try: + referenced.update( + pattern.findall(agent_path.read_text(errors="ignore")) + ) + except OSError as exc: + logger.debug("Skipping unreadable agent file %s: %s", agent_path, exc) + + for py_path in self.project_root.rglob("*.py"): + if ".venv" in 
py_path.parts: + continue + try: + text = py_path.read_text(encoding="utf-8", errors="ignore") + except OSError: + continue + env_pattern = re.compile( + r"""(?x) + (?:os\.environ\s*(?:\[\s*|\.get\s*\(\s*) + |os\.getenv\s*\(\s* + |getenv\s*\(\s*) + ['"]([A-Z][A-Z0-9_]*)['"] + """ + ) + referenced.update(env_pattern.findall(text)) + + env_file = self.project_root / ".env" + env_keys: set[str] = set() + if env_file.exists(): + for line in env_file.read_text(errors="ignore").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + env_keys.add(line.split("=", 1)[0].strip()) + + missing_known = sorted( + var + for var in referenced + if var in _KNOWN_API_KEY_HINTS + and var not in env_keys + and var not in os.environ + ) + if missing_known: + self._add( + Severity.WARNING, + "env_vars_not_in_dotenv", + f"{len(missing_known)} referenced API key(s) not in .env", + detail=( + "These env vars are referenced in your project but not set " + f"locally: {', '.join(missing_known)}. Deploys will fail " + "unless they are added to the deployment's Environment " + "Variables in the CrewAI dashboard." 
+ ), + ) + def _check_pyproject(self) -> bool: pyproject_path = self.project_root / "pyproject.toml" if not pyproject_path.exists(): diff --git a/lib/cli/src/crewai_cli/git.py b/lib/cli/src/crewai_cli/git.py index b43e4d3cb..297ab533a 100644 --- a/lib/cli/src/crewai_cli/git.py +++ b/lib/cli/src/crewai_cli/git.py @@ -48,6 +48,7 @@ class Repository: ["git", "rev-parse", "--is-inside-work-tree"], # noqa: S607 cwd=self.path, encoding="utf-8", + stderr=subprocess.DEVNULL, ) return True except subprocess.CalledProcessError: diff --git a/lib/cli/src/crewai_cli/run_crew.py b/lib/cli/src/crewai_cli/run_crew.py index 198f3e5bb..df51a9aa2 100644 --- a/lib/cli/src/crewai_cli/run_crew.py +++ b/lib/cli/src/crewai_cli/run_crew.py @@ -1,25 +1,311 @@ +from __future__ import annotations + +from contextlib import AbstractContextManager, nullcontext from enum import Enum +import os +from pathlib import Path +import re import subprocess +import sys +from typing import TYPE_CHECKING, Any import click +from crewai.project.json_loader import find_crew_json_file from crewai_core.constants import CREWAI_TRAINED_AGENTS_FILE_ENV from packaging import version -from crewai_cli.utils import build_env_with_all_tool_credentials, read_toml +from crewai_cli.utils import ( + build_env_with_all_tool_credentials, + enable_prompt_line_editing, + read_toml, +) from crewai_cli.version import get_crewai_version +if TYPE_CHECKING: + from crewai_cli.crew_run_tui import CrewRunApp + + class CrewType(Enum): STANDARD = "standard" FLOW = "flow" -def run_crew(trained_agents_file: str | None = None) -> None: - """Run the crew or flow by running a command in the UV environment. +# Must accept the same names as the kickoff interpolation pattern in +# crewai.utilities.string_utils (_VARIABLE_PATTERN), including hyphens — +# otherwise placeholders are interpolated at runtime but never prompted for. +_INPUT_PLACEHOLDER_RE = re.compile(r"(? bool: + """Check if this is a JSON-defined crew project. 
+ + The project type declared in pyproject.toml wins: a flow project that + happens to contain a crew.json(c) file still runs as a flow. A missing + or unreadable pyproject means a bare JSON crew project. + """ + if find_crew_json_file() is None: + return False + try: + pyproject_data = read_toml() + except Exception: + return True + declared_type: str | None = ( + pyproject_data.get("tool", {}).get("crewai", {}).get("type") + ) + return declared_type != "flow" + + +def _extract_input_placeholders(text: str | None) -> set[str]: + if not text: + return set() + return set(_INPUT_PLACEHOLDER_RE.findall(text)) + + +def _missing_input_names(crew: Any, inputs: dict[str, Any]) -> list[str]: + """Return input placeholders used by a crew but not provided as defaults.""" + placeholders: set[str] = set() + + for agent in getattr(crew, "agents", []) or []: + placeholders.update(_extract_input_placeholders(getattr(agent, "role", None))) + placeholders.update(_extract_input_placeholders(getattr(agent, "goal", None))) + placeholders.update( + _extract_input_placeholders(getattr(agent, "backstory", None)) + ) + + for task in getattr(crew, "tasks", []) or []: + placeholders.update( + _extract_input_placeholders(getattr(task, "description", None)) + ) + placeholders.update( + _extract_input_placeholders(getattr(task, "expected_output", None)) + ) + placeholders.update( + _extract_input_placeholders(getattr(task, "output_file", None)) + ) + + return sorted(name for name in placeholders if name not in inputs) + + +def _prompt_for_missing_inputs( + crew: Any, default_inputs: dict[str, Any] +) -> dict[str, Any]: + """Ask for runtime values for placeholders that lack default inputs.""" + inputs = dict(default_inputs or {}) + missing = _missing_input_names(crew, inputs) + if not missing: + return inputs + + enable_prompt_line_editing() + + click.echo() + click.secho(" Runtime inputs", fg="cyan", bold=True) + click.secho( + " Values for {placeholder} references in your agents and tasks.", 
+ dim=True, + ) + + for name in missing: + inputs[name] = click.prompt( + click.style(f" {name}", fg="cyan"), + prompt_suffix=click.style(" > ", fg="bright_white"), + ) + + return inputs + + +def _json_loading_status(message: str) -> AbstractContextManager[Any]: + from rich.console import Console + from rich.text import Text + + console = Console() + if not console.is_terminal: + return nullcontext() + return console.status( + Text(f" {message}", style="bold #1F7982"), + spinner="dots", + ) + + +def _load_json_crew(crew_path: Path) -> tuple[Any, dict[str, Any]]: + from crewai.project.crew_loader import load_crew + + return load_crew(crew_path) + + +def _load_json_crew_for_tui( + crew_path: Path, +) -> tuple[type[Any], Any, dict[str, Any], list[str], list[str]]: + with _json_loading_status("Preparing crew..."): + from crewai_cli.crew_run_tui import CrewRunApp + + crew, default_inputs = _load_json_crew(crew_path) + _prepare_json_crew_for_tui(crew) + task_names = [ + getattr(task, "name", "") or getattr(task, "description", "")[:40] or "Task" + for task in crew.tasks + ] + agent_names = [ + getattr(agent, "role", "") or getattr(agent, "name", "") or "Agent" + for agent in crew.agents + ] + + return CrewRunApp, crew, default_inputs, task_names, agent_names + + +def _prepare_json_crew_for_tui(crew: Any) -> None: + """Apply the same quiet/streaming setup used by the TUI JSON loader.""" + crew.verbose = False + for agent in crew.agents: + agent.verbose = False + if hasattr(agent, "llm") and hasattr(agent.llm, "stream"): + agent.llm.stream = True + + +def _run_json_crew(trained_agents_file: str | None = None) -> Any: + """Load and run a JSON-defined crew.""" + from dotenv import load_dotenv + + env_file = Path.cwd() / ".env" + if env_file.exists(): + load_dotenv(env_file, override=True) + + # JSON crews run in-process, so export the trained-agents file directly + # instead of forwarding it to a subprocess like classic crews do. 
+ if trained_agents_file: + os.environ[CREWAI_TRAINED_AGENTS_FILE_ENV] = trained_agents_file + + crew_path = find_crew_json_file() + if crew_path is None: + raise FileNotFoundError("No crew.jsonc or crew.json found") + + crew_run_app_cls, crew, default_inputs, task_names, agent_names = ( + _load_json_crew_for_tui(crew_path) + ) + runtime_inputs = _prompt_for_missing_inputs(crew, default_inputs) + + app = crew_run_app_cls( + crew_name=crew.name or "Crew", + total_tasks=len(crew.tasks), + agent_names=agent_names, + task_names=task_names, + ) + app._crew = crew + app._default_inputs = runtime_inputs + + app.run() + + _print_post_tui_summary(app) + + if app._status == "failed": + # Mirror the classic subprocess path: a failed crew must produce a + # non-zero exit code so scripts and CI don't treat it as success. + raise SystemExit(1) + + if app._status not in ("completed", "failed"): + # User quit mid-run. kickoff runs in a thread worker that cannot be + # force-cancelled, so end the process to stop in-flight LLM and tool + # work instead of letting it burn tokens in the background. 
+ click.secho("\n Run cancelled.", fg="yellow") + sys.stdout.flush() + os._exit(130) + + if getattr(app, "_want_deploy", False): + _chain_deploy() + + return app._crew_result + + +def _chain_deploy() -> None: + from rich.console import Console + + console = Console() + try: + from crewai_cli.deploy.main import DeployCommand + + console.print("\nStarting deployment…\n", style="bold #FF5A50") + DeployCommand().create_crew(confirm=False, skip_validate=True) + except SystemExit: + from crewai_cli.authentication.main import AuthenticationCommand + + console.print() + AuthenticationCommand().login() + try: + DeployCommand().create_crew(confirm=False, skip_validate=True) + except Exception as e: + console.print(f"\nDeploy failed: {e}\n", style="bold red") + except Exception as e: + console.print(f"\nDeploy failed: {e}\n", style="bold red") + + +def _print_post_tui_summary(app: CrewRunApp) -> None: + """Print a summary to the terminal after the Textual TUI exits.""" + import time + + from rich.console import Console + from rich.markdown import Markdown + from rich.padding import Padding + from rich.panel import Panel + from rich.text import Text + + console = Console() + elapsed = time.time() - app._start_time + + out_tokens = app._output_tokens + app._live_out_tokens + token_parts = [] + if app._input_tokens: + token_parts.append(f"↑{app._input_tokens:,}") + if out_tokens: + token_parts.append(f"↓{out_tokens:,}") + token_str = " ".join(token_parts) + if token_str: + token_str += " tokens" + + crewai_red = "#FF5A50" + crewai_teal = "#1F7982" + + if app._status == "completed": + summary = Text() + summary.append( + f" ✔ Completed {app._total_tasks} tasks", + style=f"bold {crewai_teal}", + ) + summary.append(f" in {elapsed:.1f}s", style="dim") + if token_str: + summary.append(f" {token_str}", style="dim") + console.print( + Panel( + summary, + title=f" {app._crew_name} ", + title_align="left", + border_style=crewai_teal, + padding=(0, 1), + ) + ) + if app._final_output: + 
console.print() + console.print(Text(" Final Result", style=f"bold {crewai_teal}")) + console.print() + console.print(Padding(Markdown(app._final_output), (0, 2))) + elif app._status == "failed": + content = Text() + content.append(" ✘ Failed", style=f"bold {crewai_red}") + content.append(f" after {elapsed:.1f}s\n", style="dim") + if app._error: + content.append(f"\n {app._error}\n", style=crewai_red) + console.print( + Panel( + content, + title=f" {app._crew_name} ", + title_align="left", + border_style=crewai_red, + padding=(0, 1), + ) + ) + + +def run_crew(trained_agents_file: str | None = None) -> None: + """Run the crew or flow. Args: trained_agents_file: Optional path to a trained-agents pickle produced @@ -27,6 +313,11 @@ def run_crew(trained_agents_file: str | None = None) -> None: ``CREWAI_TRAINED_AGENTS_FILE`` so agents load suggestions from this file instead of the default ``trained_agents_data.pkl``. """ + # JSON crew projects take precedence + if _has_json_crew(): + _run_json_crew(trained_agents_file=trained_agents_file) + return + crewai_version = get_crewai_version() min_required_version = "0.71.0" pyproject_data = read_toml() diff --git a/lib/cli/src/crewai_cli/tui_picker.py b/lib/cli/src/crewai_cli/tui_picker.py new file mode 100644 index 000000000..69157658c --- /dev/null +++ b/lib/cli/src/crewai_cli/tui_picker.py @@ -0,0 +1,419 @@ +"""Arrow-key interactive pickers for CLI prompts.""" + +from __future__ import annotations + +from contextlib import suppress +import sys +from typing import overload + +import click + + +# CrewAI brand: primary=#FF5A50 (coral), teal=#1F7982 +_CORAL = "\033[38;2;255;90;80m" # #FF5A50 +_TEAL = "\033[38;2;31;121;130m" # #1F7982 +_BOLD = "\033[1m" +_DIM = "\033[2m" +_RESET = "\033[0m" +_HIDE_CURSOR = "\033[?25l" +_SHOW_CURSOR = "\033[?25h" + + +def _is_interactive() -> bool: + try: + return sys.stdin.isatty() and sys.stdout.isatty() + except Exception: + return False + + +def _read_key() -> str: + if sys.platform == 
"win32": + import msvcrt + + ch = msvcrt.getwch() + if ch in ("\x00", "\xe0"): + ch2 = msvcrt.getwch() + return {"H": "up", "P": "down"}.get(ch2, "") + if ch == "\r": + return "enter" + if ch == " ": + return "space" + if ch == "\x03": + raise KeyboardInterrupt + return ch + + import termios + import tty + + fd = sys.stdin.fileno() + old = termios.tcgetattr(fd) + try: + tty.setcbreak(fd) + ch = sys.stdin.read(1) + if ch == "\x1b": + seq = sys.stdin.read(2) + if seq == "[A": + return "up" + if seq == "[B": + return "down" + return "esc" + if ch in ("\r", "\n"): + return "enter" + if ch == " ": + return "space" + if ch == "\x03": + raise KeyboardInterrupt + return ch + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old) + + +def _clear_lines(n: int) -> None: + sys.stdout.write(f"\033[{n}A") + for _ in range(n): + sys.stdout.write("\033[2K\n") + sys.stdout.write(f"\033[{n}A") + sys.stdout.flush() + + +def _draw_single(labels: list[str], cursor: int, *, clear: bool = False) -> None: + total = len(labels) + if clear: + sys.stdout.write(f"\033[{total}A") + for i, label in enumerate(labels): + if i == cursor: + sys.stdout.write(f"\033[2K {_CORAL}→{_RESET} {_BOLD}{label}{_RESET}\n") + else: + sys.stdout.write(f"\033[2K {label}\n") + sys.stdout.flush() + + +def _draw_multi( + labels: list[str], + cursor: int, + selected: set[int], + *, + action_indices: set[int] | None = None, + separator_indices: set[int] | None = None, + clear: bool = False, +) -> None: + action_indices = action_indices or set() + separator_indices = separator_indices or set() + hint_text = "↑↓ navigate, space toggle, enter confirm" + if action_indices: + hint_text = "↑↓ navigate, space toggle, enter confirm, ▸ rows expand/collapse" + hint = f" {_DIM}{hint_text}{_RESET}" + total = len(labels) + 1 + if clear: + sys.stdout.write(f"\033[{total}A") + sys.stdout.write(f"\033[2K{hint}\n") + for i, label in enumerate(labels): + if i in separator_indices: + sys.stdout.write(f"\033[2K 
{_TEAL}{label}{_RESET}\n") + continue + if i in action_indices: + check = " " + elif i in selected: + check = f"{_CORAL}[x]{_RESET}" + else: + check = "[ ]" + arrow = f"{_CORAL}→{_RESET} " if i == cursor else " " + bold = f"{_BOLD}{label}{_RESET}" if i == cursor else label + sys.stdout.write(f"\033[2K {arrow}{check} {bold}\n") + sys.stdout.flush() + + +def _arrow_select_one(labels: list[str]) -> int: + cursor = 0 + total = len(labels) + sys.stdout.write(_HIDE_CURSOR) + sys.stdout.flush() + try: + _draw_single(labels, cursor) + while True: + key = _read_key() + if key == "up" and cursor > 0: + cursor -= 1 + _draw_single(labels, cursor, clear=True) + elif key == "down" and cursor < total - 1: + cursor += 1 + _draw_single(labels, cursor, clear=True) + elif key == "enter": + _clear_lines(total) + return cursor + elif key in ("esc", "q"): + _clear_lines(total) + return -1 + finally: + sys.stdout.write(_SHOW_CURSOR) + sys.stdout.flush() + + +def _arrow_select_multi( + labels: list[str], + *, + action_indices: set[int] | None = None, + separator_indices: set[int] | None = None, + preselected: set[int] | None = None, + initial_cursor: int | None = None, +) -> tuple[list[int], int | None]: + total = len(labels) + selected: set[int] = set(preselected or ()) + action_indices = action_indices or set() + separator_indices = separator_indices or set() + if initial_cursor is not None and 0 <= initial_cursor < total: + cursor = initial_cursor + else: + cursor = _first_selectable_index(total, separator_indices) + sys.stdout.write(_HIDE_CURSOR) + sys.stdout.flush() + try: + _draw_multi( + labels, + cursor, + selected, + action_indices=action_indices, + separator_indices=separator_indices, + ) + while True: + key = _read_key() + if key == "up": + cursor = _next_selectable_index(cursor, -1, total, separator_indices) + _draw_multi( + labels, + cursor, + selected, + action_indices=action_indices, + separator_indices=separator_indices, + clear=True, + ) + elif key == "down": + cursor = 
_next_selectable_index(cursor, 1, total, separator_indices) + _draw_multi( + labels, + cursor, + selected, + action_indices=action_indices, + separator_indices=separator_indices, + clear=True, + ) + elif key == "space": + if cursor in action_indices: + _clear_lines(total + 1) + return sorted(selected), cursor + selected ^= {cursor} + _draw_multi( + labels, + cursor, + selected, + action_indices=action_indices, + separator_indices=separator_indices, + clear=True, + ) + elif key == "enter": + _clear_lines(total + 1) + if cursor in action_indices: + return sorted(selected), cursor + return sorted(selected), None + elif key in ("esc", "q"): + _clear_lines(total + 1) + return sorted(selected), None + finally: + sys.stdout.write(_SHOW_CURSOR) + sys.stdout.flush() + + +def _numbered_select(labels: list[str]) -> int: + for idx, label in enumerate(labels, 1): + click.echo(f" {idx}. {label}") + click.echo() + while True: + choice = click.prompt(" Select", type=str, default="1") + if choice.lower() == "q": + return -1 + try: + num = int(choice) + if 1 <= num <= len(labels): + return num - 1 + except ValueError: + # Non-numeric input falls through to the shared error message. + pass + click.secho(f" Invalid choice. Enter 1-{len(labels)}.", fg="red") + + +def _numbered_select_multi( + labels: list[str], + *, + action_indices: set[int] | None = None, + separator_indices: set[int] | None = None, + preselected: set[int] | None = None, +) -> tuple[list[int], int | None]: + action_indices = action_indices or set() + separator_indices = separator_indices or set() + numbered_indices: list[int] = [] + for idx, label in enumerate(labels): + if idx in separator_indices: + click.secho(f" {label}", fg="cyan") + continue + numbered_indices.append(idx) + click.echo(f" {len(numbered_indices)}. 
{label}") + click.echo() + raw = click.prompt( + " Select (comma-separated numbers, or empty to skip)", + default="", + show_default=False, + ) + if not raw.strip(): + return sorted(preselected or ()), None + indices: list[int] = list(preselected or ()) + for part in raw.split(","): + with suppress(ValueError): + num = int(part.strip()) + if 1 <= num <= len(numbered_indices): + idx = numbered_indices[num - 1] + if idx in action_indices: + return sorted(set(indices)), idx + indices.append(idx) + return sorted(set(indices)), None + + +def _first_selectable_index(total: int, separator_indices: set[int]) -> int: + for idx in range(total): + if idx not in separator_indices: + return idx + return 0 + + +def _next_selectable_index( + cursor: int, + direction: int, + total: int, + separator_indices: set[int], +) -> int: + next_cursor = cursor + direction + while 0 <= next_cursor < total: + if next_cursor not in separator_indices: + return next_cursor + next_cursor += direction + return cursor + + +# ── Public API ────────────────────────────────────────────────── + + +def pick(title: str, options: list[tuple[str, str]]) -> str | None: + """Arrow-key single-select picker. + + Args: + title: Header text. + options: List of ``(value, description)`` tuples. + + Returns: + The *value* of the selected option, or ``None`` if cancelled. + """ + labels = [f"{value:<12s} {desc}" for value, desc in options] + + click.echo() + click.secho(f" {title}", fg="cyan", bold=True) + click.echo() + + if _is_interactive(): + try: + idx = _arrow_select_one(labels) + except Exception: + idx = _numbered_select(labels) + else: + idx = _numbered_select(labels) + + if idx < 0: + return None + + value, _desc = options[idx] + click.secho(f" ✔ {value}", fg="green") + return value + + +def pick_one(title: str, labels: list[str]) -> int: + """Arrow-key single-select from plain labels. + + Returns: + Selected index, or ``-1`` if cancelled. 
+ """ + click.echo() + click.secho(f" {title}", fg="cyan") + + if _is_interactive(): + try: + return _arrow_select_one(labels) + except Exception: + return _numbered_select(labels) + return _numbered_select(labels) + + +@overload +def pick_many( + title: str, + labels: list[str], + *, + separator_indices: set[int] | None = None, + preselected: set[int] | None = None, + initial_cursor: int | None = None, +) -> list[int]: ... + + +@overload +def pick_many( + title: str, + labels: list[str], + *, + action_indices: set[int], + separator_indices: set[int] | None = None, + preselected: set[int] | None = None, + initial_cursor: int | None = None, +) -> tuple[list[int], int | None]: ... + + +def pick_many( + title: str, + labels: list[str], + *, + action_indices: set[int] | None = None, + separator_indices: set[int] | None = None, + preselected: set[int] | None = None, + initial_cursor: int | None = None, +) -> list[int] | tuple[list[int], int | None]: + """Arrow-key multi-select with checkboxes. + + Returns: + Sorted list of selected indices, or ``(indices, action_index)`` when + ``action_indices`` is provided. 
+ """ + click.echo() + click.secho(f" {title}", fg="cyan") + + if _is_interactive(): + try: + selected, action = _arrow_select_multi( + labels, + action_indices=action_indices, + separator_indices=separator_indices, + preselected=preselected, + initial_cursor=initial_cursor, + ) + except Exception: + selected, action = _numbered_select_multi( + labels, + action_indices=action_indices, + separator_indices=separator_indices, + preselected=preselected, + ) + else: + selected, action = _numbered_select_multi( + labels, + action_indices=action_indices, + separator_indices=separator_indices, + preselected=preselected, + ) + if action_indices is None: + return selected + return selected, action diff --git a/lib/cli/src/crewai_cli/utils.py b/lib/cli/src/crewai_cli/utils.py index 063c6d14e..386478c8c 100644 --- a/lib/cli/src/crewai_cli/utils.py +++ b/lib/cli/src/crewai_cli/utils.py @@ -24,6 +24,7 @@ __all__ = [ "build_env_with_all_tool_credentials", "build_env_with_tool_repository_credentials", "copy_template", + "enable_prompt_line_editing", "fetch_and_json_env_file", "get_project_description", "get_project_name", @@ -40,6 +41,19 @@ __all__ = [ console = Console() +def enable_prompt_line_editing() -> None: + """Enable cursor movement/history editing for Click text prompts when available.""" + try: + import readline + except ImportError: + return + + try: + readline.parse_and_bind("set editing-mode emacs") + except Exception: # pragma: no cover - readline backends vary by platform + return + + def copy_template( src: Path, dst: Path, name: str, class_name: str, folder_name: str ) -> None: diff --git a/lib/cli/tests/deploy/test_deploy_main.py b/lib/cli/tests/deploy/test_deploy_main.py index 4f9fbbc4f..e121fb044 100644 --- a/lib/cli/tests/deploy/test_deploy_main.py +++ b/lib/cli/tests/deploy/test_deploy_main.py @@ -150,6 +150,7 @@ class TestDeployCommand(unittest.TestCase): @patch("crewai_cli.deploy.main.fetch_and_json_env_file") 
@patch("crewai_cli.deploy.main.git.Repository.origin_url") @patch("builtins.input") + @pytest.mark.timeout(180) def test_create_crew(self, mock_input, mock_git_origin_url, mock_fetch_env): mock_fetch_env.return_value = {"ENV_VAR": "value"} mock_git_origin_url.return_value = "https://github.com/test/repo.git" @@ -165,6 +166,40 @@ class TestDeployCommand(unittest.TestCase): self.assertIn("Deployment created successfully!", fake_out.getvalue()) self.assertIn("new-uuid", fake_out.getvalue()) + @patch("crewai_cli.deploy.main.fetch_and_json_env_file") + @patch("crewai_cli.deploy.main.git.Repository") + def test_create_crew_without_git_repo_shows_setup_help( + self, mock_repository, mock_fetch_env + ): + mock_fetch_env.return_value = {"ENV_VAR": "value"} + mock_repository.side_effect = ValueError("not a Git repository") + + with patch("sys.stdout", new=StringIO()) as fake_out: + self.deploy_command.create_crew(skip_validate=True) + output = fake_out.getvalue() + + self.assertIn("Deployment requires a Git repository", output) + self.assertIn("git init", output) + self.assertIn("git remote add origin ", output) + self.mock_client.create_crew.assert_not_called() + + @patch("crewai_cli.deploy.main.fetch_and_json_env_file") + @patch("crewai_cli.deploy.main.git.Repository") + def test_create_crew_without_remote_shows_remote_help( + self, mock_repository, mock_fetch_env + ): + mock_fetch_env.return_value = {"ENV_VAR": "value"} + mock_repository.return_value.origin_url.return_value = None + + with patch("sys.stdout", new=StringIO()) as fake_out: + self.deploy_command.create_crew(skip_validate=True) + output = fake_out.getvalue() + + self.assertIn("No remote repository URL found.", output) + self.assertIn("git remote add origin ", output) + self.assertIn("git push -u origin HEAD", output) + self.mock_client.create_crew.assert_not_called() + def test_list_crews(self): mock_response = MagicMock() mock_response.status_code = 200 diff --git a/lib/cli/tests/deploy/test_validate.py 
b/lib/cli/tests/deploy/test_validate.py index 65589987c..64c1ffbcc 100644 --- a/lib/cli/tests/deploy/test_validate.py +++ b/lib/cli/tests/deploy/test_validate.py @@ -110,6 +110,45 @@ def _run_without_import_check(root: Path) -> DeployValidator: return v +def _scaffold_json_crew(root: Path, *, task_agent: str = "researcher") -> None: + (root / "pyproject.toml").write_text(_make_pyproject(name="json_crew")) + (root / "uv.lock").write_text("# dummy uv lockfile\n") + agents_dir = root / "agents" + agents_dir.mkdir() + (agents_dir / "researcher.jsonc").write_text( + dedent( + """ + { + "role": "Researcher", + "goal": "Research things", + "backstory": "Experienced researcher", + "llm": "openai/gpt-4o-mini" + } + """ + ).strip() + + "\n" + ) + (root / "crew.jsonc").write_text( + dedent( + f""" + {{ + "name": "json_crew", + "agents": ["researcher"], + "tasks": [ + {{ + "name": "research", + "description": "Research https://example.com/a//b", + "expected_output": "Findings", + "agent": "{task_agent}" + }} + ] + }} + """ + ).strip() + + "\n" + ) + + @pytest.mark.parametrize( "project_name, expected", [ @@ -129,6 +168,38 @@ def test_valid_standard_crew_project_passes(tmp_path: Path) -> None: assert v.ok, f"expected clean run, got {v.results}" +def test_valid_json_crew_project_passes(tmp_path: Path) -> None: + _scaffold_json_crew(tmp_path) + v = DeployValidator(project_root=tmp_path) + v.run() + assert "invalid_crew_json" not in _codes(v) + + +def test_json_task_agent_mismatch_is_error(tmp_path: Path) -> None: + _scaffold_json_crew(tmp_path, task_agent="missing_agent") + v = DeployValidator(project_root=tmp_path) + v.run() + finding = next(r for r in v.results if r.code == "invalid_crew_json") + assert finding.severity is Severity.ERROR + assert "missing_agent" in finding.detail + + +def test_json_runtime_fields_are_deploy_errors(tmp_path: Path) -> None: + _scaffold_json_crew(tmp_path) + crew_path = tmp_path / "crew.jsonc" + crew_path.write_text( + 
crew_path.read_text().replace( + '"name": "json_crew",', + '"name": "json_crew",\n "id": "00000000-0000-4000-8000-000000000000",', + ) + ) + v = DeployValidator(project_root=tmp_path) + v.run() + finding = next(r for r in v.results if r.code == "invalid_crew_json") + assert finding.severity is Severity.ERROR + assert "runtime-only" in finding.detail + + def test_missing_pyproject_errors(tmp_path: Path) -> None: v = _run_without_import_check(tmp_path) assert "missing_pyproject" in _codes(v) @@ -426,4 +497,31 @@ def test_create_crew_aborts_on_validation_error(tmp_path: Path) -> None: cmd = DeployCommand() cmd.create_crew() assert not cmd.plus_api_client.create_crew.called - del mock_api # silence unused-var lint \ No newline at end of file + del mock_api # silence unused-var lint + + +def test_is_json_crew_defers_to_declared_flow_type(tmp_path): + """A flow project with a stray crew.jsonc must validate as a flow.""" + (tmp_path / "crew.jsonc").write_text("{}") + (tmp_path / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + '[tool.crewai]\ntype = "flow"\n' + ) + + assert DeployValidator(project_root=tmp_path)._is_json_crew is False + + +def test_is_json_crew_true_for_declared_crew_type(tmp_path): + (tmp_path / "crew.jsonc").write_text("{}") + (tmp_path / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + '[tool.crewai]\ntype = "crew"\n' + ) + + assert DeployValidator(project_root=tmp_path)._is_json_crew is True + + +def test_is_json_crew_true_without_pyproject(tmp_path): + (tmp_path / "crew.jsonc").write_text("{}") + + assert DeployValidator(project_root=tmp_path)._is_json_crew is True diff --git a/lib/cli/tests/test_cli.py b/lib/cli/tests/test_cli.py index 15abe42ab..a8cab30b9 100644 --- a/lib/cli/tests/test_cli.py +++ b/lib/cli/tests/test_cli.py @@ -94,9 +94,9 @@ def test_version_command_with_tools(runner): def test_test_default_iterations(evaluate_crew, runner): result = runner.invoke(test) - 
evaluate_crew.assert_called_once_with(3, "gpt-4o-mini", trained_agents_file=None) + evaluate_crew.assert_called_once_with(3, "gpt-5.4-mini", trained_agents_file=None) assert result.exit_code == 0 - assert "Testing the crew for 3 iterations with model gpt-4o-mini" in result.output + assert "Testing the crew for 3 iterations with model gpt-5.4-mini" in result.output @mock.patch("crewai_cli.cli.evaluate_crew") diff --git a/lib/cli/tests/test_create_crew.py b/lib/cli/tests/test_create_crew.py index 83fdbbeeb..060a3c24f 100644 --- a/lib/cli/tests/test_create_crew.py +++ b/lib/cli/tests/test_create_crew.py @@ -6,6 +6,8 @@ from unittest import mock import pytest from click.testing import CliRunner +import crewai_cli.create_json_crew as json_crew +import crewai_cli.tui_picker as tui_picker from crewai_cli.create_crew import create_crew, create_folder_structure @@ -345,3 +347,441 @@ def test_env_vars_are_uppercased_in_env_file( env_file_path = crew_path / ".env" content = env_file_path.read_text() assert "MODEL=" in content + + +def test_json_wizard_defaults_to_sequential_and_memory_enabled(monkeypatch): + monkeypatch.setattr( + json_crew, + "_wizard_agent", + lambda **_: { + "name": "researcher", + "role": "Researcher", + "goal": "Research", + "backstory": "Researcher", + "llm": "openai/gpt-5.5", + "tools": [], + "planning": False, + "allow_delegation": False, + }, + ) + monkeypatch.setattr( + json_crew, + "_wizard_task", + lambda **_: { + "name": "research_task", + "description": "Research", + "expected_output": "Findings", + "agent": "researcher", + "context": [], + }, + ) + + def confirm(label: str, default: bool = False) -> bool: + if label == "Enable crew memory?": + return default + return False + + monkeypatch.setattr(json_crew, "_confirm", confirm) + monkeypatch.setattr(json_crew.click, "prompt", lambda *_, **__: "") + monkeypatch.setattr( + json_crew, + "pick_one", + lambda *_args, **_kwargs: pytest.fail("process should not be prompted"), + ) + + _agents, _tasks, 
settings = json_crew._wizard_agents_and_tasks( + skip_provider=True, + default_llm="openai/gpt-5.5", + ) + + assert settings == {"process": "sequential", "memory": True, "inputs": {}} + + +def test_json_wizard_shows_interpolation_hint(capsys): + json_crew._show_interpolation_hint("tasks") + + output = capsys.readouterr().out + assert "{placeholder}" in output + assert "dynamic values" in output + assert "{topic}" not in output + assert "Description >" not in output + assert '"description"' not in output + + +def test_json_wizard_text_prompt_uses_full_prompt_for_readline(monkeypatch): + prompts: list[str] = [] + + monkeypatch.setattr( + json_crew, "_readline_safe_prompt", lambda prompt: f"safe:{prompt}" + ) + monkeypatch.setattr( + "builtins.input", lambda prompt: prompts.append(prompt) or "Draft content" + ) + + assert json_crew._prompt_text("Goal", spacing_before=False) == "Draft content" + assert len(prompts) == 1 + assert prompts[0].startswith("safe:") + assert "Goal" in prompts[0] + assert " > " in prompts[0] + + +def test_json_wizard_tool_picker_prioritizes_common_tools(monkeypatch): + picker_calls: list[tuple[str, list[str], dict[str, object]]] = [] + + def pick_many(title: str, labels: list[str], **kwargs): + picker_calls.append((title, labels, kwargs)) + return [1, 3], None + + monkeypatch.setattr(json_crew, "pick_many", pick_many) + + tools = json_crew._select_tools() + + assert tools == ["SerperDevTool", "DirectoryReadTool"] + assert len(picker_calls) == 1 + labels = picker_calls[0][1] + assert 0 in picker_calls[0][2]["separator_indices"] + assert labels[0] == "── Common tools ──" + assert labels[1].strip().endswith("SerperDevTool") + assert labels[2].strip().endswith("ScrapeWebsiteTool") + assert labels[3].strip().endswith("DirectoryReadTool") + assert labels[4].strip().endswith("FileReadTool") + assert labels[5].strip().endswith("FileWriterTool") + assert labels[1].index("Google search") < labels[1].index("SerperDevTool") + assert "More tools" not in 
labels + + +def test_json_wizard_tool_picker_collapses_categories_by_default(monkeypatch): + picker_calls: list[tuple[str, list[str], dict[str, object]]] = [] + + def pick_many(title: str, labels: list[str], **kwargs): + picker_calls.append((title, labels, kwargs)) + return [], None + + monkeypatch.setattr(json_crew, "pick_many", pick_many) + + json_crew._select_tools() + + labels = picker_calls[0][1] + action_indices = picker_calls[0][2]["action_indices"] + # Categories show as collapsed action rows, not separators with tools + assert any(label.startswith("▸ Search & Research") for label in labels) + assert any(label.startswith("▸ Web Scraping") for label in labels) + assert not any(label.strip().endswith("BraveSearchTool") for label in labels) + assert len(action_indices) >= 4 + # Only the common tools section is visible beyond the category rows + assert len(labels) == 1 + 5 + len(action_indices) + + +def test_json_wizard_tool_picker_expands_one_category_at_a_time(monkeypatch): + picker_calls: list[tuple[str, list[str], dict[str, object]]] = [] + + def find_category_row(labels: list[str], category: str) -> int: + return next( + idx for idx, label in enumerate(labels) if category in label + ) + + def pick_many(title: str, labels: list[str], **kwargs): + picker_calls.append((title, labels, kwargs)) + call_num = len(picker_calls) + if call_num == 1: + return [], find_category_row(labels, "Search & Research") + if call_num == 2: + # Search & Research is expanded; select BraveSearchTool and + # expand Web Scraping instead + brave = next( + idx + for idx, label in enumerate(labels) + if label.strip().endswith("BraveSearchTool") + ) + return [brave], find_category_row(labels, "Web Scraping") + return [], None + + monkeypatch.setattr(json_crew, "pick_many", pick_many) + + tools = json_crew._select_tools() + + assert tools == ["BraveSearchTool"] + assert len(picker_calls) == 3 + # Second render: Search & Research expanded, others collapsed + labels2 = picker_calls[1][1] + 
assert any(label.startswith("▾ Search & Research") for label in labels2) + assert any(label.strip().endswith("BraveSearchTool") for label in labels2) + assert any(label.startswith("▸ Web Scraping") for label in labels2) + # Third render: Web Scraping expanded, Search & Research collapsed again + labels3 = picker_calls[2][1] + assert any(label.startswith("▸ Search & Research") for label in labels3) + assert any(label.startswith("▾ Web Scraping") for label in labels3) + assert not any(label.strip().endswith("BraveSearchTool") for label in labels3) + # The collapsed Search & Research row reports its selection count + assert any( + "Search & Research" in label and "1 selected" in label for label in labels3 + ) + # Cursor returns to the toggled category row + assert picker_calls[2][2]["initial_cursor"] == next( + idx for idx, label in enumerate(labels3) if "Web Scraping" in label + ) + + +def test_json_wizard_tool_picker_preserves_selection_across_renders(monkeypatch): + picker_calls: list[tuple[str, list[str], dict[str, object]]] = [] + + def pick_many(title: str, labels: list[str], **kwargs): + picker_calls.append((title, labels, kwargs)) + call_num = len(picker_calls) + if call_num == 1: + # Select a common tool, then expand a category + category_row = next( + idx for idx, label in enumerate(labels) if "Web Scraping" in label + ) + return [1], category_row + # Confirm without touching anything else + return sorted(kwargs["preselected"]), None + + monkeypatch.setattr(json_crew, "pick_many", pick_many) + + tools = json_crew._select_tools() + + # The common-tool selection survived the expand re-render via preselected + assert tools == ["SerperDevTool"] + assert 1 in picker_calls[1][2]["preselected"] + + +def test_json_wizard_tool_picker_lists_builtin_tools_across_categories(monkeypatch): + picker_calls: list[tuple[str, list[str], dict[str, object]]] = [] + expanded_labels: list[str] = [] + + def pick_many(title: str, labels: list[str], **kwargs): + 
picker_calls.append((title, labels, kwargs)) + expanded_labels.extend(labels) + action_indices = sorted(kwargs["action_indices"]) + call_num = len(picker_calls) + if call_num <= len(action_indices): + # Expand the n-th category (indices shift between renders, so + # recompute from this render's action rows) + return [], action_indices[call_num - 1] + return [], None + + monkeypatch.setattr(json_crew, "pick_many", pick_many) + + json_crew._select_tools() + + tool_names = { + label.rsplit(maxsplit=1)[-1] + for label in expanded_labels + if not label.startswith(("▸", "▾", "──")) + } + + assert { + "DirectorySearchTool", + "MDXSearchTool", + "XMLSearchTool", + "YoutubeVideoSearchTool", + "S3ReaderTool", + "E2BExecTool", + "TavilyResearchTool", + "SerplyNewsSearchTool", + "BrowserbaseLoadTool", + "PatronusEvalTool", + }.issubset(tool_names) + assert { + "MCPServerAdapter", + "MongoDBVectorSearchConfig", + "ScrapegraphScrapeToolSchema", + "SnowflakeConfig", + }.isdisjoint(tool_names) + + +def test_multi_picker_skips_separator_on_initial_cursor(monkeypatch): + cursors: list[int] = [] + + monkeypatch.setattr(tui_picker, "_read_key", lambda: "enter") + monkeypatch.setattr( + tui_picker, + "_draw_multi", + lambda _labels, cursor, *_args, **_kwargs: cursors.append(cursor), + ) + monkeypatch.setattr(tui_picker, "_clear_lines", lambda *_args, **_kwargs: None) + + assert tui_picker._arrow_select_multi( + ["── Common tools ──", "Google search via Serper API SerperDevTool"], + separator_indices={0}, + ) == ([], None) + assert cursors == [1] + + +def test_json_wizard_agent_attribute_prompts_are_compact(monkeypatch): + prompt_calls: list[tuple[str, bool]] = [] + prompt_values = { + "Role": "Senior Dev Rel", + "Goal": "Draft content", + "Backstory": "Knows developer communities", + } + + def prompt_text( + label: str, + default: str = "", + *, + spacing_before: bool = True, + ) -> str: + prompt_calls.append((label, spacing_before)) + return prompt_values[label] + + 
monkeypatch.setattr(json_crew, "_prompt_text", prompt_text) + monkeypatch.setattr(json_crew, "_select_model", lambda: "openai/gpt-5.5") + monkeypatch.setattr(json_crew, "pick_many", lambda *_args, **_kwargs: ([], None)) + monkeypatch.setattr(json_crew, "_confirm", lambda *_args, **_kwargs: False) + + agent = json_crew._wizard_agent(agent_num=1, existing_names=[]) + + assert agent is not None + assert prompt_calls == [ + ("Role", False), + ("Goal", False), + ("Backstory", False), + ] + + +def test_json_wizard_task_attribute_prompts_are_compact(monkeypatch): + prompt_calls: list[tuple[str, bool]] = [] + prompt_values = { + "Description": "Research latest release", + "Expected output": "Release summary", + } + + def prompt_text( + label: str, + default: str = "", + *, + spacing_before: bool = True, + ) -> str: + prompt_calls.append((label, spacing_before)) + return prompt_values[label] + + monkeypatch.setattr(json_crew, "_prompt_text", prompt_text) + + task = json_crew._wizard_task( + task_num=1, + agent_names=["senior_dev_rel"], + prior_task_names=[], + ) + + assert task is not None + assert prompt_calls == [ + ("Description", False), + ("Expected output", False), + ] + + +def test_json_create_provider_preselects_default_model(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + with mock.patch( + "crewai_cli.create_json_crew._wizard_agents_and_tasks" + ) as mock_wizard: + mock_wizard.return_value = ( + [ + { + "name": "researcher", + "role": "Researcher", + "goal": "Research", + "backstory": "Researcher", + "llm": "openai/gpt-5.5", + "tools": [], + "planning": False, + "allow_delegation": False, + } + ], + [ + { + "name": "research_task", + "description": "Research", + "expected_output": "Findings", + "agent": "researcher", + "context": [], + } + ], + {"process": "sequential", "memory": False, "inputs": {}}, + ) + + json_crew.create_json_crew("JSON Crew", provider="openai", skip_provider=True) + + mock_wizard.assert_called_once_with( + skip_provider=True, + 
default_llm="openai/gpt-5.5", + ) + assert (tmp_path / "json_crew" / "crew.jsonc").exists() + assert not (tmp_path / "json_crew" / "tests").exists() + assert not (tmp_path / "json_crew" / "config.jsonc").exists() + + crew_template = (tmp_path / "json_crew" / "crew.jsonc").read_text() + assert ( + '"guardrail": "Every factual claim needs context support."' + in crew_template + ) + assert '"guardrails": [' in crew_template + assert '"guardrail_max_retries": 2' in crew_template + assert "Docs: https://docs.crewai.com/concepts/tasks" in crew_template + assert '"output_pydantic": null' in crew_template + assert '"markdown": false' in crew_template + assert "Docs: https://docs.crewai.com/concepts/crews" in crew_template + assert '"manager_agent": "researcher"' in crew_template + assert '"output_log_file": "crew.log"' in crew_template + assert "Crew-level LLM fields also accept object form" in crew_template + assert '"chat_llm": {"model": "llama3", "provider": "ollama"' in ( + crew_template + ) + assert "Use {placeholder} in agent or task text" in crew_template + assert "`crewai run` prompts for any placeholders" in crew_template + assert "Use {placeholder} inputs here" in crew_template + + agent_template = ( + tmp_path / "json_crew" / "agents" / "researcher.jsonc" + ).read_text() + assert "You can use {placeholder} inputs in role, goal, or backstory" in ( + agent_template + ) + assert '"role": "Senior {industry} Researcher"' in agent_template + assert "Optional agent-level guardrail" in agent_template + assert '"guardrail_max_retries": 2' in agent_template + assert "Docs: https://docs.crewai.com/concepts/agents" in agent_template + assert '"reasoning": true' in agent_template + assert "For custom endpoints or deployment-based providers" in agent_template + assert '"deployment_name": "my-deployment", "provider": "azure"' in ( + agent_template + ) + assert '"planning_config": {' in agent_template + assert '"llm": {"model": "deepseek-chat", "provider": "deepseek"}' in ( + 
agent_template + ) + assert '"knowledge_sources": []' in agent_template + + +def test_json_provider_default_model_helper(): + assert json_crew._default_model_for_provider("openai") == "openai/gpt-5.5" + assert json_crew._default_model_for_provider("anthropic/claude-custom") == ( + "anthropic/claude-custom" + ) + assert json_crew._default_model_for_provider("unknown") is None + + +def test_json_wizard_task_reprompts_on_cancelled_agent_pick(monkeypatch): + """Esc on the agent picker must reprompt, not silently assign agent 0.""" + prompts = iter(["Do the research", "A report"]) + monkeypatch.setattr(json_crew, "_prompt_text", lambda *a, **k: next(prompts)) + + pick_calls: list[str] = [] + picks = iter([-1, 1]) + + def fake_pick_one(title: str, labels: list[str]) -> int: + pick_calls.append(title) + return next(picks) + + monkeypatch.setattr(json_crew, "pick_one", fake_pick_one) + + task = json_crew._wizard_task( + task_num=1, + agent_names=["first_agent", "second_agent"], + prior_task_names=[], + ) + + assert len(pick_calls) == 2 + assert task["agent"] == "second_agent" diff --git a/lib/cli/tests/test_crew_run_tui.py b/lib/cli/tests/test_crew_run_tui.py new file mode 100644 index 000000000..d978e12be --- /dev/null +++ b/lib/cli/tests/test_crew_run_tui.py @@ -0,0 +1,796 @@ +from datetime import datetime +import time + +import pytest + +from crewai.events.event_bus import crewai_event_bus +from crewai.events.types.observation_events import ( + GoalAchievedEarlyEvent, + PlanRefinementEvent, + PlanReplanTriggeredEvent, + PlanStepCompletedEvent, + PlanStepStartedEvent, + StepObservationCompletedEvent, + StepObservationFailedEvent, + StepObservationStartedEvent, +) +from crewai.events.types.tool_usage_events import ( + ToolUsageErrorEvent, + ToolUsageFinishedEvent, + ToolUsageStartedEvent, +) +from crewai_cli import run_crew +from crewai_cli.crew_run_tui import CrewRunApp + + +def _app_with_plan() -> CrewRunApp: + app = CrewRunApp() + app._plan = { + "plan": "Demo plan", + 
"steps": [ + {"step_number": 1, "description": "First"}, + {"step_number": 2, "description": "Second"}, + {"step_number": 3, "description": "Third"}, + ], + } + app._plan_step_status = {1: "pending", 2: "pending", 3: "pending"} + return app + + +def _log_entry(name: str) -> dict: + now = time.time() + return { + "tool_name": name, + "status": "success", + "args": None, + "result": f"{name} result", + "error": None, + "start_time": now, + "duration": 1.0, + "task_idx": 1, + } + + +def _emit_event(event: object) -> None: + future = crewai_event_bus.emit(None, event) + if future: + future.result(timeout=5) + + +def test_chain_deploy_skips_validation_after_auth_retry(monkeypatch) -> None: + create_calls: list[dict[str, object]] = [] + login_calls: list[bool] = [] + + class FakeDeployCommand: + attempts = 0 + + def create_crew(self, **kwargs) -> None: + create_calls.append(kwargs) + FakeDeployCommand.attempts += 1 + if FakeDeployCommand.attempts == 1: + raise SystemExit(1) + + class FakeAuthenticationCommand: + def login(self) -> None: + login_calls.append(True) + + monkeypatch.setattr("crewai_cli.deploy.main.DeployCommand", FakeDeployCommand) + monkeypatch.setattr( + "crewai_cli.authentication.main.AuthenticationCommand", + FakeAuthenticationCommand, + ) + + run_crew._chain_deploy() + + assert create_calls == [ + {"confirm": False, "skip_validate": True}, + {"confirm": False, "skip_validate": True}, + ] + assert login_calls == [True] + + +def test_plan_step_status_updates_only_the_explicit_step() -> None: + app = _app_with_plan() + + app._set_plan_step_status(2, "done") + + assert app._plan_step_status == { + 1: "pending", + 2: "done", + 3: "pending", + } + + +def test_step_observation_events_update_the_explicit_step() -> None: + app = _app_with_plan() + app._subscribe() + try: + future = crewai_event_bus.emit( + None, + StepObservationStartedEvent( + agent_role="Agent", + step_number=2, + step_description="Second", + ), + ) + if future: + future.result(timeout=5) + + 
assert app._plan_step_status == { + 1: "pending", + 2: "active", + 3: "pending", + } + + future = crewai_event_bus.emit( + None, + StepObservationCompletedEvent( + agent_role="Agent", + step_number=2, + step_description="Second", + step_completed_successfully=True, + ), + ) + if future: + future.result(timeout=5) + finally: + app._unsubscribe() + + assert app._plan_step_status == { + 1: "pending", + 2: "done", + 3: "pending", + } + + +def test_plan_step_lifecycle_events_update_the_explicit_step() -> None: + app = _app_with_plan() + app._subscribe() + try: + _emit_event( + PlanStepStartedEvent( + agent_role="Agent", + step_number=2, + step_description="Second", + ) + ) + + assert app._plan_step_status == { + 1: "pending", + 2: "active", + 3: "pending", + } + + _emit_event( + PlanStepCompletedEvent( + agent_role="Agent", + step_number=2, + step_description="Second", + success=True, + result="done", + ) + ) + finally: + app._unsubscribe() + + assert app._plan_step_status == { + 1: "pending", + 2: "done", + 3: "pending", + } + + +def test_failed_plan_step_lifecycle_event_marks_exact_step_failed() -> None: + app = _app_with_plan() + app._subscribe() + try: + _emit_event( + PlanStepCompletedEvent( + agent_role="Agent", + step_number=2, + step_description="Second", + success=False, + error="Step failed", + ) + ) + finally: + app._unsubscribe() + + assert app._plan_step_status == { + 1: "pending", + 2: "failed", + 3: "pending", + } + + +def test_tool_usage_events_do_not_advance_plan_steps() -> None: + app = _app_with_plan() + app._subscribe() + try: + future = crewai_event_bus.emit( + None, + ToolUsageStartedEvent(tool_name="search", tool_args={"query": "CrewAI"}), + ) + if future: + future.result(timeout=5) + + now = datetime.now() + future = crewai_event_bus.emit( + None, + ToolUsageFinishedEvent( + tool_name="search", + tool_args={"query": "CrewAI"}, + started_at=now, + finished_at=now, + output="result", + ), + ) + if future: + future.result(timeout=5) + finally: + 
app._unsubscribe() + + assert app._plan_step_status == { + 1: "pending", + 2: "pending", + 3: "pending", + } + + +def test_next_tool_does_not_mark_unfinished_tool_successful() -> None: + app = _app_with_plan() + app._subscribe() + try: + _emit_event( + ToolUsageStartedEvent(tool_name="search", tool_args={"query": "CrewAI"}), + ) + _emit_event( + ToolUsageStartedEvent(tool_name="scrape", tool_args={"url": "https://x"}), + ) + finally: + app._unsubscribe() + + assert app._log_entries[0]["status"] == "timeout" + assert app._log_entries[0]["result"] is None + assert app._log_entries[0]["error"] == ( + "No result received before the next tool started" + ) + assert app._log_entries[1]["status"] == "running" + assert app._plan_step_status == { + 1: "pending", + 2: "pending", + 3: "pending", + } + + +def test_internal_reasoning_function_call_is_hidden_from_activity_log() -> None: + app = _app_with_plan() + app._subscribe() + try: + future = crewai_event_bus.emit( + None, + ToolUsageStartedEvent( + tool_name="create_reasoning_plan", + tool_args={"plan": "Plan", "steps": [], "ready": True}, + ), + ) + if future: + future.result(timeout=5) + + now = datetime.now() + future = crewai_event_bus.emit( + None, + ToolUsageFinishedEvent( + tool_name="create_reasoning_plan", + tool_args={"plan": "Plan", "steps": [], "ready": True}, + started_at=now, + finished_at=now, + output='{"plan":"Plan","steps":[],"ready":true}', + ), + ) + if future: + future.result(timeout=5) + + future = crewai_event_bus.emit( + None, + ToolUsageErrorEvent( + tool_name="create_reasoning_plan", + tool_args={"plan": "Plan", "steps": [], "ready": True}, + error="internal planning fallback", + ), + ) + if future: + future.result(timeout=5) + finally: + app._unsubscribe() + + assert app._log_entries == [] + assert app._current_task_steps == [] + + +def test_tool_failure_does_not_override_successful_plan_step_completion() -> None: + app = _app_with_plan() + app._subscribe() + try: + _emit_event( + 
PlanStepStartedEvent( + agent_role="Agent", + step_number=1, + step_description="First", + ) + ) + _emit_event( + ToolUsageStartedEvent( + tool_name="search_the_internet_with_serper", + tool_args={"search_query": "CrewAI release"}, + plan_step_number=1, + plan_step_description="First", + ) + ) + _emit_event( + ToolUsageErrorEvent( + tool_name="search_the_internet_with_serper", + tool_args={"search_query": "CrewAI release"}, + plan_step_number=1, + plan_step_description="First", + error="No results", + ) + ) + _emit_event( + PlanStepCompletedEvent( + agent_role="Agent", + step_number=1, + step_description="First", + success=True, + result="Recovered with another source", + ) + ) + finally: + app._unsubscribe() + + assert app._plan_step_status == { + 1: "done", + 2: "pending", + 3: "pending", + } + + +def test_tool_event_step_metadata_is_stored_in_activity_log() -> None: + app = _app_with_plan() + app._subscribe() + try: + _emit_event( + ToolUsageStartedEvent( + tool_name="search_the_internet_with_serper", + tool_args={"search_query": "CrewAI release"}, + plan_step_number=2, + plan_step_description="Second", + ) + ) + now = datetime.now() + _emit_event( + ToolUsageFinishedEvent( + tool_name="search_the_internet_with_serper", + tool_args={"search_query": "CrewAI release"}, + plan_step_number=2, + plan_step_description="Second", + started_at=now, + finished_at=now, + output="Found official source", + ) + ) + finally: + app._unsubscribe() + + assert app._log_entries[0]["plan_step_number"] == 2 + assert app._plan_step_status == { + 1: "pending", + 2: "pending", + 3: "pending", + } + + +def test_starting_next_tool_does_not_infer_plan_step_progress() -> None: + app = _app_with_plan() + app._subscribe() + try: + _emit_event( + ToolUsageStartedEvent( + tool_name="search_the_internet_with_serper", + tool_args={"search_query": "CrewAI release"}, + ) + ) + _emit_event( + ToolUsageErrorEvent( + tool_name="search_the_internet_with_serper", + tool_args={"search_query": "CrewAI 
release"}, + error="No results", + ) + ) + _emit_event( + ToolUsageStartedEvent( + tool_name="read_website_content", + tool_args={"url": "https://example.com"}, + ) + ) + finally: + app._unsubscribe() + + assert app._log_entries[0]["status"] == "error" + assert app._log_entries[1]["status"] == "running" + assert app._plan_step_status == { + 1: "pending", + 2: "pending", + 3: "pending", + } + + +@pytest.mark.asyncio +async def test_crew_done_does_not_mark_unfinished_tool_successful() -> None: + app = _app_with_plan() + + async with app.run_test(size=(100, 40)) as pilot: + app._plan_step_status = {1: "failed", 2: "done", 3: "pending"} + app._log_entries = [ + { + "tool_name": "search", + "status": "running", + "args": '{"query": "CrewAI"}', + "result": None, + "error": None, + "start_time": time.time() - 2, + "duration": None, + "task_idx": 1, + } + ] + + app._on_crew_done("final output") + await pilot.pause() + + assert app._log_entries[0]["status"] == "timeout" + assert app._log_entries[0]["result"] is None + assert app._log_entries[0]["error"] == "No result received before crew completed" + assert app._plan_step_status == {1: "failed", 2: "done", 3: "done"} + + +def test_streamed_step_observation_updates_named_step_only() -> None: + app = _app_with_plan() + + updated = app._try_parse_step_observation( + '{"step_completed_successfully":true,' + '"key_information_learned":"Step 2 succeeded with the official source."}' + ) + + assert updated is True + assert app._plan_step_status == { + 1: "pending", + 2: "done", + 3: "pending", + } + + +def test_failed_streamed_step_observation_marks_named_step_failed() -> None: + app = _app_with_plan() + + updated = app._try_parse_step_observation( + '{"step_completed_successfully":false,' + '"key_information_learned":"Step 2 failed because the tool failed."}' + ) + + assert updated is True + assert app._plan_step_status == { + 1: "pending", + 2: "failed", + 3: "pending", + } + + +def 
test_streamed_goal_achieved_observation_collapses_remaining_steps_done() -> None: + app = _app_with_plan() + + updated = app._try_parse_step_observation( + '{"step_number":2,' + '"step_completed_successfully":true,' + '"key_information_learned":"Goal is already satisfied.",' + '"goal_already_achieved":true}' + ) + + assert updated is True + assert app._plan_step_status == { + 1: "done", + 2: "done", + 3: "done", + } + + +def test_task_completion_collapses_pending_plan_steps_but_preserves_failed() -> None: + app = _app_with_plan() + app._plan_step_status = {1: "failed", 2: "done", 3: "pending"} + + app._collapse_plan_on_task_done() + + assert app._plan_step_status == {1: "failed", 2: "done", 3: "done"} + + +def test_observation_failure_collapses_to_done_because_executor_continues() -> None: + app = _app_with_plan() + app._plan_step_status = {1: "done", 2: "active", 3: "pending"} + app._subscribe() + try: + future = crewai_event_bus.emit( + None, + StepObservationFailedEvent( + agent_role="Agent", + step_number=2, + step_description="Second", + error="observer timeout", + ), + ) + if future: + future.result(timeout=5) + finally: + app._unsubscribe() + + assert app._plan_step_status == { + 1: "done", + 2: "done", + 3: "pending", + } + + +def test_goal_achieved_event_collapses_remaining_steps_done() -> None: + app = _app_with_plan() + app._plan_step_status = {1: "done", 2: "active", 3: "pending"} + app._subscribe() + try: + future = crewai_event_bus.emit( + None, + GoalAchievedEarlyEvent( + agent_role="Agent", + step_number=2, + steps_completed=2, + steps_remaining=1, + ), + ) + if future: + future.result(timeout=5) + finally: + app._unsubscribe() + + assert app._plan_step_status == { + 1: "done", + 2: "done", + 3: "done", + } + + +def test_replan_event_keeps_old_plan_until_next_streamed_plan_replaces_it() -> None: + app = _app_with_plan() + app._subscribe() + try: + future = crewai_event_bus.emit( + None, + PlanReplanTriggeredEvent( + agent_role="Agent", + 
step_number=2, + replan_reason="Need updated sources", + replan_count=1, + completed_steps_preserved=1, + ), + ) + if future: + future.result(timeout=5) + finally: + app._unsubscribe() + + assert app._plan is not None + assert app._plan_step_status == {1: "pending", 2: "pending", 3: "pending"} + assert app._awaiting_replan is True + + app._try_parse_plan( + '{"plan":"Updated plan","steps":[' + '{"step_number":1,"description":"Updated first"},' + '{"step_number":2,"description":"Updated second"}]}' + ) + + assert app._plan == { + "plan": "Updated plan", + "steps": [ + {"step_number": 1, "description": "Updated first"}, + {"step_number": 2, "description": "Updated second"}, + ], + } + assert app._plan_step_status == {1: "pending", 2: "pending"} + assert app._awaiting_replan is False + + +def test_plan_refinement_updates_descriptions_without_new_statuses() -> None: + app = _app_with_plan() + app._plan_step_status = {1: "done", 2: "active", 3: "pending"} + app._subscribe() + try: + future = crewai_event_bus.emit( + None, + PlanRefinementEvent( + agent_role="Agent", + step_number=2, + refined_step_count=1, + refinements=["Step 3: Write the final answer from verified facts"], + ), + ) + if future: + future.result(timeout=5) + finally: + app._unsubscribe() + + assert app._plan_step_status == { + 1: "done", + 2: "done", + 3: "pending", + } + assert app._plan["steps"][2]["description"] == ( + "Write the final answer from verified facts" + ) + + +def test_step_observation_json_is_hidden_from_streaming_text() -> None: + app = _app_with_plan() + + assert ( + app._strip_step_observation_json( + 'Visible before {"step_completed_successfully":true,' + '"key_information_learned":"Step 2 succeeded."} visible after' + ) + == "Visible before visible after" + ) + + +@pytest.mark.asyncio +async def test_completed_run_keeps_activity_log_keyboard_navigation_active() -> None: + app = CrewRunApp() + + async with app.run_test(size=(100, 40)) as pilot: + app._log_entries = 
[_log_entry("search"), _log_entry("scrape")] + + app._on_crew_done("final output") + await pilot.pause() + + assert app.focused is app.query_one("#log-panel") + + await pilot.press("down", "enter") + await pilot.pause() + + assert app._log_cursor == 1 + assert app._log_expanded == {1} + + await pilot.press("up") + await pilot.pause() + + assert app._log_cursor == 0 + + +class _FakeTask: + fingerprint = None + + def __init__(self, task_id: str, name: str) -> None: + self.id = task_id + self.name = name + self.description = name + + +def test_async_task_completion_marks_the_right_sidebar_row() -> None: + """Overlapping tasks: completing task 1 while task 2 runs must not + mark task 2 done, and starting task 2 must not mark task 1 done.""" + from crewai.events.types.task_events import TaskCompletedEvent, TaskStartedEvent + from crewai.tasks.task_output import TaskOutput + + app = CrewRunApp(total_tasks=2, task_names=["first", "second"]) + app._subscribe() + try: + task1 = _FakeTask("id-1", "first") + task2 = _FakeTask("id-2", "second") + + for task in (task1, task2): + future = crewai_event_bus.emit( + None, TaskStartedEvent(context=None, task=task) + ) + if future: + future.result(timeout=5) + + # Both started: neither prematurely done + assert app._task_statuses == {1: "active", 2: "active"} + + future = crewai_event_bus.emit( + None, + TaskCompletedEvent( + output=TaskOutput(description="first", raw="done", agent="a"), + task=task1, + ), + ) + if future: + future.result(timeout=5) + + assert app._task_statuses == {1: "done", 2: "active"} + finally: + app._unsubscribe() + + +def test_pop_task_state_falls_back_to_current_task() -> None: + app = CrewRunApp(total_tasks=2, task_names=["first", "second"]) + app._current_task_idx = 2 + app._current_task_desc = "second" + + class _Evt: + task = None + task_name = "unknown" + + state = app._pop_task_state(_Evt()) + assert state["idx"] == 2 + assert state["desc"] == "second" + + +def 
test_overlapping_task_logs_keep_their_own_state() -> None: + """Task 1 completing after task 2 started must log its own description, + agent, and output — and must not steal or reset task 2's stream state.""" + from crewai.events.types.task_events import TaskCompletedEvent, TaskStartedEvent + from crewai.tasks.task_output import TaskOutput + + app = CrewRunApp(total_tasks=2, task_names=["first", "second"]) + app._subscribe() + try: + task1 = _FakeTask("id-1", "first") + task2 = _FakeTask("id-2", "second") + + for task in (task1, task2): + future = crewai_event_bus.emit( + None, TaskStartedEvent(context=None, task=task) + ) + if future: + future.result(timeout=5) + + # Task 2 is current and has streamed state in flight + app._task_full_output = "task two streaming output" + app._current_task_steps = [{"type": "llm", "summary": "thinking"}] + + future = crewai_event_bus.emit( + None, + TaskCompletedEvent( + output=TaskOutput( + description="first", raw="task one result", agent="a1" + ), + task=task1, + ), + ) + if future: + future.result(timeout=5) + + # Task 1's entry carries its own identity and output + entry1 = app._task_logs[-1] + assert entry1["idx"] == 1 + assert entry1["desc"] == "first" + assert entry1["output"] == "task one result" + assert entry1["steps"] == [] + + # Task 2's in-flight stream state was not consumed or reset + assert app._task_full_output == "task two streaming output" + assert app._current_task_steps == [{"type": "llm", "summary": "thinking"}] + + future = crewai_event_bus.emit( + None, + TaskCompletedEvent( + output=TaskOutput( + description="second", raw="task two result", agent="a2" + ), + task=task2, + ), + ) + if future: + future.result(timeout=5) + + entry2 = app._task_logs[-1] + assert entry2["idx"] == 2 + assert entry2["desc"] == "second" + assert entry2["output"] == "task two streaming output" + assert any(step.get("summary") == "thinking" for step in entry2["steps"]) + finally: + app._unsubscribe() diff --git 
a/lib/cli/tests/test_run_crew.py b/lib/cli/tests/test_run_crew.py new file mode 100644 index 000000000..2f408c6ba --- /dev/null +++ b/lib/cli/tests/test_run_crew.py @@ -0,0 +1,144 @@ +"""Tests for crewai_cli.run_crew JSON crew handling.""" + +import os +from pathlib import Path + +import pytest +from crewai_core.constants import CREWAI_TRAINED_AGENTS_FILE_ENV + +import crewai_cli.run_crew as run_crew_module + + +def test_run_crew_forwards_trained_agents_file_to_json_crews(monkeypatch): + """crewai run -f must reach JSON crews, not only classic subprocess crews.""" + monkeypatch.setattr(run_crew_module, "_has_json_crew", lambda: True) + called: dict = {} + + def fake_run_json_crew(trained_agents_file=None): + called["trained_agents_file"] = trained_agents_file + + monkeypatch.setattr(run_crew_module, "_run_json_crew", fake_run_json_crew) + + run_crew_module.run_crew(trained_agents_file="some.pkl") + + assert called == {"trained_agents_file": "some.pkl"} + + +def test_run_json_crew_exports_trained_agents_env(monkeypatch, tmp_path: Path): + """JSON crews run in-process, so the pickle path must land in the env var.""" + monkeypatch.chdir(tmp_path) + monkeypatch.delenv(CREWAI_TRAINED_AGENTS_FILE_ENV, raising=False) + + try: + # No crew.json(c) in tmp_path: the loader fails *after* the env var + # export, which is the part under test. 
+ with pytest.raises(FileNotFoundError): + run_crew_module._run_json_crew(trained_agents_file="some.pkl") + assert os.environ[CREWAI_TRAINED_AGENTS_FILE_ENV] == "some.pkl" + finally: + os.environ.pop(CREWAI_TRAINED_AGENTS_FILE_ENV, None) + + +def test_run_json_crew_leaves_env_untouched_without_flag(monkeypatch, tmp_path: Path): + monkeypatch.chdir(tmp_path) + monkeypatch.delenv(CREWAI_TRAINED_AGENTS_FILE_ENV, raising=False) + + with pytest.raises(FileNotFoundError): + run_crew_module._run_json_crew() + + assert CREWAI_TRAINED_AGENTS_FILE_ENV not in os.environ + + +def test_missing_input_names_accepts_hyphenated_placeholders(): + """The prompt regex must accept the same names kickoff interpolation does.""" + from types import SimpleNamespace + + crew = SimpleNamespace( + agents=[ + SimpleNamespace( + role="Researcher", goal="Cover {my-topic}", backstory="" + ) + ], + tasks=[ + SimpleNamespace( + description="Write about {my-topic} for {target-audience}", + expected_output="Post", + output_file=None, + ) + ], + ) + + assert run_crew_module._missing_input_names(crew, {}) == [ + "my-topic", + "target-audience", + ] + + +def _patch_tui_run(monkeypatch, status: str): + """Stub the TUI pieces of _run_json_crew so only exit handling runs.""" + + class FakeApp: + def __init__(self, **kwargs): + self._status = status + self._crew_result = "result" if status == "completed" else None + self._want_deploy = False + + def run(self): + pass + + from types import SimpleNamespace + + crew = SimpleNamespace(name="Demo", tasks=[], agents=[]) + monkeypatch.setattr( + run_crew_module, "find_crew_json_file", lambda: Path("crew.jsonc") + ) + monkeypatch.setattr( + run_crew_module, + "_load_json_crew_for_tui", + lambda _path: (FakeApp, crew, {}, [], []), + ) + monkeypatch.setattr( + run_crew_module, "_prompt_for_missing_inputs", lambda _crew, inputs: inputs + ) + monkeypatch.setattr(run_crew_module, "_print_post_tui_summary", lambda _app: None) + + +def 
test_run_json_crew_failed_status_exits_nonzero(monkeypatch, tmp_path: Path): + monkeypatch.chdir(tmp_path) + _patch_tui_run(monkeypatch, status="failed") + + with pytest.raises(SystemExit) as exc_info: + run_crew_module._run_json_crew() + + assert exc_info.value.code == 1 + + +def test_run_json_crew_completed_status_returns_result(monkeypatch, tmp_path: Path): + monkeypatch.chdir(tmp_path) + _patch_tui_run(monkeypatch, status="completed") + + assert run_crew_module._run_json_crew() == "result" + + +def test_has_json_crew_defers_to_declared_flow_type(monkeypatch, tmp_path: Path): + """A flow project containing a stray crew.jsonc must still run as a flow.""" + monkeypatch.chdir(tmp_path) + (tmp_path / "crew.jsonc").write_text("{}") + (tmp_path / "pyproject.toml").write_text('[tool.crewai]\ntype = "flow"\n') + + assert run_crew_module._has_json_crew() is False + + +def test_has_json_crew_true_for_declared_crew_type(monkeypatch, tmp_path: Path): + monkeypatch.chdir(tmp_path) + (tmp_path / "crew.jsonc").write_text("{}") + (tmp_path / "pyproject.toml").write_text('[tool.crewai]\ntype = "crew"\n') + + assert run_crew_module._has_json_crew() is True + + +def test_has_json_crew_true_without_pyproject(monkeypatch, tmp_path: Path): + monkeypatch.chdir(tmp_path) + (tmp_path / "crew.jsonc").write_text("{}") + + assert run_crew_module._has_json_crew() is True diff --git a/lib/cli/tests/tools/test_main.py b/lib/cli/tests/tools/test_main.py index 1f26d3036..b8383cc0d 100644 --- a/lib/cli/tests/tools/test_main.py +++ b/lib/cli/tests/tools/test_main.py @@ -157,14 +157,16 @@ def test_install_api_error(mock_get, capsys, tool_command): mock_get.assert_called_once_with("error-tool") -@patch("crewai_cli.tools.main.git.Repository.fetch") -@patch("crewai_cli.tools.main.git.Repository.is_synced", return_value=False) -def test_publish_when_not_in_sync(mock_is_synced, mock_fetch, capsys, tool_command): +@patch("crewai_cli.tools.main.git.Repository") +def 
test_publish_when_not_in_sync(mock_repository, capsys, tool_command): + mock_repository.return_value.is_synced.return_value = False + with raises(SystemExit): tool_command.publish(is_public=True) output = capsys.readouterr().out assert "Local changes need to be resolved before publishing" in output + mock_repository.return_value.is_synced.assert_called_once_with() @patch("crewai_cli.tools.main.get_project_name", return_value="sample-tool") diff --git a/lib/crewai-files/src/crewai_files/core/sources.py b/lib/crewai-files/src/crewai_files/core/sources.py index 821a195c6..0a4204d4d 100644 --- a/lib/crewai-files/src/crewai_files/core/sources.py +++ b/lib/crewai-files/src/crewai_files/core/sources.py @@ -4,6 +4,7 @@ from __future__ import annotations from collections.abc import AsyncIterator, Iterator import inspect +import json import mimetypes from pathlib import Path from typing import Annotated, Any, BinaryIO, Protocol, cast, runtime_checkable @@ -23,6 +24,9 @@ from typing_extensions import TypeIs from crewai_files.core.constants import DEFAULT_MAX_FILE_SIZE_BYTES, MAGIC_BUFFER_SIZE +OCTET_STREAM = "application/octet-stream" + + @runtime_checkable class AsyncReadable(Protocol): """Protocol for async readable streams.""" @@ -56,13 +60,51 @@ class _AsyncReadableValidator: ValidatedAsyncReadable = Annotated[AsyncReadable, _AsyncReadableValidator()] -def _fallback_content_type(filename: str | None) -> str: - """Get content type from filename extension or return default.""" +def _detect_content_type_from_bytes(data: bytes) -> str | None: + if data.startswith(b"\x89PNG\r\n\x1a\n"): + return "image/png" + if data.startswith(b"\xff\xd8\xff"): + return "image/jpeg" + if data.startswith(b"%PDF-"): + return "application/pdf" + + try: + decoded = data.decode("utf-8") + except UnicodeDecodeError: + return None + + stripped = decoded.lstrip() + if stripped.startswith(("{", "[")): + try: + json.loads(decoded) + return "application/json" + except json.JSONDecodeError: + pass + + if 
"\x00" not in decoded: + return "text/plain" + + return None + + +def _fallback_content_type(filename: str | None, data: bytes | None = None) -> str: + """Get content type from filename extension, then content sniffing. + + The extension lookup runs first so specific types like ``text/csv`` or + ``application/xml`` are not degraded to generic sniffed types such as + ``text/plain``; byte sniffing only covers extensionless/unknown names. + """ if filename: mime_type, _ = mimetypes.guess_type(filename) if mime_type: return mime_type - return "application/octet-stream" + + if data: + content_type = _detect_content_type_from_bytes(data) + if content_type: + return content_type + + return OCTET_STREAM def generate_filename(content_type: str) -> str: @@ -97,9 +139,19 @@ def detect_content_type(data: bytes, filename: str | None = None) -> str: import magic result: str = magic.from_buffer(data[:MAGIC_BUFFER_SIZE], mime=True) - return result + if result != OCTET_STREAM: + return result + return _fallback_content_type(filename, data) except ImportError: - return _fallback_content_type(filename) + return _fallback_content_type(filename, data) + + +def _read_magic_header(path: Path) -> bytes | None: + try: + with path.open("rb") as file: + return file.read(MAGIC_BUFFER_SIZE) + except OSError: + return None def detect_content_type_from_path(path: Path, filename: str | None = None) -> str: @@ -115,13 +167,16 @@ def detect_content_type_from_path(path: Path, filename: str | None = None) -> st Returns: The detected MIME type. 
""" + fallback_filename = filename or path.name try: import magic result: str = magic.from_file(str(path), mime=True) - return result + if result != OCTET_STREAM: + return result + return _fallback_content_type(fallback_filename, _read_magic_header(path)) except ImportError: - return _fallback_content_type(filename or path.name) + return _fallback_content_type(fallback_filename, _read_magic_header(path)) class _BinaryIOValidator: diff --git a/lib/crewai-files/src/crewai_files/resolution/resolver.py b/lib/crewai-files/src/crewai_files/resolution/resolver.py index 0359c6253..d7f8e64f1 100644 --- a/lib/crewai-files/src/crewai_files/resolution/resolver.py +++ b/lib/crewai-files/src/crewai_files/resolution/resolver.py @@ -129,6 +129,20 @@ class FileResolver: """ return constraints is not None and constraints.supports_url_references + @classmethod + def _should_resolve_as_url_reference( + cls, + file: FileInput, + provider: ProviderType, + constraints: ProviderConstraints | None, + ) -> bool: + """Check if the provider can accept the current URL source directly.""" + if not cls._is_url_source(file) or not cls._supports_url(constraints): + return False + + provider_lower = provider.lower() + return "bedrock" not in provider_lower and "aws" not in provider_lower + @staticmethod def _resolve_as_url(file: FileInput) -> UrlReference: """Resolve a URL source as UrlReference. 
@@ -159,7 +173,7 @@ class FileResolver: """ constraints = get_constraints_for_provider(provider) - if self._is_url_source(file) and self._supports_url(constraints): + if self._should_resolve_as_url_reference(file, provider, constraints): return self._resolve_as_url(file) context = self._build_file_context(file) @@ -424,7 +438,7 @@ class FileResolver: """ constraints = get_constraints_for_provider(provider) - if self._is_url_source(file) and self._supports_url(constraints): + if self._should_resolve_as_url_reference(file, provider, constraints): return self._resolve_as_url(file) context = self._build_file_context(file) diff --git a/lib/crewai/src/crewai/agent/core.py b/lib/crewai/src/crewai/agent/core.py index 2686d66ff..ac2a2e29f 100644 --- a/lib/crewai/src/crewai/agent/core.py +++ b/lib/crewai/src/crewai/agent/core.py @@ -758,6 +758,31 @@ class Agent(BaseAgent): self._check_execution_error(e, task) return await self.aexecute_task(task, context, tools) + def message(self, content: str, **kwargs: Any) -> str: + """Send a single message and get a response. + + Creates a temporary Task + Crew, executes, and returns the raw output. 
+ """ + from crewai.crew import Crew + from crewai.task import Task + from crewai.types.streaming import CrewStreamingOutput + + task = Task( + description=content, + expected_output="Respond to the user's message appropriately.", + agent=self, + ) + crew = Crew( + agents=[self], + tasks=[task], + verbose=self.verbose, + memory=self.memory or False, + ) + result = crew.kickoff() + if isinstance(result, CrewStreamingOutput): + return result.result.raw + return result.raw + def execute_task( self, task: Task, diff --git a/lib/crewai/src/crewai/agent/planning_config.py b/lib/crewai/src/crewai/agent/planning_config.py index 4575b6508..88868f332 100644 --- a/lib/crewai/src/crewai/agent/planning_config.py +++ b/lib/crewai/src/crewai/agent/planning_config.py @@ -1,9 +1,10 @@ from __future__ import annotations -from typing import Literal +from typing import Annotated, Literal -from pydantic import BaseModel, Field +from pydantic import BaseModel, BeforeValidator, Field +from crewai.agents.agent_builder.base_agent import _validate_llm_ref from crewai.llms.base_llm import BaseLLM @@ -69,7 +70,7 @@ class PlanningConfig(BaseModel): max_attempts=3, max_steps=10, plan_prompt="Create a focused plan for: {description}", - llm="gpt-4o-mini", + llm="gpt-5.4-mini", ), ) ``` @@ -139,7 +140,10 @@ class PlanningConfig(BaseModel): "whether to continue or replan. None means no per-step timeout." ), ) - llm: str | BaseLLM | None = Field( + llm: Annotated[ + str | BaseLLM | None, + BeforeValidator(_validate_llm_ref), + ] = Field( default=None, description="LLM to use for planning. 
Uses agent's LLM if None.", ) diff --git a/lib/crewai/src/crewai/agents/agent_adapters/openai_agents/openai_adapter.py b/lib/crewai/src/crewai/agents/agent_adapters/openai_agents/openai_adapter.py index 139d4dabc..33fa2d27f 100644 --- a/lib/crewai/src/crewai/agents/agent_adapters/openai_agents/openai_adapter.py +++ b/lib/crewai/src/crewai/agents/agent_adapters/openai_agents/openai_adapter.py @@ -81,7 +81,7 @@ class OpenAIAgentAdapter(BaseAgentAdapter): Raises: ImportError: If OpenAI agent dependencies are not installed. """ - self.llm = kwargs.pop("model", "gpt-4o-mini") + self.llm = kwargs.pop("model", "gpt-5.4-mini") super().__init__(**kwargs) self._tool_adapter = OpenAIAgentToolAdapter(tools=kwargs.get("tools")) self._converter_adapter = OpenAIConverterAdapter(agent_adapter=self) diff --git a/lib/crewai/src/crewai/agents/agent_builder/base_agent.py b/lib/crewai/src/crewai/agents/agent_builder/base_agent.py index b50799811..ded6bb40a 100644 --- a/lib/crewai/src/crewai/agents/agent_builder/base_agent.py +++ b/lib/crewai/src/crewai/agents/agent_builder/base_agent.py @@ -85,9 +85,28 @@ def _validate_llm_ref(value: Any) -> Any: import inspect llm_type = value.get("llm_type") - if not llm_type or llm_type not in _LLM_TYPE_REGISTRY: + if not llm_type: + model = ( + value.get("model") + or value.get("model_name") + or value.get("deployment_name") + ) + if not model: + raise ValueError( + "LLM config objects must include 'model', 'model_name', " + "or 'deployment_name', or a serialized 'llm_type'. " + f"Got keys: {list(value)}" + ) + from crewai.llm import LLM + + llm_kwargs = {**value, "model": model} + llm_kwargs.pop("model_name", None) + llm_kwargs.pop("deployment_name", None) + return LLM(**llm_kwargs) + + if llm_type not in _LLM_TYPE_REGISTRY: raise ValueError( - f"Unknown or missing llm_type: {llm_type!r}. " + f"Unknown llm_type: {llm_type!r}. 
" f"Expected one of {list(_LLM_TYPE_REGISTRY)}" ) dotted = _LLM_TYPE_REGISTRY[llm_type] @@ -618,7 +637,10 @@ class BaseAgent(BaseModel, ABC, metaclass=AgentMeta): if self.memory is True: from crewai.memory.unified_memory import Memory - self.memory = Memory() + memory_kwargs: dict[str, Any] = {} + if self.llm is not None: + memory_kwargs["llm"] = self.llm + self.memory = Memory(**memory_kwargs) elif self.memory is False: self.memory = None return self diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index f985da83c..92a1ce5fb 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -53,6 +53,7 @@ from crewai.types.callback import SerializableCallable from crewai.utilities.agent_utils import ( _llm_stop_words_applied, aget_llm_response, + build_text_tool_calling_fallback_message, convert_tools_to_openai_schema, enforce_rpm_limit, format_message_for_llm, @@ -64,6 +65,7 @@ from crewai.utilities.agent_utils import ( handle_unknown_error, has_reached_max_iterations, is_context_length_exceeded, + is_native_tool_calling_unsupported_error, parse_tool_call_args, process_llm_response, track_delegation_if_needed, @@ -464,6 +466,20 @@ class CrewAgentExecutor(BaseAgentExecutor): self._show_logs(formatted_answer) return formatted_answer + def _append_text_tool_calling_fallback_message(self) -> None: + """Add text tool-calling instructions after native tools are rejected.""" + if not self.tools: + return + self.messages.append( + format_message_for_llm( + build_text_tool_calling_fallback_message( + self.tools_description, + self.tools_names, + ), + role="user", + ) + ) + def _invoke_loop_native_tools(self) -> AgentFinish: """Execute agent loop using native function calling. 
@@ -557,6 +573,9 @@ class CrewAgentExecutor(BaseAgentExecutor): return formatted_answer except Exception as e: + if is_native_tool_calling_unsupported_error(e): + self._append_text_tool_calling_fallback_message() + return self._invoke_loop_react() if e.__class__.__module__.startswith("litellm"): raise e if is_context_length_exceeded(e): @@ -1369,6 +1388,9 @@ class CrewAgentExecutor(BaseAgentExecutor): return formatted_answer except Exception as e: + if is_native_tool_calling_unsupported_error(e): + self._append_text_tool_calling_fallback_message() + return await self._ainvoke_loop_react() if e.__class__.__module__.startswith("litellm"): raise e if is_context_length_exceeded(e): diff --git a/lib/crewai/src/crewai/agents/step_executor.py b/lib/crewai/src/crewai/agents/step_executor.py index 5fe517389..81238f473 100644 --- a/lib/crewai/src/crewai/agents/step_executor.py +++ b/lib/crewai/src/crewai/agents/step_executor.py @@ -29,14 +29,17 @@ from crewai.events.types.tool_usage_events import ( ToolUsageStartedEvent, ) from crewai.utilities.agent_utils import ( + build_text_tool_calling_fallback_message, build_tool_calls_assistant_message, check_native_tool_support, enforce_rpm_limit, execute_single_native_tool_call, extract_task_section, format_message_for_llm, + is_native_tool_calling_unsupported_error, is_tool_call_list, process_llm_response, + render_text_description_and_args, setup_native_tools, ) from crewai.utilities.i18n import I18N_DEFAULT @@ -153,6 +156,7 @@ class StepExecutor: if self._use_native_tools: result_text = self._execute_native( messages, + todo, tool_calls_made, max_step_iterations=max_step_iterations, step_timeout=step_timeout, @@ -161,6 +165,7 @@ class StepExecutor: else: result_text = self._execute_text_parsed( messages, + todo, tool_calls_made, max_step_iterations=max_step_iterations, step_timeout=step_timeout, @@ -176,6 +181,46 @@ class StepExecutor: execution_time=elapsed, ) except Exception as e: + if self._use_native_tools and 
is_native_tool_calling_unsupported_error(e): + try: + self._use_native_tools = False + self._openai_tools = [] + self._available_functions = {} + # Keep the conversation built so far (including any native + # tool round-trips already appended to ``messages``) and + # append the text-tooling instructions instead of + # restarting the step, so completed tool calls are not + # re-executed against a fresh context. + messages.append( + format_message_for_llm( + build_text_tool_calling_fallback_message( + render_text_description_and_args(self.tools), + ", ".join( + sanitize_tool_name(t.name) for t in self.tools + ), + ), + role="user", + ) + ) + result_text = self._execute_text_parsed( + messages, + todo, + tool_calls_made, + max_step_iterations=max_step_iterations, + step_timeout=step_timeout, + start_time=start_time, + ) + self._validate_expected_tool_usage(todo, tool_calls_made) + elapsed = time.monotonic() - start_time + return StepResult( + success=True, + result=result_text, + tool_calls_made=tool_calls_made, + execution_time=elapsed, + ) + except Exception as fallback_error: + e = fallback_error + elapsed = time.monotonic() - start_time return StepResult( success=False, @@ -272,6 +317,7 @@ class StepExecutor: def _execute_text_parsed( self, messages: list[LLMMessage], + todo: TodoItem, tool_calls_made: list[str], max_step_iterations: int = 15, step_timeout: int | None = None, @@ -310,7 +356,7 @@ class StepExecutor: if isinstance(formatted, AgentAction): tool_calls_made.append(formatted.tool) - tool_result = self._execute_text_tool_with_events(formatted) + tool_result = self._execute_text_tool_with_events(formatted, todo) last_tool_result = tool_result messages.append({"role": "assistant", "content": answer_str}) messages.append(self._build_observation_message(tool_result)) @@ -320,7 +366,9 @@ class StepExecutor: return last_tool_result - def _execute_text_tool_with_events(self, formatted: AgentAction) -> str: + def _execute_text_tool_with_events( + self, 
formatted: AgentAction, todo: TodoItem + ) -> str: """Execute text-parsed tool calls with tool usage events.""" args_dict = self._parse_tool_args(formatted.tool_input) agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown" @@ -333,6 +381,8 @@ class StepExecutor: from_agent=self.agent, from_task=self.task, agent_key=agent_key, + plan_step_number=todo.step_number, + plan_step_description=todo.description, ), ) @@ -368,6 +418,8 @@ class StepExecutor: from_agent=self.agent, from_task=self.task, agent_key=agent_key, + plan_step_number=todo.step_number, + plan_step_description=todo.description, error=e, ), ) @@ -382,6 +434,8 @@ class StepExecutor: from_agent=self.agent, from_task=self.task, agent_key=agent_key, + plan_step_number=todo.step_number, + plan_step_description=todo.description, started_at=started_at, finished_at=datetime.now(), ), @@ -474,6 +528,7 @@ class StepExecutor: def _execute_native( self, messages: list[LLMMessage], + todo: TodoItem, tool_calls_made: list[str], max_step_iterations: int = 15, step_timeout: int | None = None, @@ -513,7 +568,7 @@ class StepExecutor: if isinstance(answer, list) and answer and is_tool_call_list(answer): result = self._execute_native_tool_calls( - answer, messages, tool_calls_made + answer, messages, todo, tool_calls_made ) accumulated_results.append(result) continue @@ -526,6 +581,7 @@ class StepExecutor: self, tool_calls: list[Any], messages: list[LLMMessage], + todo: TodoItem, tool_calls_made: list[str], ) -> str: """Execute a batch of native tool calls and return their results. 
@@ -551,6 +607,8 @@ class StepExecutor: event_source=self, printer=PRINTER, verbose=bool(self.agent and self.agent.verbose), + plan_step_number=todo.step_number, + plan_step_description=todo.description, ) if call_result.func_name: diff --git a/lib/crewai/src/crewai/crew.py b/lib/crewai/src/crewai/crew.py index 28520008e..e4444994e 100644 --- a/lib/crewai/src/crewai/crew.py +++ b/lib/crewai/src/crewai/crew.py @@ -658,7 +658,14 @@ class Crew(FlowTrackable, BaseModel): from crewai.rag.embeddings.factory import build_embedder embedder = build_embedder(cast(dict[str, Any], self.embedder)) - self._memory = Memory(embedder=embedder, root_scope=crew_root_scope) + memory_kwargs: dict[str, Any] = { + "embedder": embedder, + "root_scope": crew_root_scope, + } + memory_llm = self._memory_llm() + if memory_llm is not None: + memory_kwargs["llm"] = memory_llm + self._memory = Memory(**memory_kwargs) elif self.memory: # User passed a Memory / MemoryScope / MemorySlice instance # Respect user's configuration — don't auto-set root_scope @@ -668,6 +675,16 @@ class Crew(FlowTrackable, BaseModel): return self + def _memory_llm(self) -> str | BaseLLM | None: + """Return the LLM auto-created memory should use for analysis.""" + if self.chat_llm is not None: + return self.chat_llm + for agent in self.agents: + agent_llm: str | BaseLLM | None = getattr(agent, "llm", None) + if agent_llm is not None: + return agent_llm + return None + @model_validator(mode="after") def create_crew_knowledge(self) -> Crew: """Create the knowledge for the crew.""" diff --git a/lib/crewai/src/crewai/events/__init__.py b/lib/crewai/src/crewai/events/__init__.py index b026c451d..ce4a01a22 100644 --- a/lib/crewai/src/crewai/events/__init__.py +++ b/lib/crewai/src/crewai/events/__init__.py @@ -116,6 +116,11 @@ if TYPE_CHECKING: MemorySaveFailedEvent, MemorySaveStartedEvent, ) + from crewai.events.types.observation_events import ( + PlanStepCompletedEvent, + PlanStepEvent, + PlanStepStartedEvent, + ) from 
crewai.events.types.reasoning_events import ( AgentReasoningCompletedEvent, AgentReasoningFailedEvent, @@ -220,6 +225,9 @@ _LAZY_EVENT_MAPPING: dict[str, str] = { "MemorySaveCompletedEvent": "crewai.events.types.memory_events", "MemorySaveFailedEvent": "crewai.events.types.memory_events", "MemorySaveStartedEvent": "crewai.events.types.memory_events", + "PlanStepCompletedEvent": "crewai.events.types.observation_events", + "PlanStepEvent": "crewai.events.types.observation_events", + "PlanStepStartedEvent": "crewai.events.types.observation_events", "AgentReasoningCompletedEvent": "crewai.events.types.reasoning_events", "AgentReasoningFailedEvent": "crewai.events.types.reasoning_events", "AgentReasoningStartedEvent": "crewai.events.types.reasoning_events", @@ -349,6 +357,9 @@ __all__ = [ "MethodExecutionFailedEvent", "MethodExecutionFinishedEvent", "MethodExecutionStartedEvent", + "PlanStepCompletedEvent", + "PlanStepEvent", + "PlanStepStartedEvent", "ReasoningEvent", "SkillActivatedEvent", "SkillDiscoveryCompletedEvent", diff --git a/lib/crewai/src/crewai/events/event_types.py b/lib/crewai/src/crewai/events/event_types.py index dcf31cb03..f78278d50 100644 --- a/lib/crewai/src/crewai/events/event_types.py +++ b/lib/crewai/src/crewai/events/event_types.py @@ -99,6 +99,10 @@ from crewai.events.types.memory_events import ( MemorySaveFailedEvent, MemorySaveStartedEvent, ) +from crewai.events.types.observation_events import ( + PlanStepCompletedEvent, + PlanStepStartedEvent, +) from crewai.events.types.reasoning_events import ( AgentReasoningCompletedEvent, AgentReasoningFailedEvent, @@ -191,6 +195,8 @@ EventTypes = ( | MemoryRetrievalStartedEvent | MemoryRetrievalCompletedEvent | MemoryRetrievalFailedEvent + | PlanStepStartedEvent + | PlanStepCompletedEvent | MCPConnectionStartedEvent | MCPConnectionCompletedEvent | MCPConnectionFailedEvent diff --git a/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py 
b/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py index 4b22275c6..72bb2452e 100644 --- a/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py +++ b/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py @@ -24,6 +24,7 @@ from crewai.events.listeners.tracing.types import TraceEvent from crewai.events.listeners.tracing.utils import ( get_user_id, is_tracing_enabled_in_context, + is_tui_mode, should_auto_collect_first_time_traces, ) from crewai.plus_api import PlusAPI @@ -74,6 +75,7 @@ class TraceBatchManager: self.defer_session_finalization: bool = False self._batch_finalized: bool = False self.backend_initialized: bool = False + self.trace_url: str | None = None self.ephemeral_trace_url: str | None = None try: self.plus_api = PlusAPI( @@ -108,7 +110,9 @@ class TraceBatchManager: self.record_start_time("execution") - if should_auto_collect_first_time_traces(): + if should_auto_collect_first_time_traces() or ( + is_tui_mode() and not is_tracing_enabled_in_context() + ): self.trace_batch_id = self.current_batch.batch_id else: self._initialize_backend_batch( @@ -411,6 +415,7 @@ class TraceBatchManager: else f"{base_url}/crewai_plus/ephemeral_trace_batches/{batch_id}?access_code={access_code}" ) + self.trace_url = return_link if is_ephemeral: self.ephemeral_trace_url = return_link @@ -428,7 +433,10 @@ class TraceBatchManager: title="Trace Batch Finalization", border_style="green", ) - if not should_auto_collect_first_time_traces(): + if ( + not should_auto_collect_first_time_traces() + and not is_tui_mode() + ): console.print(panel) return True diff --git a/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py b/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py index f9d46a920..789e70e69 100644 --- a/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py +++ b/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py @@ -18,6 +18,7 @@ from 
crewai.events.listeners.tracing.trace_batch_manager import TraceBatchManage from crewai.events.listeners.tracing.types import TraceEvent from crewai.events.listeners.tracing.utils import ( is_tracing_enabled_in_context, + is_tui_mode, safe_serialize_to_dict, should_auto_collect_first_time_traces, should_enable_tracing, @@ -212,8 +213,8 @@ class TraceCollectionListener(BaseEventListener): not should_enable_tracing() and not is_tracing_enabled_in_context() and not should_auto_collect_first_time_traces() + and not is_tui_mode() ): - self._listeners_setup = True return self._register_flow_event_handlers(crewai_event_bus) @@ -297,6 +298,12 @@ class TraceCollectionListener(BaseEventListener): if self._nested_in_flow_execution(): return if self.batch_manager.batch_owner_type == "crew": + if is_tui_mode(): + if self.first_time_handler.is_first_time: + self.first_time_handler.mark_events_collected() + elif is_tracing_enabled_in_context() or should_enable_tracing(): + self.batch_manager.finalize_batch() + return if self.first_time_handler.is_first_time: self.first_time_handler.mark_events_collected() self.first_time_handler.handle_execution_completion() @@ -310,6 +317,12 @@ class TraceCollectionListener(BaseEventListener): return if self._nested_in_flow_execution(): return + if is_tui_mode(): + if self.first_time_handler.is_first_time: + self.first_time_handler.mark_events_collected() + elif is_tracing_enabled_in_context() or should_enable_tracing(): + self.batch_manager.finalize_batch() + return if self.first_time_handler.is_first_time: self.first_time_handler.mark_events_collected() self.first_time_handler.handle_execution_completion() diff --git a/lib/crewai/src/crewai/events/listeners/tracing/utils.py b/lib/crewai/src/crewai/events/listeners/tracing/utils.py index ee07006d5..b4a30b17f 100644 --- a/lib/crewai/src/crewai/events/listeners/tracing/utils.py +++ b/lib/crewai/src/crewai/events/listeners/tracing/utils.py @@ -42,6 +42,7 @@ __all__ = [ "is_first_execution", 
"is_tracing_enabled", "is_tracing_enabled_in_context", + "is_tui_mode", "mark_first_execution_completed", "mark_first_execution_done", "on_first_execution_tracing_confirmation", @@ -50,6 +51,7 @@ __all__ = [ "safe_serialize_to_dict", "set_suppress_tracing_messages", "set_tracing_enabled", + "set_tui_mode", "should_auto_collect_first_time_traces", "should_enable_tracing", "should_suppress_tracing_messages", @@ -71,6 +73,16 @@ _suppress_tracing_messages: ContextVar[bool] = ContextVar( "_suppress_tracing_messages", default=False ) +_tui_mode: ContextVar[bool] = ContextVar("_tui_mode", default=False) + + +def set_tui_mode(enabled: bool) -> object: + return _tui_mode.set(enabled) + + +def is_tui_mode() -> bool: + return _tui_mode.get() + def set_suppress_tracing_messages(suppress: bool) -> object: """Set whether to suppress tracing-related console messages. diff --git a/lib/crewai/src/crewai/events/types/observation_events.py b/lib/crewai/src/crewai/events/types/observation_events.py index beac6d235..5f1ef6ea0 100644 --- a/lib/crewai/src/crewai/events/types/observation_events.py +++ b/lib/crewai/src/crewai/events/types/observation_events.py @@ -26,6 +26,38 @@ class ObservationEvent(BaseEvent): self._set_agent_params(data) +class PlanStepEvent(BaseEvent): + """Base event for authoritative plan step lifecycle updates.""" + + type: str + agent_role: str + step_number: int + step_description: str = "" + tool_to_use: str | None = None + from_task: Any | None = None + from_agent: Any | None = None + + def __init__(self, **data: Any) -> None: + super().__init__(**data) + self._set_task_params(data) + self._set_agent_params(data) + + +class PlanStepStartedEvent(PlanStepEvent): + """Emitted when a concrete plan step starts executing.""" + + type: Literal["plan_step_started"] = "plan_step_started" + + +class PlanStepCompletedEvent(PlanStepEvent): + """Emitted when a concrete plan step reaches a terminal state.""" + + type: Literal["plan_step_completed"] = "plan_step_completed" + 
success: bool = True + result: str | None = None + error: str | None = None + + class StepObservationStartedEvent(ObservationEvent): """Emitted when the Planner begins observing a step's result. diff --git a/lib/crewai/src/crewai/events/types/tool_usage_events.py b/lib/crewai/src/crewai/events/types/tool_usage_events.py index e049a86ed..86e0a3087 100644 --- a/lib/crewai/src/crewai/events/types/tool_usage_events.py +++ b/lib/crewai/src/crewai/events/types/tool_usage_events.py @@ -21,6 +21,8 @@ class ToolUsageEvent(BaseEvent): agent: Any | None = None task_name: str | None = None task_id: str | None = None + plan_step_number: int | None = None + plan_step_description: str | None = None from_task: Any | None = None from_agent: Any | None = None diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py index 5628ac3c0..c026c7509 100644 --- a/lib/crewai/src/crewai/experimental/agent_executor.py +++ b/lib/crewai/src/crewai/experimental/agent_executor.py @@ -46,6 +46,8 @@ from crewai.events.types.observation_events import ( GoalAchievedEarlyEvent, PlanRefinementEvent, PlanReplanTriggeredEvent, + PlanStepCompletedEvent, + PlanStepStartedEvent, ) from crewai.events.types.tool_usage_events import ( ToolUsageErrorEvent, @@ -73,6 +75,7 @@ from crewai.tools.base_tool import BaseTool from crewai.tools.structured_tool import CrewStructuredTool from crewai.utilities.agent_utils import ( _llm_stop_words_applied, + build_text_tool_calling_fallback_message, check_native_tool_support, enforce_rpm_limit, extract_tool_call_info, @@ -86,6 +89,7 @@ from crewai.utilities.agent_utils import ( has_reached_max_iterations, is_context_length_exceeded, is_inside_event_loop, + is_native_tool_calling_unsupported_error, is_tool_call_list, parse_tool_call_args, process_llm_response, @@ -241,6 +245,23 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): self._tool_name_mapping, ) = setup_native_tools(self.original_tools) + 
def _downgrade_to_text_tool_calling(self) -> None: + """Switch a running execution from native tools to text tool calls.""" + self.state.use_native_tools = False + self.state.pending_tool_calls.clear() + self._openai_tools = [] + self._available_functions = {} + if self.tools: + self.state.messages.append( + format_message_for_llm( + build_text_tool_calling_fallback_message( + self.tools_description, + self.tools_names, + ), + role="user", + ) + ) + def _is_tool_call_list(self, response: list[Any]) -> bool: """Check if a response is a list of tool calls.""" return is_tool_call_list(response) @@ -349,6 +370,84 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): self.state.todos = TodoList(items=todos) + def _emit_plan_step_started(self, todo: TodoItem) -> None: + try: + crewai_event_bus.emit( + self.agent, + event=PlanStepStartedEvent( + agent_role=self.agent.role, + step_number=todo.step_number, + step_description=todo.description, + tool_to_use=todo.tool_to_use, + from_task=self.task, + from_agent=self.agent, + ), + ) + except Exception: # noqa: S110 + pass + + def _emit_plan_step_completed( + self, + todo: TodoItem, + *, + success: bool, + result: str | None = None, + error: str | None = None, + ) -> None: + try: + crewai_event_bus.emit( + self.agent, + event=PlanStepCompletedEvent( + agent_role=self.agent.role, + step_number=todo.step_number, + step_description=todo.description, + tool_to_use=todo.tool_to_use, + success=success, + result=result, + error=error, + from_task=self.task, + from_agent=self.agent, + ), + ) + except Exception: # noqa: S110 + pass + + def _mark_todo_running(self, todo: TodoItem) -> None: + previous_status = todo.status + self.state.todos.mark_running(todo.step_number) + if previous_status != "running": + self._emit_plan_step_started(todo) + + def _mark_todo_completed( + self, + step_number: int, + result: str | None = None, + ) -> None: + todo = self.state.todos.get_by_step_number(step_number) + previous_status = 
todo.status if todo else None + self.state.todos.mark_completed(step_number, result=result) + todo = self.state.todos.get_by_step_number(step_number) + if todo and previous_status != "completed": + self._emit_plan_step_completed(todo, success=True, result=result) + + def _mark_todo_failed( + self, + step_number: int, + result: str | None = None, + error: str | None = None, + ) -> None: + todo = self.state.todos.get_by_step_number(step_number) + previous_status = todo.status if todo else None + self.state.todos.mark_failed(step_number, result=result) + todo = self.state.todos.get_by_step_number(step_number) + if todo and previous_status != "failed": + self._emit_plan_step_completed( + todo, + success=False, + result=result, + error=error, + ) + def _ensure_step_executor(self) -> Any: """Lazily create the StepExecutor (avoids circular imports).""" if self._step_executor is None: @@ -597,8 +696,10 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): and not observation.step_completed_successfully and observation.needs_full_replan ): - self.state.todos.mark_failed( - current_todo.step_number, result=current_todo.result + self._mark_todo_failed( + current_todo.step_number, + result=current_todo.result, + error=observation.replan_reason, ) if self.agent.verbose: PRINTER.print( @@ -614,8 +715,9 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): return "replan_now" if observation and not observation.step_completed_successfully: - self.state.todos.mark_failed( - current_todo.step_number, result=current_todo.result + self._mark_todo_failed( + current_todo.step_number, + result=current_todo.result, ) if self.agent.verbose: failed = len(self.state.todos.get_failed_todos()) @@ -629,9 +731,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): ) return "continue_plan" - self.state.todos.mark_completed( - current_todo.step_number, result=current_todo.result - ) + self._mark_todo_completed(current_todo.step_number, 
result=current_todo.result) if self.agent.verbose: completed = self.state.todos.completed_count @@ -661,7 +761,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): # If observation is missing or step succeeded — continue if not observation or observation.step_completed_successfully: - self.state.todos.mark_completed( + self._mark_todo_completed( current_todo.step_number, result=current_todo.result ) if self.agent.verbose: @@ -676,8 +776,10 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): # Step failed — only replan if observer explicitly requires it, # otherwise mark done and continue (same gate as low-effort). if observation.needs_full_replan: - self.state.todos.mark_failed( - current_todo.step_number, result=current_todo.result + self._mark_todo_failed( + current_todo.step_number, + result=current_todo.result, + error=observation.replan_reason, ) if self.agent.verbose: PRINTER.print( @@ -694,9 +796,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): # Step failed but observer does not require a full replan — mark as # failed (not completed) so get_failed_todos() tracks it correctly. 
- self.state.todos.mark_failed( - current_todo.step_number, result=current_todo.result - ) + self._mark_todo_failed(current_todo.step_number, result=current_todo.result) if self.agent.verbose: failed = len(self.state.todos.get_failed_todos()) total = len(self.state.todos.items) @@ -731,12 +831,12 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): observation = self.state.observations.get(current_todo.step_number) if not observation: # No observation available — default to continue - self.state.todos.mark_completed(current_todo.step_number) + self._mark_todo_completed(current_todo.step_number) return "continue_plan" # Goal already achieved — early termination if observation.goal_already_achieved: - self.state.todos.mark_completed( + self._mark_todo_completed( current_todo.step_number, result=current_todo.result ) if self.agent.verbose: @@ -748,8 +848,10 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): # Full replan needed if observation.needs_full_replan: - self.state.todos.mark_failed( - current_todo.step_number, result=current_todo.result + self._mark_todo_failed( + current_todo.step_number, + result=current_todo.result, + error=observation.replan_reason, ) if self.agent.verbose: PRINTER.print( @@ -761,9 +863,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): # Step failed — also trigger replan if not observation.step_completed_successfully: - self.state.todos.mark_failed( - current_todo.step_number, result=current_todo.result - ) + self._mark_todo_failed(current_todo.step_number, result=current_todo.result) if self.agent.verbose: PRINTER.print( content="[Decide] Step failed — triggering replan", @@ -773,7 +873,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): return "replan_now" if observation.remaining_plan_still_valid and observation.suggested_refinements: - self.state.todos.mark_completed( + self._mark_todo_completed( current_todo.step_number, result=current_todo.result ) if 
self.agent.verbose: @@ -783,9 +883,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): ) return "refine_and_continue" - self.state.todos.mark_completed( - current_todo.step_number, result=current_todo.result - ) + self._mark_todo_completed(current_todo.step_number, result=current_todo.result) if self.agent.verbose: completed = self.state.todos.completed_count total = len(self.state.todos.items) @@ -961,7 +1059,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): return "needs_replan" if len(ready) == 1: - self.state.todos.mark_running(ready[0].step_number) + self._mark_todo_running(ready[0]) return "single_todo_ready" return "multiple_todos_ready" @@ -1099,7 +1197,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): # Mark all ready todos as running for todo in ready: - self.state.todos.mark_running(todo.step_number) + self._mark_todo_running(todo) # Build context and executor for each todo, then run in parallel async def _run_step(todo: TodoItem) -> tuple[TodoItem, object]: @@ -1127,7 +1225,11 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): if isinstance(item, BaseException): error_msg = f"Error: {item!s}" todo.result = error_msg - self.state.todos.mark_failed(todo.step_number, result=error_msg) + self._mark_todo_failed( + todo.step_number, + result=error_msg, + error=error_msg, + ) if self.agent.verbose: PRINTER.print( content=f"Todo {todo.step_number} failed: {error_msg}", @@ -1197,9 +1299,9 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): # Mark based on observation result if observation.step_completed_successfully: - self.state.todos.mark_completed(todo.step_number, result=todo.result) + self._mark_todo_completed(todo.step_number, result=todo.result) else: - self.state.todos.mark_failed(todo.step_number, result=todo.result) + self._mark_todo_failed(todo.step_number, result=todo.result) if self.agent.verbose: PRINTER.print( @@ -1349,7 +1451,11 @@ class 
AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): def call_llm_native_tools( self, ) -> Literal[ - "native_tool_calls", "native_finished", "context_error", "todo_satisfied" + "native_tool_calls", + "native_finished", + "context_error", + "todo_satisfied", + "continue_reasoning", ]: """Execute LLM call with native function calling. @@ -1428,6 +1534,9 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): return self._route_finish_with_todos("native_finished") except Exception as e: + if is_native_tool_calling_unsupported_error(e): + self._downgrade_to_text_tool_calling() + return "continue_reasoning" if is_context_length_exceeded(e): self._last_context_error = e return "context_error" @@ -2085,7 +2194,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): step_number: The step number to mark. result: The result of the todo. """ - self.state.todos.mark_completed(step_number, result=result) + self._mark_todo_completed(step_number, result=result) if self.agent.verbose: completed = self.state.todos.completed_count diff --git a/lib/crewai/src/crewai/flow/async_feedback/__init__.py b/lib/crewai/src/crewai/flow/async_feedback/__init__.py index 02590a785..471279977 100644 --- a/lib/crewai/src/crewai/flow/async_feedback/__init__.py +++ b/lib/crewai/src/crewai/flow/async_feedback/__init__.py @@ -20,7 +20,7 @@ Example: @human_feedback( message="Review this:", emit=["approved", "rejected"], - llm="gpt-4o-mini", + llm="gpt-5.4-mini", provider=SlackProvider(), ) def review(self): diff --git a/lib/crewai/src/crewai/flow/async_feedback/types.py b/lib/crewai/src/crewai/flow/async_feedback/types.py index 911624cd9..d8c834fb1 100644 --- a/lib/crewai/src/crewai/flow/async_feedback/types.py +++ b/lib/crewai/src/crewai/flow/async_feedback/types.py @@ -47,7 +47,7 @@ class PendingFeedbackContext: method_output={"title": "Draft", "body": "..."}, message="Please review and approve or reject:", emit=["approved", "rejected"], - llm="gpt-4o-mini", + 
llm="gpt-5.4-mini", ) ``` """ diff --git a/lib/crewai/src/crewai/flow/dsl/_human_feedback.py b/lib/crewai/src/crewai/flow/dsl/_human_feedback.py index e1317b23d..1236ba6f7 100644 --- a/lib/crewai/src/crewai/flow/dsl/_human_feedback.py +++ b/lib/crewai/src/crewai/flow/dsl/_human_feedback.py @@ -23,7 +23,7 @@ __all__ = ["HumanFeedbackResult", "human_feedback"] def human_feedback( message: str, emit: Sequence[str] | None = None, - llm: str | BaseLLM | None = "gpt-4o-mini", + llm: str | BaseLLM | None = "gpt-5.4-mini", default_outcome: str | None = None, metadata: dict[str, Any] | None = None, provider: HumanFeedbackProvider | None = None, diff --git a/lib/crewai/src/crewai/flow/human_feedback.py b/lib/crewai/src/crewai/flow/human_feedback.py index c3a4a203f..a1fabd658 100644 --- a/lib/crewai/src/crewai/flow/human_feedback.py +++ b/lib/crewai/src/crewai/flow/human_feedback.py @@ -20,7 +20,7 @@ Example (synchronous, default): @human_feedback( message="Please review this content:", emit=["approved", "rejected"], - llm="gpt-4o-mini", + llm="gpt-5.4-mini", ) def generate_content(self): return {"title": "Article", "body": "Content..."} @@ -48,7 +48,7 @@ Example (asynchronous with custom provider): @human_feedback( message="Review this:", emit=["approved", "rejected"], - llm="gpt-4o-mini", + llm="gpt-5.4-mini", provider=SlackProvider(), ) def generate_content(self): @@ -173,7 +173,7 @@ class HumanFeedbackConfig: message: str emit: Sequence[str] | None = None - llm: str | BaseLLM | None = "gpt-4o-mini" + llm: str | BaseLLM | None = "gpt-5.4-mini" default_outcome: str | None = None metadata: dict[str, Any] | None = None provider: HumanFeedbackProvider | None = None @@ -212,7 +212,7 @@ def _validate_human_feedback_options( if not llm: raise ValueError( "llm is required when emit is specified. " - "Provide an LLM model string (e.g., 'gpt-4o-mini') or a BaseLLM instance. " + "Provide an LLM model string (e.g., 'gpt-5.4-mini') or a BaseLLM instance. 
" "See the CrewAI Human-in-the-Loop (HITL) documentation for more information: " "https://docs.crewai.com/en/learn/human-feedback-in-flows" ) @@ -235,12 +235,12 @@ def _resolve_llm_instance(llm: Any) -> Any: from crewai.llm import LLM if llm is None: - return LLM(model="gpt-4o-mini") + return LLM(model="gpt-5.4-mini") if isinstance(llm, str): return LLM(model=llm) if isinstance(llm, dict): deserialized = _deserialize_llm_from_context(llm) - return deserialized if deserialized is not None else LLM(model="gpt-4o-mini") + return deserialized if deserialized is not None else LLM(model="gpt-5.4-mini") return llm # already a BaseLLM instance @@ -362,7 +362,7 @@ def _distill_and_store_lessons( def human_feedback( message: str, emit: Sequence[str] | None = None, - llm: str | BaseLLM | None = "gpt-4o-mini", + llm: str | BaseLLM | None = "gpt-5.4-mini", default_outcome: str | None = None, metadata: dict[str, Any] | None = None, provider: HumanFeedbackProvider | None = None, diff --git a/lib/crewai/src/crewai/lite_agent.py b/lib/crewai/src/crewai/lite_agent.py index e2986a0da..bc79c590c 100644 --- a/lib/crewai/src/crewai/lite_agent.py +++ b/lib/crewai/src/crewai/lite_agent.py @@ -390,7 +390,10 @@ class LiteAgent(FlowTrackable, BaseModel): if self.memory is True: from crewai.memory.unified_memory import Memory - object.__setattr__(self, "_memory", Memory()) + memory_kwargs: dict[str, Any] = {} + if self.llm is not None: + memory_kwargs["llm"] = self.llm + object.__setattr__(self, "_memory", Memory(**memory_kwargs)) elif self.memory is not None and self.memory is not False: object.__setattr__(self, "_memory", self.memory) else: diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index af5dff68e..153bbd2d7 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -68,7 +68,17 @@ if TYPE_CHECKING: from crewai.tools.base_tool import BaseTool from crewai.utilities.types import LLMMessage -try: +load_dotenv() +logger = 
def _ensure_litellm() -> bool:
    """Lazy-load litellm on first use and report whether it is available.

    On the first call the litellm modules are imported and the module-level
    placeholders (``litellm``, ``Choices``, ``ModelResponse``, ...) are rebound
    to the real objects. Later calls return the cached outcome.

    Returns:
        True when litellm was imported successfully, False when the import
        raised ``ImportError``.
    """
    global _litellm_loaded, LITELLM_AVAILABLE
    global litellm, Choices, LiteLLMDelta, Message, ModelResponseBase
    global ModelResponseStream, LiteLLMStreamingChoices, get_supported_openai_params
    global ChatCompletionDeltaToolCall, Function
    global ModelResponse, supports_response_schema

    if _litellm_loaded:
        return LITELLM_AVAILABLE

    try:
        import litellm as _litellm
        from litellm.litellm_core_utils.get_supported_openai_params import (
            get_supported_openai_params as _get_supported_openai_params,
        )
        from litellm.types.utils import (
            ChatCompletionDeltaToolCall as _ChatCompletionDeltaToolCall,
            Choices as _Choices,
            Delta as _LiteLLMDelta,
            Function as _Function,
            Message as _Message,
            ModelResponse as _ModelResponse,
            ModelResponseBase as _ModelResponseBase,
            ModelResponseStream as _ModelResponseStream,
            StreamingChoices as _LiteLLMStreamingChoices,
        )
        from litellm.utils import supports_response_schema as _supports_response_schema
    except ImportError:
        LITELLM_AVAILABLE = False
    else:
        litellm = _litellm
        Choices = _Choices  # type: ignore[misc]
        LiteLLMDelta = _LiteLLMDelta  # type: ignore[misc]
        Message = _Message  # type: ignore[misc]
        ModelResponseBase = _ModelResponseBase  # type: ignore[misc]
        ModelResponseStream = _ModelResponseStream  # type: ignore[misc]
        LiteLLMStreamingChoices = _LiteLLMStreamingChoices  # type: ignore[misc]
        get_supported_openai_params = _get_supported_openai_params
        ChatCompletionDeltaToolCall = _ChatCompletionDeltaToolCall  # type: ignore[misc]
        Function = _Function  # type: ignore[misc]
        ModelResponse = _ModelResponse  # type: ignore[misc]
        supports_response_schema = _supports_response_schema

        _litellm.suppress_debug_info = True
        LITELLM_AVAILABLE = True
    finally:
        # Publish "loaded" only after the outcome has been recorded. Setting
        # the flag before the import let a concurrent caller see
        # ``_litellm_loaded`` as True while ``LITELLM_AVAILABLE`` was still
        # its initial False, and wrongly conclude litellm is missing.
        _litellm_loaded = True

    return LITELLM_AVAILABLE
"gpt-5.4-mini": 200000, "gpt-4-turbo": 128000, "gpt-4.1": 1047576, # Based on official docs "gpt-4.1-mini-2025-04-14": 1047576, @@ -411,7 +464,8 @@ class LLM(BaseLLM): except Exception as e: raise ImportError(f"Error importing native provider: {e}") from e - if not LITELLM_AVAILABLE: + # FALLBACK to LiteLLM — lazy-load on first use + if not _ensure_litellm(): native_list = ", ".join(SUPPORTED_NATIVE_PROVIDERS) error_msg = ( f"Unable to initialize LLM with model '{model}'. " @@ -632,7 +686,7 @@ class LLM(BaseLLM): @model_validator(mode="after") def _init_litellm(self) -> LLM: self.is_litellm = True - if LITELLM_AVAILABLE: + if _ensure_litellm(): litellm.drop_params = True self.set_callbacks(self.callbacks or []) self.set_env_callbacks() @@ -2290,7 +2344,8 @@ class LLM(BaseLLM): Note: This validation only applies to the litellm fallback path. Native providers have their own validation. """ - if not LITELLM_AVAILABLE or supports_response_schema is None: + if not _ensure_litellm() or supports_response_schema is None: + # When litellm is not available, skip validation # (this path should only be reached for litellm fallback models) return @@ -2310,7 +2365,7 @@ class LLM(BaseLLM): Note: This method is only used by the litellm fallback path. Native providers override this method with their own implementation. """ - if not LITELLM_AVAILABLE: + if not _ensure_litellm(): # When litellm is not available, assume function calling is supported # (all modern models support it) return True @@ -2334,7 +2389,7 @@ class LLM(BaseLLM): if "gpt-5" in model_lower: return False - if not LITELLM_AVAILABLE or get_supported_openai_params is None: + if not _ensure_litellm() or get_supported_openai_params is None: # When litellm is not available, assume stop words are supported return True @@ -2382,7 +2437,8 @@ class LLM(BaseLLM): Note: This only affects the litellm fallback path. Native providers don't use litellm callbacks - they emit events via base_llm.py. 
class EmbeddingDimensionMismatchError(ValueError):
    """Raised when an embedding's dimensionality doesn't match the existing store.

    The most common cause is upgrading CrewAI across the default-embedder
    change (text-embedding-3-small, 1536 dims → text-embedding-3-large,
    3072 dims) while keeping a local memory store created before the upgrade.

    Deliberately not a ``RuntimeError``: background-save plumbing treats
    ``RuntimeError`` as interpreter/executor shutdown and silently drops the
    save, which would swallow this actionable migration error.

    Attributes:
        stored_dim: Dimensionality of the vectors already in the store.
        new_dim: Dimensionality of the vector the current embedder produced.
    """

    def __init__(self, stored_dim: int, new_dim: int) -> None:
        """Build the error with a migration message for the two dimensions.

        Args:
            stored_dim: Dimensionality of the vectors already in the store.
            new_dim: Dimensionality of the offending embedding.
        """
        self.stored_dim = stored_dim
        self.new_dim = new_dim
        super().__init__(
            f"Embedding dimension mismatch: this memory store contains "
            f"{stored_dim}-dimensional vectors, but the current embedder produced "
            f"a {new_dim}-dimensional vector.\n\n"
            "This usually means the store was created with a different embedding "
            "model. CrewAI's default embedder changed from "
            "text-embedding-3-small (1536 dims) to text-embedding-3-large "
            "(3072 dims), so memory stores created before the upgrade are "
            "incompatible with the new default.\n\n"
            "To fix, do one of the following:\n"
            " - Reset local memory so it is rebuilt with the new embedder:\n"
            " crewai reset-memories --memory (or crew.reset_memories())\n"
            " - Keep existing memories by pinning the previous embedder:\n"
            ' embedder={"provider": "openai", '
            '"config": {"model": "text-embedding-3-small"}}'
        )

    def __reduce__(self) -> tuple[type, tuple[int, int], dict]:
        """Rebuild from the two dimensions when pickled or copied.

        The default exception reduction re-calls the class with ``self.args``,
        which holds only the formatted message. That does not fit the
        two-argument ``__init__`` and raises ``TypeError`` on unpickle/copy.
        """
        return (type(self), (self.stored_dim, self.new_dim), self.__dict__)
EmbeddingDimensionMismatchError from crewai.memory.types import MemoryRecord, ScopeInfo _logger = logging.getLogger(__name__) -# Default embedding vector dimensionality (matches OpenAI text-embedding-3-small). +# Default embedding vector dimensionality (matches OpenAI text-embedding-3-large). # Used when creating new tables and for zero-vector placeholder scans. # Callers can override via the ``vector_dim`` constructor parameter. -DEFAULT_VECTOR_DIM = 1536 +DEFAULT_VECTOR_DIM = 3072 # Safety cap on the number of rows returned by a single scan query. # Prevents unbounded memory use when scanning large tables for scope info, @@ -288,13 +289,19 @@ class LanceDBStorage: def save(self, records: list[MemoryRecord]) -> None: if not records: return - # Auto-detect dimension from the first real embedding. + # Auto-detect dimension from the first real embedding and validate + # the whole batch against it — a silent mismatch would otherwise be + # zero-filled below and corrupt search results. dim = None for r in records: if r.embedding and len(r.embedding) > 0: - dim = len(r.embedding) - break + if dim is None: + dim = len(r.embedding) + elif len(r.embedding) != dim: + raise EmbeddingDimensionMismatchError(dim, len(r.embedding)) is_new_table = self._table is None + if not is_new_table and dim and self._vector_dim and dim != self._vector_dim: + raise EmbeddingDimensionMismatchError(self._vector_dim, dim) with store_lock(self._lock_name): self._ensure_table(vector_dim=dim) rows = [self._record_to_row(rec) for rec in records] @@ -311,6 +318,15 @@ class LanceDBStorage: def update(self, record: MemoryRecord) -> None: """Update a record by ID. 
Preserves created_at, updates last_accessed.""" + if ( + self._table is not None + and record.embedding + and self._vector_dim + and len(record.embedding) != self._vector_dim + ): + raise EmbeddingDimensionMismatchError( + self._vector_dim, len(record.embedding) + ) with store_lock(self._lock_name): self._ensure_table() safe_id = str(record.id).replace("'", "''") @@ -363,6 +379,10 @@ class LanceDBStorage: ) -> list[tuple[MemoryRecord, float]]: if self._table is None: return [] + if self._vector_dim and len(query_embedding) != self._vector_dim: + raise EmbeddingDimensionMismatchError( + self._vector_dim, len(query_embedding) + ) query = self._table.search(query_embedding) if scope_prefix is not None and scope_prefix.strip("/"): prefix = scope_prefix.rstrip("/") diff --git a/lib/crewai/src/crewai/memory/storage/qdrant_edge_storage.py b/lib/crewai/src/crewai/memory/storage/qdrant_edge_storage.py index d819094e9..1fdacb47d 100644 --- a/lib/crewai/src/crewai/memory/storage/qdrant_edge_storage.py +++ b/lib/crewai/src/crewai/memory/storage/qdrant_edge_storage.py @@ -36,6 +36,7 @@ from qdrant_edge import ( UpdateOperation, ) +from crewai.memory.storage.backend import EmbeddingDimensionMismatchError from crewai.memory.types import MemoryRecord, ScopeInfo @@ -43,7 +44,7 @@ _logger = logging.getLogger(__name__) VECTOR_NAME: Final[str] = "memory" -DEFAULT_VECTOR_DIM: Final[int] = 1536 +DEFAULT_VECTOR_DIM: Final[int] = 3072 _SCROLL_BATCH: Final[int] = 256 @@ -183,6 +184,10 @@ class QdrantEdgeStorage: except Exception: _logger.debug("Index creation failed (may already exist)", exc_info=True) + def _has_existing_data(self) -> bool: + """True when either shard already holds persisted records.""" + return self._local_has_data or self._central_path.exists() + def _record_to_point(self, record: MemoryRecord) -> Point: """Convert a MemoryRecord to a Qdrant Point.""" return Point( @@ -277,11 +282,19 @@ class QdrantEdgeStorage: if not records: return + # Validate the batch is internally 
consistent before touching the + # store-level dimension. + batch_dim = 0 + for r in records: + if r.embedding and len(r.embedding) > 0: + if batch_dim == 0: + batch_dim = len(r.embedding) + elif len(r.embedding) != batch_dim: + raise EmbeddingDimensionMismatchError(batch_dim, len(r.embedding)) if self._vector_dim == 0: - for r in records: - if r.embedding and len(r.embedding) > 0: - self._vector_dim = len(r.embedding) - break + self._vector_dim = batch_dim + elif batch_dim and batch_dim != self._vector_dim and self._has_existing_data(): + raise EmbeddingDimensionMismatchError(self._vector_dim, batch_dim) if self._config is None and self._vector_dim > 0: self._config = self._build_config(self._vector_dim) if self._config is None: @@ -308,6 +321,14 @@ class QdrantEdgeStorage: min_score: float = 0.0, ) -> list[tuple[MemoryRecord, float]]: """Search both central and local shards, merge results.""" + if ( + self._vector_dim + and len(query_embedding) != self._vector_dim + and self._has_existing_data() + ): + raise EmbeddingDimensionMismatchError( + self._vector_dim, len(query_embedding) + ) filt = self._build_scope_filter(scope_prefix) fetch_limit = limit * 3 if (categories or metadata_filter) else limit all_scored: list[tuple[dict[str, Any], float, bool]] = [] @@ -466,6 +487,16 @@ class QdrantEdgeStorage: def update(self, record: MemoryRecord) -> None: """Update a record by upserting with the same point ID.""" + if ( + self._config is not None + and record.embedding + and self._vector_dim + and len(record.embedding) != self._vector_dim + and self._has_existing_data() + ): + raise EmbeddingDimensionMismatchError( + self._vector_dim, len(record.embedding) + ) if self._config is None: if record.embedding and len(record.embedding) > 0: self._vector_dim = len(record.embedding) diff --git a/lib/crewai/src/crewai/memory/unified_memory.py b/lib/crewai/src/crewai/memory/unified_memory.py index 75191b203..c6f75d14c 100644 --- a/lib/crewai/src/crewai/memory/unified_memory.py +++ 
b/lib/crewai/src/crewai/memory/unified_memory.py @@ -66,7 +66,7 @@ class Memory(BaseModel): memory_kind: Literal["memory"] = "memory" llm: Annotated[BaseLLM | str, PlainValidator(_passthrough)] = Field( - default="gpt-4o-mini", + default="gpt-5.4-mini", description="LLM for analysis (model name or BaseLLM instance).", ) storage: Annotated[StorageBackend | str, PlainValidator(_passthrough)] = Field( @@ -239,7 +239,7 @@ class Memory(BaseModel): raise RuntimeError( f"Memory requires an LLM for analysis but initialization failed: {e}\n\n" "To fix this, do one of the following:\n" - " - Set OPENAI_API_KEY for the default model (gpt-4o-mini)\n" + " - Set OPENAI_API_KEY for the default model (gpt-5.4-mini)\n" ' - Pass a different model: Memory(llm="anthropic/claude-3-haiku-20240307")\n' ' - Pass any LLM instance: Memory(llm=LLM(model="your-model"))\n' " - To skip LLM analysis, pass all fields explicitly to remember()\n" @@ -261,7 +261,7 @@ class Memory(BaseModel): raise RuntimeError( f"Memory requires an embedder for vector search but initialization failed: {e}\n\n" "To fix this, do one of the following:\n" - " - Set OPENAI_API_KEY for the default embedder (text-embedding-3-small)\n" + " - Set OPENAI_API_KEY for the default embedder (text-embedding-3-large)\n" ' - Pass a different embedder: Memory(embedder={{"provider": "google", "config": {{...}}}})\n' " - Pass a callable: Memory(embedder=my_embedding_function)\n\n" f"Docs: {self._MEMORY_DOCS_URL}" @@ -322,12 +322,16 @@ class Memory(BaseModel): """Block until all pending background saves have completed. Called automatically by ``recall()`` and should be called by the - crew at shutdown to ensure no saves are lost. + crew at shutdown to ensure no saves are lost. Background save failures + are already reported through ``MemorySaveFailedEvent`` and should not + fail the task, crew, or flow that produced the output. 
""" with self._pending_lock: pending = list(self._pending_saves) for future in pending: - future.result() # blocks until done; re-raises exceptions + if future.cancelled(): + continue + future.exception() # blocks until done without re-raising failures def close(self) -> None: """Drain pending saves, flush storage, and shut down the background thread pool.""" @@ -605,12 +609,16 @@ class Memory(BaseModel): root_scope, ) elapsed_ms = (time.perf_counter() - start) * 1000 - except RuntimeError: + except RuntimeError as e: # The encoding pipeline uses asyncio.run() -> to_thread() internally. # If the process is shutting down, the default executor is closed and # to_thread raises "cannot schedule new futures after shutdown". # Silently abandon the save -- the process is exiting anyway. - return [] + # Any other RuntimeError must propagate so the save future's + # done-callback reports it via MemorySaveFailedEvent. + if "cannot schedule new futures" in str(e): + return [] + raise try: crewai_event_bus.emit( diff --git a/lib/crewai/src/crewai/project/__init__.py b/lib/crewai/src/crewai/project/__init__.py index b712138cc..fabbbbe76 100644 --- a/lib/crewai/src/crewai/project/__init__.py +++ b/lib/crewai/src/crewai/project/__init__.py @@ -14,6 +14,8 @@ from crewai.project.annotations import ( tool, ) from crewai.project.crew_base import CrewBase +from crewai.project.crew_loader import load_crew, load_crew_and_kickoff +from crewai.project.json_loader import load_agent, strip_jsonc_comments __all__ = [ @@ -25,8 +27,12 @@ __all__ = [ "callback", "crew", "llm", + "load_agent", + "load_crew", + "load_crew_and_kickoff", "output_json", "output_pydantic", + "strip_jsonc_comments", "task", "tool", ] diff --git a/lib/crewai/src/crewai/project/crew_loader.py b/lib/crewai/src/crewai/project/crew_loader.py new file mode 100644 index 000000000..fce183f77 --- /dev/null +++ b/lib/crewai/src/crewai/project/crew_loader.py @@ -0,0 +1,101 @@ +"""Load crew definitions from JSON/JSONC files and 
def load_crew(
    source: Path | str,
    agents_dir: Path | None = None,
) -> tuple[Any, dict[str, Any]]:
    """Load a ``Crew`` from a JSON/JSONC definition file.

    The definition file describes the crew's agents, tasks, process type, and
    default inputs. Agent definitions are resolved from individual
    ``.jsonc`` / ``.json`` files inside an ``agents/`` directory.

    Args:
        source: Path to the crew definition file.
        agents_dir: Directory holding the agent definition files; forwarded
            unchanged to ``load_json_crew_project``.

    Returns:
        A ``(crew, default_inputs)`` tuple. ``default_inputs`` is a fresh dict
        built from the definition's ``"inputs"`` object; it is empty when the
        key is missing or ``null``.

    Raises:
        JSONProjectError: If an agent, a task, or the crew cannot be
            constructed, or if ``"inputs"`` is present but is not an object.
        JSONProjectValidationError: Re-raised unchanged when crew
            construction reports it. Errors raised by
            ``load_json_crew_project`` also propagate unchanged.
    """
    from crewai import Agent, Crew, Task

    crew_path = Path(source)
    project = load_json_crew_project(crew_path, agents_dir=agents_dir)

    # Normalise the default inputs up front: ``"inputs": null`` (or a
    # non-object value) would otherwise leak out as a non-dict and break the
    # declared return type and any ``{**default_inputs}`` merge downstream.
    raw_inputs = project.definition.get("inputs")
    if raw_inputs is None:
        default_inputs: dict[str, Any] = {}
    elif isinstance(raw_inputs, dict):
        # Copy so callers cannot mutate the parsed project definition.
        default_inputs = dict(raw_inputs)
    else:
        raise JSONProjectError(
            f"{crew_path}: 'inputs' must be an object, "
            f"got {type(raw_inputs).__name__}"
        )

    agents_map: dict[str, Any] = {}
    for name in project.agent_names:
        agent_def = project.agents[name]
        try:
            agents_map[name] = Agent(**agent_def.kwargs)
        except ValidationError as exc:
            raise JSONProjectError(
                f"{agent_def.path}: validation failed: {exc}"
            ) from exc
        except Exception as exc:
            raise JSONProjectError(
                f"{agent_def.path}: failed to load agent: {exc}"
            ) from exc

    tasks_list: list[Task] = []
    # Tasks registered by name so later tasks can reference earlier ones.
    task_name_map: dict[str, Task] = {}

    for index, task_defn in enumerate(project.task_definitions):
        source_label = f"{crew_path}: tasks[{index}]"
        task_kwargs = _task_kwargs_from_definition(
            task_defn,
            agents_map=agents_map,
            task_name_map=task_name_map,
            source=source_label,
            project_root=crew_path.parent,
        )
        try:
            task = Task(**task_kwargs)
        except ValidationError as exc:
            raise JSONProjectError(f"{source_label}: validation failed: {exc}") from exc
        except Exception as exc:
            # Same path-labelled wrapping the agent and crew steps use.
            raise JSONProjectError(
                f"{source_label}: failed to load task: {exc}"
            ) from exc

        tasks_list.append(task)
        task_name = task_defn.get("name")
        if isinstance(task_name, str) and task_name:
            task_name_map[task_name] = task

    crew_kwargs = _crew_kwargs_from_definition(
        project.definition,
        agents=list(agents_map.values()),
        tasks=tasks_list,
        agents_map=agents_map,
        source=crew_path,
    )

    try:
        crew = Crew(**crew_kwargs)
    except ValidationError as exc:
        raise JSONProjectError(f"{crew_path}: validation failed: {exc}") from exc
    except JSONProjectValidationError:
        raise
    except Exception as exc:
        raise JSONProjectError(f"{crew_path}: failed to load crew: {exc}") from exc

    return crew, default_inputs


def load_crew_and_kickoff(
    crew_path: Path | str,
    input_overrides: dict[str, Any] | None = None,
) -> Any:
    """Load a crew and immediately kick it off.

    Args:
        crew_path: Path to the crew definition file, passed to ``load_crew``.
        input_overrides: Values that replace or extend the definition's
            default inputs for this run.

    Returns:
        Whatever ``crew.kickoff`` returns.
    """
    crew, default_inputs = load_crew(crew_path)

    # Overrides win over defaults; either side may be absent.
    merged_inputs = {**(default_inputs or {}), **(input_overrides or {})}

    return crew.kickoff(inputs=merged_inputs)
# Extensions probed for a project definition, in priority order (JSONC first).
JSON_PROJECT_EXTENSIONS = (".jsonc", ".json")


def find_json_project_file(directory: str | Path, stem: str) -> Path | None:
    """Return ``stem.jsonc`` or ``stem.json``, preferring JSONC.

    Args:
        directory: Directory to look in.
        stem: File name without extension (for example ``"crew"``).

    Returns:
        The first matching regular file in ``JSON_PROJECT_EXTENSIONS`` order,
        or ``None`` when neither exists.
    """
    root = Path(directory)
    for ext in JSON_PROJECT_EXTENSIONS:
        candidate = root / f"{stem}{ext}"
        # ``is_file`` rather than ``exists``: the result is read as text, so a
        # directory that happens to be named ``<stem>.jsonc`` must not shadow
        # a real ``<stem>.json`` definition.
        if candidate.is_file():
            return candidate
    return None
def load_jsonc_file(source: str | Path) -> Any:
    """Load a JSON or JSONC file.

    Args:
        source: Path of the file to read.

    Returns:
        The Python data produced by ``parse_jsonc`` for the file's text.
    """
    path = Path(source)
    # ``utf-8-sig`` decodes plain UTF-8 identically but drops a leading BOM,
    # which some editors write and which ``json.loads`` rejects.
    return parse_jsonc(path.read_text(encoding="utf-8-sig"), source=path)
+ """ + crew_path = Path(source) + if agents_dir is None: + agents_dir = crew_path.parent / "agents" + + errors: list[str] = [] + + def fail(message: str, exc_type: type[Exception] = JSONProjectError) -> None: + if collect_errors: + errors.append(message) + return + raise exc_type(message) + + def fail_many(messages: list[str]) -> None: + if not messages: + return + if collect_errors: + errors.extend(messages) + return + raise JSONProjectValidationError(messages) + + try: + defn = _expect_object(load_jsonc_file(crew_path), crew_path) + except Exception as exc: + if collect_errors: + raise JSONProjectValidationError([str(exc)]) from exc + raise + + fail_many( + _field_errors( + defn, + _crew_allowed_fields(), + _CREW_RUNTIME_FIELDS, + crew_path, + {"inputs"}, + ) + ) + + agent_names = defn.get("agents", []) + if not isinstance(agent_names, list) or not agent_names: + fail(f"{crew_path}: 'agents' must be a non-empty list") + agent_names = [] + + agents_dir = Path(agents_dir) + agent_definitions: dict[str, JSONAgentDefinition] = {} + for agent_name in agent_names: + if not isinstance(agent_name, str) or not agent_name: + fail(f"{crew_path}: each agent reference must be a non-empty string") + continue + agent_file = find_json_project_file(agents_dir, agent_name) + if agent_file is None: + message = ( + f"Agent definition for '{agent_name}' not found in {agents_dir} " + f"(tried {agent_name}.jsonc and {agent_name}.json)" + ) + if collect_errors: + errors.append( + f"{crew_path}: agent '{agent_name}' not found in {agents_dir} " + f"(tried {agent_name}.jsonc and {agent_name}.json)" + ) + else: + raise FileNotFoundError(message) + continue + try: + agent_defn = _expect_object(load_jsonc_file(agent_file), agent_file) + agent_kwargs = _agent_kwargs_from_definition( + agent_defn, + agent_file, + # Validation must never execute project code (custom tools). 
+ resolve_tools=not collect_errors, + project_root=crew_path.parent, + ) + except Exception as exc: + if collect_errors: + errors.append(str(exc)) + continue + raise + agent_definitions[agent_name] = JSONAgentDefinition( + name=agent_name, + path=agent_file, + definition=agent_defn, + kwargs=agent_kwargs, + ) + + task_defs = defn.get("tasks", []) + if not isinstance(task_defs, list) or not task_defs: + fail(f"{crew_path}: 'tasks' must be a non-empty list") + task_defs = [] + + known_tasks: set[str] = set() + known_agents = {name for name in agent_names if isinstance(name, str)} + for index, task_defn in enumerate(task_defs): + task_path = f"{crew_path}: tasks[{index}]" + if not isinstance(task_defn, dict): + fail(f"{task_path} must be an object") + continue + fail_many( + _field_errors( + task_defn, + _task_allowed_fields(), + _TASK_RUNTIME_FIELDS, + task_path, + ) + ) + missing_required = [ + f"{task_path} missing required field '{required}'" + for required in ("description", "expected_output") + if required not in task_defn + ] + fail_many(missing_required) + + agent_ref = task_defn.get("agent") + if agent_ref is not None and agent_ref not in known_agents: + fail( + f"{task_path} references agent '{agent_ref}' which is not in the crew agents list" + ) + + fail_many( + _tool_definition_errors(task_defn.get("tools"), task_path, crew_path.parent) + ) + + context_names = task_defn.get("context") + if context_names is not None: + if not isinstance(context_names, list): + fail(f"{task_path} field 'context' must be a list of task names") + else: + fail_many( + [ + f"{task_path} has context reference '{ctx_name}' but that task " + "has not been defined yet" + for ctx_name in context_names + if ctx_name not in known_tasks + ] + ) + + task_name = task_defn.get("name") + if isinstance(task_name, str) and task_name: + known_tasks.add(task_name) + + if errors: + raise JSONProjectValidationError(errors) + + return JSONCrewProject( + crew_path=crew_path, + 
agents_dir=agents_dir, + definition=defn, + agent_names=list(agent_names), + agents=agent_definitions, + task_definitions=task_defs, + ) + + +def _strip_jsonc_comments(text: str) -> str: + result: list[str] = [] + i = 0 + in_string = False + escape = False + + while i < len(text): + char = text[i] + + if in_string: + result.append(char) + if escape: + escape = False + elif char == "\\": + escape = True + elif char == '"': + in_string = False + i += 1 + continue + + if char == '"': + in_string = True + result.append(char) + i += 1 + continue + + next_char = text[i + 1] if i + 1 < len(text) else "" + if char == "/" and next_char == "/": + i += 2 + while i < len(text) and text[i] not in "\r\n": + i += 1 + continue + + if char == "/" and next_char == "*": + i += 2 + closed = False + while i < len(text) - 1: + if text[i] == "\n": + result.append("\n") + if text[i] == "*" and text[i + 1] == "/": + i += 2 + closed = True + break + i += 1 + if not closed: + raise JSONProjectError("unterminated block comment in JSONC input") + continue + + result.append(char) + i += 1 + + return "".join(result) + + +def _strip_trailing_commas(text: str) -> str: + result: list[str] = [] + i = 0 + in_string = False + escape = False + + while i < len(text): + char = text[i] + + if in_string: + result.append(char) + if escape: + escape = False + elif char == "\\": + escape = True + elif char == '"': + in_string = False + i += 1 + continue + + if char == '"': + in_string = True + result.append(char) + i += 1 + continue + + if char == ",": + j = i + 1 + while j < len(text) and text[j].isspace(): + j += 1 + if j < len(text) and text[j] in "}]": + i += 1 + continue + + result.append(char) + i += 1 + + return "".join(result) + + +def _expect_object(value: Any, source: str | Path) -> dict[str, Any]: + if not isinstance(value, dict): + raise JSONProjectError(f"{source}: expected a JSON object") + return value + + +def _agent_kwargs_from_definition( + defn: dict[str, Any], + path: Path | str, + *, + 
resolve_tools: bool = True, + project_root: Path | None = None, +) -> dict[str, Any]: + errors = _field_errors( + defn, + _agent_allowed_fields(), + _AGENT_RUNTIME_FIELDS, + path, + {"settings"}, + ) + for required in ("role", "goal", "backstory"): + if required not in defn: + errors.append(f"{path}: missing required field '{required}'") + + settings = defn.get("settings", {}) + if settings is None: + settings = {} + if not isinstance(settings, dict): + errors.append(f"{path}: 'settings' must be an object when provided") + settings = {} + else: + errors.extend( + _field_errors( + settings, + _agent_allowed_fields(), + _AGENT_RUNTIME_FIELDS, + f"{path}: settings", + ) + ) + + if errors: + raise JSONProjectValidationError(errors) + + agent_kwargs = { + key: value for key, value in defn.items() if key in _agent_allowed_fields() + } + agent_kwargs.update(settings) + if resolve_tools: + _resolve_tool_fields(agent_kwargs, project_root=project_root) + else: + # Validation/deploy mode: check tool declarations structurally without + # importing or instantiating anything — custom: tools execute + # project Python on resolution, which must not happen here. 
def _task_kwargs_from_definition(
    task_defn: dict[str, Any],
    agents_map: dict[str, Any],
    task_name_map: dict[str, Any],
    source: str,
    project_root: Path | None = None,
) -> dict[str, Any]:
    """Build ``Task`` constructor kwargs from one JSON task definition.

    Args:
        task_defn: The task object as parsed from the crew JSON.
        agents_map: Agent name -> agent object, used to resolve ``agent``.
        task_name_map: Task name -> already-built task, used to resolve
            ``context`` references.
        source: Label prefixed to error messages.
        project_root: Passed through to tool resolution.

    Returns:
        Keyword arguments with ``agent``, ``context`` and ``tools`` resolved.

    Raises:
        JSONProjectValidationError: If the definition contains runtime-only
            or unsupported fields.
        JSONProjectError: If ``agent`` or a ``context`` entry names something
            unknown, or ``context`` is not a list.
    """
    # Computed once: the helper rebuilds its set on every call.
    allowed_fields = _task_allowed_fields()
    errors = _field_errors(
        task_defn,
        allowed_fields,
        _TASK_RUNTIME_FIELDS,
        source,
    )
    if errors:
        raise JSONProjectValidationError(errors)

    task_kwargs = {
        key: value for key, value in task_defn.items() if key in allowed_fields
    }

    agent_ref = task_kwargs.get("agent")
    if isinstance(agent_ref, str):
        if agent_ref not in agents_map:
            raise JSONProjectError(
                f"{source} references agent '{agent_ref}' which is not in the crew agents list"
            )
        task_kwargs["agent"] = agents_map[agent_ref]

    context_names = task_kwargs.get("context")
    if context_names is not None:
        if not isinstance(context_names, list):
            # Without this check a string such as "research" would be
            # iterated character by character and reported as an unknown
            # reference 'r'.
            raise JSONProjectError(
                f"{source} field 'context' must be a list of task names"
            )
        context_tasks: list[Any] = []
        for ctx_name in context_names:
            # The isinstance guard keeps unhashable entries (objects, lists)
            # from raising a bare TypeError in the dict lookup.
            if not isinstance(ctx_name, str) or ctx_name not in task_name_map:
                raise JSONProjectError(
                    f"{source} has context reference '{ctx_name}' but that task "
                    "has not been defined yet"
                )
            context_tasks.append(task_name_map[ctx_name])
        task_kwargs["context"] = context_tasks

    _resolve_tool_fields(task_kwargs, project_root=project_root)
    return task_kwargs
isinstance(manager_agent, str): + if manager_agent not in agents_map: + raise JSONProjectError( + f"{source}: manager_agent '{manager_agent}' is not in the crew agents list" + ) + crew_kwargs["manager_agent"] = agents_map[manager_agent] + + return crew_kwargs + + +def _resolve_tool_fields( + kwargs: dict[str, Any], project_root: Path | None = None +) -> None: + tools = kwargs.get("tools") + if tools is not None: + kwargs["tools"] = _resolve_tools(tools, project_root=project_root) + + +def _field_errors( + data: dict[str, Any], + allowed_fields: set[str], + runtime_fields: set[str], + source: str | Path, + extra_allowed: set[str] | None = None, +) -> list[str]: + extra_allowed = extra_allowed or set() + keys = set(data) + runtime = sorted(keys & runtime_fields) + unknown = sorted(keys - allowed_fields - runtime_fields - extra_allowed) + + errors: list[str] = [] + if runtime: + errors.append( + f"{source}: runtime-only field(s) are not supported in JSON config: " + + ", ".join(runtime) + ) + if unknown: + errors.append(f"{source}: unsupported field(s): " + ", ".join(unknown)) + return errors + + +def _agent_allowed_fields() -> set[str]: + from crewai import Agent + + return set(Agent.model_fields) - _AGENT_RUNTIME_FIELDS + + +def _task_allowed_fields() -> set[str]: + from crewai import Task + + return set(Task.model_fields) - _TASK_RUNTIME_FIELDS + + +def _crew_allowed_fields() -> set[str]: + from crewai import Crew + + return set(Crew.model_fields) - _CREW_RUNTIME_FIELDS + + +def _format_validation_error(path: str | Path, exc: ValidationError) -> str: + return f"{path}: validation failed: {exc}" + + +def _resolve_tools(tool_defs: list[Any], project_root: Path | None = None) -> list[Any]: + """Resolve tool specs into tool instances or serialized BaseTool dicts. + + Strings keep the existing shorthand behavior. Dicts are passed through so + ``BaseTool``'s Pydantic validator can hydrate serialized ``tool_type`` data. 
def _find_tool_class(name: str) -> type | None:
    """Look up a tool class by name from the ``crewai_tools`` package.

    Candidate class names are tried in this order: ``name`` as given,
    ``name`` with a ``Tool`` suffix, then the PascalCase form of a snake_case
    name. Hits and misses are both stored in ``_tool_class_cache``.

    Args:
        name: Tool name as written in the JSON definition.

    Returns:
        The tool class, or ``None`` when no candidate resolves to a class.
    """
    if name in _tool_class_cache:
        return _tool_class_cache[name]

    candidates = [name]
    if not name.endswith("Tool"):
        candidates.append(name + "Tool")
    pascal = "".join(word.capitalize() for word in name.split("_"))
    # "serper_dev_tool" already converts to "SerperDevTool". Appending another
    # suffix would produce "SerperDevToolTool", which never exists.
    snake_pascal = pascal if pascal.endswith("Tool") else pascal + "Tool"
    if snake_pascal not in candidates:
        candidates.append(snake_pascal)

    for class_name in candidates:
        cls = _try_import_tool(class_name)
        # The attribute lookup can return something that is not a class, such
        # as a package's same-named submodule. Skip it and keep trying rather
        # than caching an object the caller cannot instantiate.
        if isinstance(cls, type):
            _tool_class_cache[name] = cls
            return cls

    _tool_class_cache[name] = None
    return None
+ ) + snake = _re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", base).lower() + tool_snake = snake + "_tool" if not snake.endswith("_tool") else snake + + module_paths = [ + f"crewai_tools.tools.{tool_snake}.{tool_snake}", + f"crewai_tools.tools.{tool_snake}", + ] + + for mod_path in module_paths: + cls = _import_tool_class(mod_path, class_name) + if cls is not None: + return cls + + try: + import crewai_tools + + return getattr(crewai_tools, class_name, None) + except ImportError: + return None + + +def _import_tool_class(mod_path: str, class_name: str) -> type | None: + try: + import importlib + + mod = importlib.import_module(mod_path) + except (ImportError, ModuleNotFoundError): + return None + return getattr(mod, class_name, None) + + +_CUSTOM_TOOL_NAME_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*") + + +def _custom_tool_file(tool_name: str, project_root: Path | None) -> Path: + """Return the validated path of a custom tool inside ``tools/``. + + Rejects names that aren't plain identifiers and (belt-and-suspenders) + any resolved path that escapes the project's ``tools/`` directory, so + ``custom:../evil`` or absolute-path style names cannot execute code + outside the project. + """ + if not _CUSTOM_TOOL_NAME_RE.fullmatch(tool_name): + raise JSONProjectError( + f"Invalid custom tool name 'custom:{tool_name}': names must match " + f"[A-Za-z_][A-Za-z0-9_]* and resolve to tools/.py inside " + f"the project." + ) + tools_dir = ((project_root or Path.cwd()) / "tools").resolve() + tool_file = (tools_dir / f"{tool_name}.py").resolve() + try: + tool_file.relative_to(tools_dir) + except ValueError: + raise JSONProjectError( + f"Custom tool 'custom:{tool_name}' resolves outside the project's " + f"tools/ directory." + ) from None + return tool_file + + +def _tool_definition_errors( + tool_defs: Any, source: Path | str, project_root: Path | None +) -> list[str]: + """Structurally validate tool declarations WITHOUT importing anything. 
def _resolve_custom_tool(tool_name: str, project_root: Path | None = None) -> Any:
    """Resolve a custom tool from the project's ``tools/`` directory.

    Note: ``custom:`` tools execute ``tools/<tool_name>.py`` as local Python
    code at load time — JSON configs referencing them are no longer pure data.
    Only run JSON crew projects from sources you trust. Validation paths must
    use ``_tool_definition_errors`` instead, which never executes anything.

    Args:
        tool_name: Name after the ``custom:`` prefix.
        project_root: Project directory containing ``tools/``.

    Returns:
        An instance of the ``BaseTool`` subclass found in the tool file. A
        class defined in that file is preferred over one it merely imports.

    Raises:
        JSONProjectError: If the file is missing, cannot be loaded, fails
            while executing, or exposes no ``BaseTool`` subclass.
    """
    tool_file = _custom_tool_file(tool_name, project_root)
    if not tool_file.exists():
        raise JSONProjectError(
            f"Custom tool 'custom:{tool_name}' not found: expected {tool_file}. "
            f"Create the file with a BaseTool subclass, or remove the tool from "
            f"your crew JSON."
        )
    try:
        import importlib.util

        spec = importlib.util.spec_from_file_location(
            f"custom_tools.{tool_name}", tool_file
        )
        if spec is None or spec.loader is None:
            raise JSONProjectError(
                f"Could not load custom tool 'custom:{tool_name}' from {tool_file}"
            )
        logger.debug("Executing custom tool module: %s", tool_file)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        from crewai.tools.base_tool import BaseTool

        # dir() is alphabetical, which keeps the choice deterministic.
        tool_classes = [
            attr
            for attr in (getattr(module, attr_name) for attr_name in dir(module))
            if isinstance(attr, type)
            and issubclass(attr, BaseTool)
            and attr is not BaseTool
        ]
        # A tool file commonly imports other tool classes, e.g. a base class
        # it extends. Taking the alphabetically first subclass could then
        # instantiate the import instead of the user's tool, so prefer classes
        # defined in this file and fall back to imported ones only when the
        # file defines none.
        local_classes = [
            candidate
            for candidate in tool_classes
            if candidate.__module__ == module.__name__
        ]
        chosen = local_classes or tool_classes
        if chosen:
            # Concrete subclasses supply name/description defaults that
            # BaseTool's signature requires.
            tool_cls: type[Any] = chosen[0]
            return tool_cls()
        raise JSONProjectError(
            f"No BaseTool subclass found in {tool_file}. Custom tools must "
            f"define a class inheriting from crewai.tools.BaseTool."
        )
    except JSONProjectError:
        raise
    except Exception as e:
        raise JSONProjectError(
            f"Failed to load custom tool 'custom:{tool_name}' from {tool_file}: {e}"
        ) from e
@classmethod + def _normalize_model_alias(cls, data: Any) -> Any: + if isinstance(data, dict) and "model" in data and "model_name" not in data: + data = data.copy() + data["model_name"] = data["model"] + return data + embedding_callable: type[OpenAIEmbeddingFunction] = Field( default=OpenAIEmbeddingFunction, description="Azure OpenAI embedding function class", @@ -43,13 +51,11 @@ class AzureProvider(BaseEmbeddingsProvider[OpenAIEmbeddingFunction]): ), ) model_name: str = Field( - default="text-embedding-ada-002", + default="text-embedding-3-large", description="Model name to use for embeddings", validation_alias=AliasChoices( "EMBEDDINGS_OPENAI_MODEL_NAME", - "OPENAI_MODEL_NAME", "AZURE_OPENAI_MODEL_NAME", - "model", ), ) default_headers: dict[str, Any] | None = Field( diff --git a/lib/crewai/src/crewai/rag/embeddings/providers/microsoft/types.py b/lib/crewai/src/crewai/rag/embeddings/providers/microsoft/types.py index 45dc2b2ef..d63e9d889 100644 --- a/lib/crewai/src/crewai/rag/embeddings/providers/microsoft/types.py +++ b/lib/crewai/src/crewai/rag/embeddings/providers/microsoft/types.py @@ -12,7 +12,7 @@ class AzureProviderConfig(TypedDict, total=False): api_base: str api_type: Annotated[str, "azure"] api_version: str - model_name: Annotated[str, "text-embedding-ada-002"] + model_name: Annotated[str, "text-embedding-3-large"] default_headers: dict[str, Any] dimensions: int deployment_id: Required[str] diff --git a/lib/crewai/src/crewai/rag/embeddings/providers/openai/openai_provider.py b/lib/crewai/src/crewai/rag/embeddings/providers/openai/openai_provider.py index 67017add4..15c8389a5 100644 --- a/lib/crewai/src/crewai/rag/embeddings/providers/openai/openai_provider.py +++ b/lib/crewai/src/crewai/rag/embeddings/providers/openai/openai_provider.py @@ -5,7 +5,7 @@ from typing import Any from chromadb.utils.embedding_functions.openai_embedding_function import ( OpenAIEmbeddingFunction, ) -from pydantic import AliasChoices, Field +from pydantic import AliasChoices, 
Field, model_validator from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider @@ -13,6 +13,14 @@ from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider class OpenAIProvider(BaseEmbeddingsProvider[OpenAIEmbeddingFunction]): """OpenAI embeddings provider.""" + @model_validator(mode="before") + @classmethod + def _normalize_model_alias(cls, data: Any) -> Any: + if isinstance(data, dict) and "model" in data and "model_name" not in data: + data = data.copy() + data["model_name"] = data["model"] + return data + embedding_callable: type[OpenAIEmbeddingFunction] = Field( default=OpenAIEmbeddingFunction, description="OpenAI embedding function class", @@ -23,12 +31,11 @@ class OpenAIProvider(BaseEmbeddingsProvider[OpenAIEmbeddingFunction]): validation_alias=AliasChoices("EMBEDDINGS_OPENAI_API_KEY", "OPENAI_API_KEY"), ) model_name: str = Field( - default="text-embedding-ada-002", + default="text-embedding-3-large", description="Model name to use for embeddings", validation_alias=AliasChoices( "EMBEDDINGS_OPENAI_MODEL_NAME", - "OPENAI_MODEL_NAME", - "model", + "model_name", ), ) api_base: str | None = Field( diff --git a/lib/crewai/src/crewai/rag/embeddings/providers/openai/types.py b/lib/crewai/src/crewai/rag/embeddings/providers/openai/types.py index eed17e83e..ef74bb086 100644 --- a/lib/crewai/src/crewai/rag/embeddings/providers/openai/types.py +++ b/lib/crewai/src/crewai/rag/embeddings/providers/openai/types.py @@ -9,7 +9,7 @@ class OpenAIProviderConfig(TypedDict, total=False): """Configuration for OpenAI provider.""" api_key: str - model_name: Annotated[str, "text-embedding-ada-002"] + model_name: Annotated[str, "text-embedding-3-large"] api_base: str api_type: str api_version: str diff --git a/lib/crewai/src/crewai/telemetry/telemetry.py b/lib/crewai/src/crewai/telemetry/telemetry.py index f13faed2b..df9fb26b8 100644 --- a/lib/crewai/src/crewai/telemetry/telemetry.py +++ b/lib/crewai/src/crewai/telemetry/telemetry.py @@ -931,7 
def is_native_tool_calling_unsupported_error(error: BaseException) -> bool:
    """Return whether an error means native tool calling is unavailable.

    The check is a case-insensitive substring search of ``str(error)``
    against each entry of ``_NATIVE_TOOL_UNSUPPORTED_PATTERNS``.

    Args:
        error: Exception raised by the LLM call.

    Returns:
        ``True`` if any known pattern occurs in the error text.
    """
    lowered = str(error).lower()
    for fragment in _NATIVE_TOOL_UNSUPPORTED_PATTERNS:
        if fragment in lowered:
            return True
    return False
" + "Continue using CrewAI text tool calling instead.\n" + f"{text_tooling_prompt}" + ) + + def setup_native_tools( original_tools: list[BaseTool], ) -> tuple[ @@ -1365,6 +1396,8 @@ def execute_single_native_tool_call( event_source: Any, printer: Printer | None = None, verbose: bool = False, + plan_step_number: int | None = None, + plan_step_description: str | None = None, ) -> NativeToolCallResult: """Execute a single native tool call with full lifecycle management. @@ -1446,6 +1479,8 @@ def execute_single_native_tool_call( from_agent=agent, from_task=task, agent_key=agent_key, + plan_step_number=plan_step_number, + plan_step_description=plan_step_description, ), ) @@ -1509,6 +1544,8 @@ def execute_single_native_tool_call( from_agent=agent, from_task=task, agent_key=agent_key, + plan_step_number=plan_step_number, + plan_step_description=plan_step_description, error=e, ), ) @@ -1542,6 +1579,8 @@ def execute_single_native_tool_call( from_agent=agent, from_task=task, agent_key=agent_key, + plan_step_number=plan_step_number, + plan_step_description=plan_step_description, started_at=started_at, finished_at=datetime.now(), ), diff --git a/lib/crewai/src/crewai/utilities/llm_utils.py b/lib/crewai/src/crewai/utilities/llm_utils.py index dd3a8dcac..09507157c 100644 --- a/lib/crewai/src/crewai/utilities/llm_utils.py +++ b/lib/crewai/src/crewai/utilities/llm_utils.py @@ -11,12 +11,13 @@ logger = logging.getLogger(__name__) def create_llm( - llm_value: str | LLM | Any | None = None, + llm_value: str | dict[str, Any] | LLM | Any | None = None, ) -> LLM | BaseLLM | None: """Creates or returns an LLM instance based on the given llm_value. Args: - llm_value: LLM instance, model name string, None, or an object with LLM attributes. + llm_value: LLM instance, model name string, config dict, None, or an + object with LLM attributes. Returns: A BaseLLM instance if successful, or None if something fails. 
@@ -32,6 +33,26 @@ def create_llm( logger.error(f"Error instantiating LLM from string: {e}") raise e + if isinstance(llm_value, dict): + try: + model = ( + llm_value.get("model") + or llm_value.get("model_name") + or llm_value.get("deployment_name") + ) + if not model: + raise ValueError( + "LLM config dictionaries must include 'model', " + "'model_name', or 'deployment_name'" + ) + llm_params = {**llm_value, "model": model} + llm_params.pop("model_name", None) + llm_params.pop("deployment_name", None) + return LLM(**llm_params) + except Exception as e: + logger.error(f"Error instantiating LLM from dict: {e}") + raise e + if llm_value is None: return _llm_via_environment_or_fallback() diff --git a/lib/crewai/src/crewai/utilities/planning_handler.py b/lib/crewai/src/crewai/utilities/planning_handler.py index 2497b9fc8..7a4053660 100644 --- a/lib/crewai/src/crewai/utilities/planning_handler.py +++ b/lib/crewai/src/crewai/utilities/planning_handler.py @@ -52,7 +52,7 @@ class CrewPlanner: planning_agent_llm: Optional LLM model for the planning agent. Defaults to None. """ self.tasks = tasks - self.planning_agent_llm = planning_agent_llm or "gpt-4o-mini" + self.planning_agent_llm = planning_agent_llm or "gpt-5.4-mini" def _handle_crew_planning(self) -> PlannerTaskPydanticOutput: """Handles the Crew planning by creating detailed step-by-step plans for each task. 
diff --git a/lib/crewai/src/crewai/utilities/token_counter_callback.py b/lib/crewai/src/crewai/utilities/token_counter_callback.py index 751aa1a03..6ab392ad8 100644 --- a/lib/crewai/src/crewai/utilities/token_counter_callback.py +++ b/lib/crewai/src/crewai/utilities/token_counter_callback.py @@ -13,15 +13,6 @@ from crewai.agents.agent_builder.utilities.base_token_process import TokenProces from crewai.utilities.logger_utils import suppress_warnings -try: - from litellm.integrations.custom_logger import CustomLogger as LiteLLMCustomLogger - - LITELLM_AVAILABLE = True -except ImportError: - LiteLLMCustomLogger = None # type: ignore[misc, assignment] - LITELLM_AVAILABLE = False - - class TokenCalcHandler(BaseModel): """Handler for calculating and tracking token usage in LLM calls. diff --git a/lib/crewai/tests/agents/test_agent.py b/lib/crewai/tests/agents/test_agent.py index 86e525b63..0435bed94 100644 --- a/lib/crewai/tests/agents/test_agent.py +++ b/lib/crewai/tests/agents/test_agent.py @@ -28,6 +28,19 @@ from crewai.tools import tool from crewai.utilities import RPMController +def test_agent_memory_true_uses_agent_llm_model(): + agent = Agent( + role="test role", + goal="test goal", + backstory="test backstory", + llm="ollama/llama3", + memory=True, + ) + + assert agent.memory is not None + assert agent.memory.llm == "ollama/llama3" + + def test_agent_llm_creation_with_env_vars(): original_api_key = os.environ.get("OPENAI_API_KEY") original_api_base = os.environ.get("OPENAI_API_BASE") diff --git a/lib/crewai/tests/agents/test_agent_executor.py b/lib/crewai/tests/agents/test_agent_executor.py index b22bee401..992f7460b 100644 --- a/lib/crewai/tests/agents/test_agent_executor.py +++ b/lib/crewai/tests/agents/test_agent_executor.py @@ -59,6 +59,10 @@ from crewai.experimental.agent_executor import ( ) from crewai.agents.parser import AgentAction, AgentFinish from crewai.events.event_bus import crewai_event_bus +from crewai.events.types.observation_events import ( + 
PlanStepCompletedEvent, + PlanStepStartedEvent, +) from crewai.events.types.tool_usage_events import ( ToolUsageFinishedEvent, ToolUsageStartedEvent, @@ -318,6 +322,41 @@ class TestAgentExecutor: assert result == "native_finished" assert get_llm_response_mock.call_args.kwargs["response_model"] is None + def test_call_llm_native_tools_falls_back_when_provider_rejects_tools( + self, mock_dependencies + ): + """Provider-level unsupported tools errors should downgrade to ReAct.""" + executor = _build_executor( + **mock_dependencies, + original_tools=[Mock()], + callbacks=[], + ) + executor._openai_tools = [{"type": "function", "function": {"name": "lookup"}}] + executor.state.use_native_tools = True + executor.state.pending_tool_calls = [Mock()] + executor.state.messages = [{"role": "user", "content": "Use a tool"}] + executor.tools = [Mock()] + executor.tools_names = "lookup" + executor.tools_description = "lookup: search for information" + + with patch( + "crewai.experimental.agent_executor.get_llm_response", + side_effect=RuntimeError( + "Error code: 400 - registry.ollama.ai/library/mariner:latest " + "does not support tools" + ), + ): + result = executor.call_llm_native_tools() + + assert result == "continue_reasoning" + assert executor.state.use_native_tools is False + assert executor.state.pending_tool_calls == [] + assert executor.state.messages[-1]["role"] == "user" + assert "Native tool calling is unavailable" in executor.state.messages[-1][ + "content" + ] + assert "Action Input" in executor.state.messages[-1]["content"] + def test_finalize_success(self, mock_dependencies): """Test finalize with valid AgentFinish.""" with patch.object(AgentExecutor, "_show_logs") as mock_show_logs: @@ -545,6 +584,7 @@ class TestStepExecutorCriticalFixes: tool = Mock() tool.name = "count_words" + tool.description = "count_words: Counts words in text" task = Mock() task.name = "test-task" task.description = "test task description" @@ -610,13 +650,126 @@ class 
TestStepExecutorCriticalFixes: "crewai.agents.step_executor.execute_tool_and_check_finality", return_value=ToolResult(result="2", result_as_answer=False), ): - output = step_executor._execute_text_tool_with_events(action) + todo = TodoItem(step_number=2, description="Count words") + output = step_executor._execute_text_tool_with_events(action, todo) crewai_event_bus.flush() assert output == "2" assert len(started_events) >= 1 assert len(finished_events) >= 1 + assert started_events[-1].plan_step_number == 2 + assert started_events[-1].plan_step_description == "Count words" + assert finished_events[-1].plan_step_number == 2 + assert finished_events[-1].plan_step_description == "Count words" + + def test_step_executor_falls_back_when_native_tools_are_rejected( + self, step_executor + ): + """Plan steps should retry through text tool calls when native tools fail.""" + step_executor._use_native_tools = True + step_executor._openai_tools = [{"type": "function", "function": {"name": "count_words"}}] + step_executor._available_functions = {"count_words": Mock()} + todo = TodoItem(step_number=1, description="Count words") + context = StepExecutionContext(task_description="task", task_goal="goal") + + with ( + patch.object( + step_executor, + "_execute_native", + side_effect=RuntimeError( + "registry.ollama.ai/library/mariner:latest does not support tools" + ), + ), + patch.object( + step_executor, + "_execute_text_parsed", + return_value="Counted words", + ) as text_parsed, + ): + result = step_executor.execute(todo, context) + + assert result.success is True + assert result.result == "Counted words" + assert step_executor._use_native_tools is False + fallback_messages = text_parsed.call_args.args[0] + # The original conversation is preserved (system + user) and the + # text-tooling instructions are appended instead of rebuilding. 
+ assert fallback_messages[0]["role"] == "system" + assert fallback_messages[-1]["role"] == "user" + assert "Action Input" in fallback_messages[-1]["content"] + + def test_plan_step_lifecycle_events_are_emitted_from_todo_transitions( + self, mock_dependencies + ): + """Todo transitions should publish authoritative plan step events.""" + from crewai.utilities.planning_types import TodoList + + executor = _build_executor(**mock_dependencies) + todo = TodoItem( + step_number=1, + description="Search the official release", + tool_to_use="search", + ) + executor.state.todos = TodoList(items=[todo]) + + started_events: list[PlanStepStartedEvent] = [] + completed_events: list[PlanStepCompletedEvent] = [] + + with crewai_event_bus.scoped_handlers(): + + @crewai_event_bus.on(PlanStepStartedEvent) + def _on_started(_source, event): + started_events.append(event) + + @crewai_event_bus.on(PlanStepCompletedEvent) + def _on_completed(_source, event): + completed_events.append(event) + + executor._mark_todo_running(todo) + executor._mark_todo_completed(1, result="Found release") + crewai_event_bus.flush() + + assert todo.status == "completed" + assert len(started_events) == 1 + assert started_events[0].step_number == 1 + assert started_events[0].step_description == "Search the official release" + assert started_events[0].tool_to_use == "search" + assert len(completed_events) == 1 + assert completed_events[0].success is True + assert completed_events[0].step_number == 1 + assert completed_events[0].result == "Found release" + + def test_failed_todo_transition_emits_failed_plan_step_event( + self, mock_dependencies + ): + """Failed todo transitions should publish failed plan step events.""" + from crewai.utilities.planning_types import TodoList + + executor = _build_executor(**mock_dependencies) + todo = TodoItem(step_number=1, description="Search release") + executor.state.todos = TodoList(items=[todo]) + completed_events: list[PlanStepCompletedEvent] = [] + + with 
crewai_event_bus.scoped_handlers(): + + @crewai_event_bus.on(PlanStepCompletedEvent) + def _on_completed(_source, event): + completed_events.append(event) + + executor._mark_todo_failed( + 1, + result="Error: no result", + error="No result", + ) + crewai_event_bus.flush() + + assert todo.status == "failed" + assert len(completed_events) == 1 + assert completed_events[0].success is False + assert completed_events[0].step_number == 1 + assert completed_events[0].result == "Error: no result" + assert completed_events[0].error == "No result" @patch("crewai.experimental.agent_executor.handle_output_parser_exception") def test_recover_from_parser_error( @@ -1649,6 +1802,12 @@ class TestReasoningEffort: executor.handle_step_observed_medium = ( AgentExecutor.handle_step_observed_medium.__get__(executor) ) + executor._mark_todo_completed = ( + AgentExecutor._mark_todo_completed.__get__(executor) + ) + executor._mark_todo_failed = ( + AgentExecutor._mark_todo_failed.__get__(executor) + ) success_todo = TodoItem( step_number=1, @@ -1715,6 +1874,9 @@ class TestReasoningEffort: executor.handle_step_observed_low = ( AgentExecutor.handle_step_observed_low.__get__(executor) ) + executor._mark_todo_completed = ( + AgentExecutor._mark_todo_completed.__get__(executor) + ) todo = TodoItem( step_number=1, @@ -1748,6 +1910,12 @@ class TestReasoningEffort: executor.handle_step_observed_low = ( AgentExecutor.handle_step_observed_low.__get__(executor) ) + executor._mark_todo_completed = ( + AgentExecutor._mark_todo_completed.__get__(executor) + ) + executor._mark_todo_failed = ( + AgentExecutor._mark_todo_failed.__get__(executor) + ) todo = TodoItem( step_number=1, @@ -2065,13 +2233,13 @@ class TestTodoStatusTracking: from crewai.experimental.agent_executor import AgentExecutor source = inspect.getsource(AgentExecutor.handle_step_observed_medium) - assert "mark_failed" in source, ( - "handle_step_observed_medium should use mark_failed for failed steps" + assert "_mark_todo_failed" in 
source, ( + "handle_step_observed_medium should use _mark_todo_failed for failed steps" ) failed_no_replan_idx = source.index("failed but no replan") after_comment = source[failed_no_replan_idx:] - assert "mark_completed" not in after_comment, ( - "mark_completed should not be called on failed steps" + assert "_mark_todo_completed" not in after_comment, ( + "_mark_todo_completed should not be called on failed steps" ) def test_failed_step_appears_in_get_failed_todos(self): diff --git a/lib/crewai/tests/agents/test_lite_agent.py b/lib/crewai/tests/agents/test_lite_agent.py index 354faf54b..a4f404c85 100644 --- a/lib/crewai/tests/agents/test_lite_agent.py +++ b/lib/crewai/tests/agents/test_lite_agent.py @@ -1096,6 +1096,7 @@ def test_lite_agent_memory_true_resolves_to_default_memory(): ) assert agent._memory is not None assert isinstance(agent._memory, Memory) + assert agent._memory.llm is agent.llm @pytest.mark.filterwarnings("ignore:LiteAgent is deprecated") diff --git a/lib/crewai/tests/agents/test_native_tool_calling.py b/lib/crewai/tests/agents/test_native_tool_calling.py index e3094db01..b7e0df199 100644 --- a/lib/crewai/tests/agents/test_native_tool_calling.py +++ b/lib/crewai/tests/agents/test_native_tool_calling.py @@ -17,6 +17,7 @@ import pytest from pydantic import BaseModel, Field from crewai import Agent, Crew, Task +from crewai.agents.parser import AgentFinish from crewai.events import crewai_event_bus from crewai.hooks import register_after_tool_call_hook, register_before_tool_call_hook from crewai.hooks.tool_hooks import ToolCallHookContext @@ -1196,6 +1197,50 @@ class TestNativeToolCallingJsonParseError: assert result["result"] == "ran: print(1)" + def test_native_tool_loop_falls_back_when_provider_rejects_tools(self) -> None: + """Unsupported native tools errors should continue through ReAct.""" + + class SearchTool(BaseTool): + name: str = "search" + description: str = "Search for information" + + def _run(self, query: str) -> str: + return 
f"result for {query}" + + executor = self._make_executor([SearchTool()]) + executor.llm = Mock() + executor.messages = [{"role": "user", "content": "Search for CrewAI"}] + executor.callbacks = [] + executor.iterations = 0 + executor.max_iter = 3 + executor.request_within_rpm_limit = None + executor.respect_context_window = False + + fallback_finish = AgentFinish( + thought="done", + output="final", + text="Final Answer: final", + ) + with ( + patch( + "crewai.agents.crew_agent_executor.get_llm_response", + side_effect=RuntimeError( + "registry.ollama.ai/library/mariner:latest does not support tools" + ), + ), + patch.object( + executor, + "_invoke_loop_react", + return_value=fallback_finish, + ) as react_loop, + ): + result = executor._invoke_loop_native_tools() + + assert result is fallback_finish + react_loop.assert_called_once() + assert "Native tool calling is unavailable" in executor.messages[-1]["content"] + assert "Action Input" in executor.messages[-1]["content"] + def test_dict_args_bypass_json_parsing(self) -> None: """When func_args is already a dict, no JSON parsing occurs.""" diff --git a/lib/crewai/tests/cli/test_run_crew.py b/lib/crewai/tests/cli/test_run_crew.py index 077741193..34074e526 100644 --- a/lib/crewai/tests/cli/test_run_crew.py +++ b/lib/crewai/tests/cli/test_run_crew.py @@ -1,12 +1,22 @@ """Tests for the ``crewai run`` command and its subprocess plumbing.""" +from pathlib import Path +import sys +from types import ModuleType +from types import SimpleNamespace from unittest import mock from click.testing import CliRunner import pytest from crewai_cli.cli import run -from crewai_cli.run_crew import CrewType, execute_command +from crewai_cli.run_crew import ( + CrewType, + _load_json_crew_for_tui, + _missing_input_names, + _prompt_for_missing_inputs, + execute_command, +) @pytest.fixture @@ -14,15 +24,17 @@ def runner() -> CliRunner: return CliRunner() -@mock.patch("crewai_cli.cli.run_crew") +@mock.patch("crewai_cli.run_crew.run_crew") def 
test_run_passes_filename_to_run_crew(run_crew_mock: mock.Mock, runner: CliRunner) -> None: result = runner.invoke(run, ["-f", "my_custom_trained.pkl"]) - run_crew_mock.assert_called_once_with(trained_agents_file="my_custom_trained.pkl") + run_crew_mock.assert_called_once_with( + trained_agents_file="my_custom_trained.pkl", + ) assert result.exit_code == 0 -@mock.patch("crewai_cli.cli.run_crew") +@mock.patch("crewai_cli.run_crew.run_crew") def test_run_without_filename_passes_none(run_crew_mock: mock.Mock, runner: CliRunner) -> None: result = runner.invoke(run) @@ -56,4 +68,101 @@ def test_execute_command_omits_env_var_when_filename_absent( execute_command(CrewType.STANDARD) _, kwargs = subprocess_run.call_args - assert "CREWAI_TRAINED_AGENTS_FILE" not in kwargs["env"] \ No newline at end of file + assert "CREWAI_TRAINED_AGENTS_FILE" not in kwargs["env"] + + +def test_missing_input_names_scans_agent_and_task_placeholders() -> None: + crew = SimpleNamespace( + agents=[ + SimpleNamespace( + role="Researcher for {topic}", + goal="Write for {audience}", + backstory="Ignore escaped {{not_an_input}}", + ) + ], + tasks=[ + SimpleNamespace( + description="Research {topic}", + expected_output="A post for {channel}", + output_file="{slug}.md", + ) + ], + ) + + assert _missing_input_names(crew, {"topic": "AI"}) == [ + "audience", + "channel", + "slug", + ] + + +def test_prompt_for_missing_inputs_merges_runtime_values(monkeypatch) -> None: + crew = SimpleNamespace( + agents=[SimpleNamespace(role="Researcher", goal="Cover {topic}", backstory="")], + tasks=[ + SimpleNamespace( + description="Write for {audience}", + expected_output="Post", + output_file=None, + ) + ], + ) + values = {"audience": "developers"} + + def prompt(label: str, **_kwargs: object) -> str: + if "audience" in str(label): + return values["audience"] + raise AssertionError(f"Unexpected prompt: {label}") + + monkeypatch.setattr("crewai_cli.run_crew.click.prompt", prompt) + + assert 
_prompt_for_missing_inputs(crew, {"topic": "AI"}) == { + "topic": "AI", + "audience": "developers", + } + + +def test_load_json_crew_for_tui_prepares_metadata_before_prompt(monkeypatch) -> None: + class FakeApp: + pass + + fake_tui_module = ModuleType("crewai_cli.crew_run_tui") + fake_tui_module.CrewRunApp = FakeApp + monkeypatch.setitem(sys.modules, "crewai_cli.crew_run_tui", fake_tui_module) + + crew = SimpleNamespace( + name="Demo Crew", + tasks=[ + SimpleNamespace(name="research_task", description="Research"), + SimpleNamespace(name="", description="Write summary for developers"), + ], + agents=[ + SimpleNamespace(role="Researcher", name="researcher"), + SimpleNamespace(role="", name="writer"), + ], + ) + prepared: list[object] = [] + + monkeypatch.setattr( + "crewai_cli.run_crew._json_loading_status", + lambda _message: mock.MagicMock(), + ) + monkeypatch.setattr( + "crewai_cli.run_crew._load_json_crew", + lambda _path: (crew, {"topic": "AI"}), + ) + monkeypatch.setattr( + "crewai_cli.run_crew._prepare_json_crew_for_tui", + lambda loaded_crew: prepared.append(loaded_crew), + ) + + app_cls, loaded_crew, default_inputs, task_names, agent_names = ( + _load_json_crew_for_tui(Path("crew.jsonc")) + ) + + assert app_cls is FakeApp + assert loaded_crew is crew + assert default_inputs == {"topic": "AI"} + assert task_names == ["research_task", "Write summary for developers"] + assert agent_names == ["Researcher", "writer"] + assert prepared == [crew] diff --git a/lib/crewai/tests/knowledge/test_knowledge.py b/lib/crewai/tests/knowledge/test_knowledge.py index a736b3050..7dd9cade3 100644 --- a/lib/crewai/tests/knowledge/test_knowledge.py +++ b/lib/crewai/tests/knowledge/test_knowledge.py @@ -531,6 +531,7 @@ def test_docling_source(mock_vector_db): @pytest.mark.vcr +@pytest.mark.timeout(180) def test_multiple_docling_sources() -> None: urls: list[Path | str] = [ "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/", diff --git 
a/lib/crewai/tests/llms/bedrock/test_bedrock.py b/lib/crewai/tests/llms/bedrock/test_bedrock.py index d7421e852..d48334d31 100644 --- a/lib/crewai/tests/llms/bedrock/test_bedrock.py +++ b/lib/crewai/tests/llms/bedrock/test_bedrock.py @@ -383,6 +383,7 @@ def test_bedrock_completion_with_tools(): assert len(call_kwargs['tools']) > 0 +@pytest.mark.timeout(180) def test_bedrock_raises_error_when_model_not_found(bedrock_mocks): """Test that BedrockCompletion raises appropriate error when model not found""" from botocore.exceptions import ClientError diff --git a/lib/crewai/tests/memory/test_dimension_mismatch.py b/lib/crewai/tests/memory/test_dimension_mismatch.py new file mode 100644 index 000000000..d4f3310e4 --- /dev/null +++ b/lib/crewai/tests/memory/test_dimension_mismatch.py @@ -0,0 +1,159 @@ +"""Embedding dimension mismatch must fail loudly with migration guidance. + +The default embedder changed from text-embedding-3-small (1536 dims) to +text-embedding-3-large (3072 dims); stores created before the upgrade must +not silently zero-fill vectors or return empty search results. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from crewai.memory.storage.backend import EmbeddingDimensionMismatchError +from crewai.memory.types import MemoryRecord + + +@pytest.fixture +def lancedb_path(tmp_path: Path) -> Path: + return tmp_path / "mem" + + +def _record(dim: int, content: str = "test") -> MemoryRecord: + return MemoryRecord(content=content, scope="/foo", embedding=[0.1] * dim) + + +def test_lancedb_save_mismatch_raises(lancedb_path: Path) -> None: + from crewai.memory.storage.lancedb_storage import LanceDBStorage + + storage = LanceDBStorage(path=str(lancedb_path), vector_dim=4) + storage.save([_record(4)]) + + with pytest.raises(EmbeddingDimensionMismatchError) as exc_info: + storage.save([_record(8, "new embedder output")]) + + message = str(exc_info.value) + assert "4-dimensional" in message + assert "8-dimensional" in message + assert "crewai reset-memories --memory" in message + assert "text-embedding-3-small" in message + + +def test_lancedb_mixed_batch_mismatch_raises(lancedb_path: Path) -> None: + """A single save() batch with inconsistent dimensions must be rejected.""" + from crewai.memory.storage.lancedb_storage import LanceDBStorage + + storage = LanceDBStorage(path=str(lancedb_path), vector_dim=4) + storage.save([_record(4)]) + + with pytest.raises(EmbeddingDimensionMismatchError): + storage.save([_record(4), _record(8, "stray dimension")]) + + +def test_lancedb_mixed_batch_on_fresh_store_raises(lancedb_path: Path) -> None: + from crewai.memory.storage.lancedb_storage import LanceDBStorage + + storage = LanceDBStorage(path=str(lancedb_path)) + with pytest.raises(EmbeddingDimensionMismatchError): + storage.save([_record(4), _record(8)]) + + +def test_lancedb_search_mismatch_raises(lancedb_path: Path) -> None: + from crewai.memory.storage.lancedb_storage import LanceDBStorage + + storage = LanceDBStorage(path=str(lancedb_path), vector_dim=4) + storage.save([_record(4)]) + + with 
pytest.raises(EmbeddingDimensionMismatchError): + storage.search([0.1] * 8) + + +def test_lancedb_update_mismatch_raises(lancedb_path: Path) -> None: + from crewai.memory.storage.lancedb_storage import LanceDBStorage + + storage = LanceDBStorage(path=str(lancedb_path), vector_dim=4) + record = _record(4) + storage.save([record]) + + stale = MemoryRecord( + id=record.id, content="updated", scope="/foo", embedding=[0.1] * 8 + ) + with pytest.raises(EmbeddingDimensionMismatchError): + storage.update(stale) + + +def test_lancedb_reopened_store_detects_mismatch(lancedb_path: Path) -> None: + """The upgrade scenario: an old store reopened with a new embedder.""" + from crewai.memory.storage.lancedb_storage import LanceDBStorage + + old = LanceDBStorage(path=str(lancedb_path), vector_dim=4) + old.save([_record(4)]) + + reopened = LanceDBStorage(path=str(lancedb_path)) + with pytest.raises(EmbeddingDimensionMismatchError): + reopened.save([_record(8)]) + with pytest.raises(EmbeddingDimensionMismatchError): + reopened.search([0.1] * 8) + + +def test_lancedb_matching_dim_still_works(lancedb_path: Path) -> None: + from crewai.memory.storage.lancedb_storage import LanceDBStorage + + storage = LanceDBStorage(path=str(lancedb_path), vector_dim=4) + storage.save([_record(4)]) + storage.save([_record(4, "second")]) + + assert len(storage.search([0.1] * 4, limit=5)) == 2 + + +def test_error_is_not_a_runtime_error() -> None: + """Background-save plumbing treats RuntimeError as executor shutdown and + silently drops the save — the mismatch must not be classified that way.""" + err = EmbeddingDimensionMismatchError(1536, 3072) + assert not isinstance(err, RuntimeError) + assert isinstance(err, ValueError) + + +def test_background_save_propagates_dimension_mismatch(tmp_path: Path) -> None: + from unittest.mock import MagicMock + + from crewai.memory.unified_memory import Memory + + mem = Memory( + storage=str(tmp_path / "db"), + llm=MagicMock(), + embedder=lambda texts: [[0.1] * 4 for 
_ in texts], + ) + + def raise_mismatch(*_args: object, **_kwargs: object) -> None: + raise EmbeddingDimensionMismatchError(1536, 3072) + + mem._encode_batch = raise_mismatch # type: ignore[method-assign] + + with pytest.raises(EmbeddingDimensionMismatchError): + mem._background_encode_batch(["content"], None, None, None, None, None, False, None) + + +def test_background_save_still_swallows_shutdown_runtime_error(tmp_path: Path) -> None: + from unittest.mock import MagicMock + + from crewai.memory.unified_memory import Memory + + mem = Memory( + storage=str(tmp_path / "db"), + llm=MagicMock(), + embedder=lambda texts: [[0.1] * 4 for _ in texts], + ) + + def raise_shutdown(*_args: object, **_kwargs: object) -> None: + raise RuntimeError("cannot schedule new futures after shutdown") + + mem._encode_batch = raise_shutdown # type: ignore[method-assign] + + assert ( + mem._background_encode_batch( + ["content"], None, None, None, None, None, False, None + ) + == [] + ) diff --git a/lib/crewai/tests/memory/test_memory_root_scope.py b/lib/crewai/tests/memory/test_memory_root_scope.py index d7594ab42..567384a34 100644 --- a/lib/crewai/tests/memory/test_memory_root_scope.py +++ b/lib/crewai/tests/memory/test_memory_root_scope.py @@ -409,6 +409,36 @@ class TestCrewAutoScoping: assert crew._memory is not None assert hasattr(crew._memory, "root_scope") assert crew._memory.root_scope == "/crew/research-crew" + assert crew._memory.llm is agent.llm + + def test_crew_memory_true_prefers_chat_llm(self) -> None: + """Auto-created crew memory uses chat_llm when configured.""" + from crewai.agent import Agent + from crewai.crew import Crew + from crewai.task import Task + + agent = Agent( + role="Researcher", + goal="Research", + backstory="Expert researcher", + llm="openai/gpt-4o-mini", + ) + task = Task( + description="Do research", + expected_output="Report", + agent=agent, + ) + + crew = Crew( + name="Research Crew", + agents=[agent], + tasks=[task], + chat_llm="ollama/llama3", + 
memory=True, + ) + + assert crew._memory is not None + assert crew._memory.llm == "ollama/llama3" def test_crew_memory_instance_preserves_no_root_scope( self, tmp_path: Path, mock_embedder: MagicMock diff --git a/lib/crewai/tests/memory/test_unified_memory.py b/lib/crewai/tests/memory/test_unified_memory.py index 776c36a2c..665893233 100644 --- a/lib/crewai/tests/memory/test_unified_memory.py +++ b/lib/crewai/tests/memory/test_unified_memory.py @@ -4,6 +4,7 @@ from __future__ import annotations from datetime import datetime, timedelta from pathlib import Path +import threading from unittest.mock import MagicMock import pytest @@ -489,8 +490,8 @@ def test_composite_score_reranks_results( """Same semantic score: high-importance recent memory ranks first.""" from crewai.memory.unified_memory import Memory - # Use same dim as default LanceDB (1536) so storage does not overwrite embedding - emb = [0.1] * 1536 + # Use same dim as default LanceDB (3072) so storage does not overwrite embedding + emb = [0.1] * 3072 mem = Memory( storage=str(tmp_path / "rerank_db"), llm=MagicMock(), @@ -974,6 +975,42 @@ def test_recall_drains_pending_writes(tmp_path: Path, mock_embedder: MagicMock) assert "Python" in matches[0].record.content +def test_drain_writes_reports_background_save_failure_without_raising( + tmp_path: Path, mock_embedder: MagicMock +) -> None: + """Background memory failures should be reported without failing cleanup.""" + from crewai.events.event_bus import crewai_event_bus + from crewai.events.types.memory_events import MemorySaveFailedEvent + from crewai.memory.unified_memory import Memory + + failure_seen = threading.Event() + failures: list[MemorySaveFailedEvent] = [] + mem = Memory( + storage=str(tmp_path / "db"), + llm=MagicMock(), + embedder=mock_embedder, + ) + + def fail_save() -> None: + raise ValueError("invalid model ID") + + with crewai_event_bus.scoped_handlers(): + + @crewai_event_bus.on(MemorySaveFailedEvent) + def on_memory_save_failed(_source, 
event): + failures.append(event) + failure_seen.set() + + mem._submit_save(fail_save) + mem.drain_writes() + + assert failure_seen.wait(timeout=2) + + assert failures + assert failures[0].value == "background save" + assert failures[0].error == "invalid model ID" + + def test_close_drains_and_shuts_down(tmp_path: Path, mock_embedder: MagicMock) -> None: """close() should drain pending saves and shut down the pool.""" from crewai.memory.unified_memory import Memory diff --git a/lib/crewai/tests/project/test_crew_loader.py b/lib/crewai/tests/project/test_crew_loader.py new file mode 100644 index 000000000..04af84372 --- /dev/null +++ b/lib/crewai/tests/project/test_crew_loader.py @@ -0,0 +1,424 @@ +"""Tests for crewai.project.crew_loader.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from crewai.llms.base_llm import BaseLLM +from crewai.project.json_loader import JSONProjectError, JSONProjectValidationError +from crewai.project.crew_loader import load_crew + + +def _write_agent(agents_dir: Path, name: str, **overrides) -> Path: + defn = { + "role": f"{name} role", + "goal": f"{name} goal", + "backstory": f"{name} backstory", + } + defn.update(overrides) + f = agents_dir / f"{name}.jsonc" + f.write_text(json.dumps(defn)) + return f + + +def _write_crew(project_dir: Path, crew_def: dict) -> Path: + f = project_dir / "crew.jsonc" + f.write_text(json.dumps(crew_def)) + return f + + +class TestLoadCrew: + def test_minimal_crew(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "researcher") + + crew_def = { + "name": "test_crew", + "agents": ["researcher"], + "tasks": [ + { + "name": "research", + "description": "Do research", + "expected_output": "Research findings", + "agent": "researcher", + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, inputs = load_crew(crew_file) + assert crew.name == "test_crew" + assert len(crew.agents) == 1 + assert 
len(crew.tasks) == 1 + assert crew.tasks[0].description == "Do research" + assert inputs == {} + + def test_crew_with_default_inputs(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "researcher") + + crew_def = { + "name": "test_crew", + "agents": ["researcher"], + "tasks": [ + { + "name": "research", + "description": "Research {topic}", + "expected_output": "Findings about {topic}", + "agent": "researcher", + } + ], + "inputs": {"topic": "AI"}, + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, inputs = load_crew(crew_file) + assert inputs == {"topic": "AI"} + + def test_crew_with_multiple_agents(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "researcher") + _write_agent(agents_dir, "writer") + + crew_def = { + "name": "multi_crew", + "agents": ["researcher", "writer"], + "tasks": [ + { + "name": "research", + "description": "Do research", + "expected_output": "Findings", + "agent": "researcher", + }, + { + "name": "write", + "description": "Write report", + "expected_output": "Report", + "agent": "writer", + "context": ["research"], + }, + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, _ = load_crew(crew_file) + assert len(crew.agents) == 2 + assert len(crew.tasks) == 2 + # Second task should have context referencing first task + assert crew.tasks[1].context is not None + assert len(crew.tasks[1].context) == 1 + + def test_crew_hierarchical_process(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "worker") + + crew_def = { + "name": "hier_crew", + "agents": ["worker"], + "tasks": [ + { + "name": "work", + "description": "Do work", + "expected_output": "Work done", + "agent": "worker", + } + ], + "process": "hierarchical", + "manager_llm": "openai/gpt-4o", + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, _ = load_crew(crew_file) + from crewai import Process + 
assert crew.process == Process.hierarchical + + def test_crew_accepts_llm_config_objects(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "worker", llm="ollama/llama3") + + crew_def = { + "name": "llm_config_crew", + "agents": ["worker"], + "tasks": [ + { + "name": "work", + "description": "Do work", + "expected_output": "Work done", + "agent": "worker", + } + ], + "process": "hierarchical", + "manager_llm": { + "model": "llama3", + "provider": "ollama", + "base_url": "http://localhost:11434", + }, + "planning_llm": { + "model": "deepseek-chat", + "provider": "deepseek", + "api_key": "test-key", + }, + "chat_llm": { + "model": "openrouter/anthropic/claude-3-opus", + "api_key": "test-key", + }, + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, _ = load_crew(crew_file) + + assert isinstance(crew.manager_llm, BaseLLM) + assert crew.manager_llm.model == "llama3" + assert crew.manager_llm.provider == "ollama" + assert crew.manager_llm.base_url == "http://localhost:11434/v1" + assert isinstance(crew.planning_llm, BaseLLM) + assert crew.planning_llm.model == "deepseek-chat" + assert crew.planning_llm.provider == "deepseek" + assert isinstance(crew.chat_llm, BaseLLM) + assert crew.chat_llm.model == "anthropic/claude-3-opus" + assert crew.chat_llm.provider == "openrouter" + + def test_crew_accepts_public_crew_config_fields(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "worker") + + crew_def = { + "name": "config_crew", + "agents": ["worker"], + "tasks": [ + { + "name": "work", + "description": "Do work", + "expected_output": "Work done", + "agent": "worker", + } + ], + "cache": False, + "max_rpm": 12, + "planning": True, + "planning_llm": "openai/gpt-4o-mini", + "share_crew": False, + "output_log_file": "crew.log", + "tracing": False, + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, _ = load_crew(crew_file) + assert crew.cache is False + 
assert crew.max_rpm == 12 + assert crew.planning is True + assert crew.planning_llm == "openai/gpt-4o-mini" + assert crew.output_log_file == "crew.log" + assert crew.tracing is False + + def test_crew_with_output_file(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "writer") + + crew_def = { + "name": "output_crew", + "agents": ["writer"], + "tasks": [ + { + "name": "write", + "description": "Write something", + "expected_output": "Written content", + "agent": "writer", + "output_file": "output.md", + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, _ = load_crew(crew_file) + assert crew.tasks[0].output_file == "output.md" + + def test_task_accepts_public_task_config_fields(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "writer") + + schema = { + "title": "ReportOutput", + "type": "object", + "properties": { + "summary": {"type": "string"}, + }, + "required": ["summary"], + } + crew_def = { + "name": "task_config_crew", + "agents": ["writer"], + "tasks": [ + { + "name": "write", + "description": "Write something", + "expected_output": "Written content", + "agent": "writer", + "output_json": schema, + "response_model": schema, + "create_directory": False, + "human_input": True, + "markdown": True, + "guardrail": "Return a summary field.", + "guardrail_max_retries": 1, + "allow_crewai_trigger_context": False, + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, _ = load_crew(crew_file) + task = crew.tasks[0] + assert task.output_json is not None + assert "summary" in task.output_json.model_fields + assert task.response_model is not None + assert task.create_directory is False + assert task.human_input is True + assert task.markdown is True + assert task.guardrail == "Return a summary field." 
+ assert task.allow_crewai_trigger_context is False + + def test_missing_agent_file_raises(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + + crew_def = { + "name": "broken_crew", + "agents": ["nonexistent"], + "tasks": [], + } + crew_file = _write_crew(tmp_path, crew_def) + + with pytest.raises(FileNotFoundError, match="nonexistent"): + load_crew(crew_file) + + def test_task_references_unknown_agent_raises(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "researcher") + + crew_def = { + "name": "bad_ref_crew", + "agents": ["researcher"], + "tasks": [ + { + "name": "task1", + "description": "Do something", + "expected_output": "Something", + "agent": "unknown_agent", + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + with pytest.raises(JSONProjectError, match="unknown_agent"): + load_crew(crew_file) + + def test_task_context_order_dependency(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "worker") + + crew_def = { + "name": "order_crew", + "agents": ["worker"], + "tasks": [ + { + "name": "task2", + "description": "Second task", + "expected_output": "Output", + "agent": "worker", + "context": ["task1"], + }, + { + "name": "task1", + "description": "First task", + "expected_output": "Output", + "agent": "worker", + }, + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + with pytest.raises(JSONProjectError, match="task1"): + load_crew(crew_file) + + def test_runtime_fields_are_rejected(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "worker") + + crew_def = { + "name": "bad_runtime_crew", + "id": "00000000-0000-4000-8000-000000000000", + "agents": ["worker"], + "tasks": [ + { + "name": "work", + "description": "Work", + "expected_output": "Done", + "agent": "worker", + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + with 
pytest.raises(JSONProjectValidationError, match="runtime-only"): + load_crew(crew_file) + + def test_custom_agents_dir(self, tmp_path: Path): + custom_dir = tmp_path / "my_agents" + custom_dir.mkdir() + _write_agent(custom_dir, "analyst") + + crew_def = { + "name": "custom_dir_crew", + "agents": ["analyst"], + "tasks": [ + { + "name": "analyze", + "description": "Analyze data", + "expected_output": "Analysis", + "agent": "analyst", + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, _ = load_crew(crew_file, agents_dir=custom_dir) + assert len(crew.agents) == 1 + + def test_crew_verbose_and_memory_flags(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "worker") + + crew_def = { + "name": "flags_crew", + "agents": ["worker"], + "tasks": [ + { + "name": "work", + "description": "Work", + "expected_output": "Done", + "agent": "worker", + } + ], + "verbose": True, + "memory": True, + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, _ = load_crew(crew_file) + assert crew.verbose is True diff --git a/lib/crewai/tests/project/test_json_loader.py b/lib/crewai/tests/project/test_json_loader.py new file mode 100644 index 000000000..be8a034d1 --- /dev/null +++ b/lib/crewai/tests/project/test_json_loader.py @@ -0,0 +1,465 @@ +"""Tests for crewai.project.json_loader.""" + +from __future__ import annotations + +import json +from pathlib import Path +import sys + +import pytest + +from crewai.llms.base_llm import BaseLLM +from crewai.project.json_loader import ( + JSONProjectValidationError, + find_json_project_file, + load_agent, + strip_jsonc_comments, +) + + +class TestStripJsoncComments: + def test_strips_single_line_comments(self): + text = '{\n "key": "value" // this is a comment\n}' + result = strip_jsonc_comments(text) + data = json.loads(result) + assert data["key"] == "value" + + def test_strips_block_comments(self): + text = '{\n /* block comment */\n "key": "value"\n}' + result = 
strip_jsonc_comments(text) + data = json.loads(result) + assert data["key"] == "value" + + def test_preserves_urls_with_double_slash(self): + text = '{\n "url": "https://example.com"\n}' + result = strip_jsonc_comments(text) + data = json.loads(result) + assert data["url"] == "https://example.com" + + def test_preserves_comment_markers_inside_strings(self): + text = """{ + "url": "https://example.com/a//b", + "pattern": "keep /* this */ text", + "text": "value // not a comment", +}""" + result = strip_jsonc_comments(text) + data = json.loads(result) + assert data["url"] == "https://example.com/a//b" + assert data["pattern"] == "keep /* this */ text" + assert data["text"] == "value // not a comment" + + def test_removes_trailing_commas(self): + text = '{\n "a": 1,\n "b": 2,\n}' + result = strip_jsonc_comments(text) + data = json.loads(result) + assert data == {"a": 1, "b": 2} + + def test_removes_trailing_commas_in_arrays(self): + text = '{"arr": [1, 2, 3,]}' + result = strip_jsonc_comments(text) + data = json.loads(result) + assert data["arr"] == [1, 2, 3] + + def test_plain_json_unchanged(self): + text = '{"key": "value"}' + result = strip_jsonc_comments(text) + assert json.loads(result) == {"key": "value"} + + +def test_find_json_project_file_prefers_jsonc(tmp_path: Path): + (tmp_path / "agent.json").write_text("{}") + jsonc_path = tmp_path / "agent.jsonc" + jsonc_path.write_text("{}") + + assert find_json_project_file(tmp_path, "agent") == jsonc_path + + +class TestLoadAgent: + def test_load_minimal_agent(self, tmp_path: Path): + agent_def = { + "role": "Researcher", + "goal": "Find information", + "backstory": "Expert researcher.", + } + agent_file = tmp_path / "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + agent = load_agent(agent_file) + assert agent.role == "Researcher" + assert agent.goal == "Find information" + assert agent.backstory == "Expert researcher." 
+ + def test_load_agent_with_llm(self, tmp_path: Path): + agent_def = { + "role": "Coder", + "goal": "Write code", + "backstory": "Expert coder.", + "llm": "openai/gpt-4o", + } + agent_file = tmp_path / "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + agent = load_agent(agent_file) + assert agent.role == "Coder" + + def test_load_agent_with_llm_config_object(self, tmp_path: Path): + agent_def = { + "role": "Coder", + "goal": "Write code", + "backstory": "Expert coder.", + "llm": { + "model": "llama3", + "provider": "ollama", + "temperature": 0.2, + "base_url": "http://localhost:11434", + }, + } + agent_file = tmp_path / "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + agent = load_agent(agent_file) + + assert isinstance(agent.llm, BaseLLM) + assert agent.llm.model == "llama3" + assert agent.llm.provider == "ollama" + assert agent.llm.temperature == 0.2 + assert agent.llm.base_url == "http://localhost:11434/v1" + + def test_load_agent_with_planning_config_llm_object(self, tmp_path: Path): + agent_def = { + "role": "Planner", + "goal": "Plan work", + "backstory": "Expert planner.", + "llm": "ollama/llama3", + "planning_config": { + "reasoning_effort": "high", + "llm": { + "model": "deepseek-chat", + "provider": "deepseek", + "api_key": "test-key", + }, + }, + } + agent_file = tmp_path / "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + agent = load_agent(agent_file) + + assert agent.planning_config is not None + assert isinstance(agent.planning_config.llm, BaseLLM) + assert agent.planning_config.llm.model == "deepseek-chat" + assert agent.planning_config.llm.provider == "deepseek" + assert agent.planning_config.llm.api_key == "test-key" + + def test_load_agent_with_settings_block(self, tmp_path: Path): + agent_def = { + "role": "Analyst", + "goal": "Analyze data", + "backstory": "Data expert.", + "settings": { + "verbose": True, + "allow_delegation": True, + "max_iter": 10, + "cache": False, + }, + } + agent_file = tmp_path 
/ "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + agent = load_agent(agent_file) + assert agent.role == "Analyst" + assert agent.verbose is True + assert agent.allow_delegation is True + assert agent.max_iter == 10 + assert agent.cache is False + + def test_load_agent_with_top_level_settings(self, tmp_path: Path): + agent_def = { + "role": "Analyst", + "goal": "Analyze data", + "backstory": "Data expert.", + "verbose": True, + "max_iter": 15, + } + agent_file = tmp_path / "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + agent = load_agent(agent_file) + assert agent.verbose is True + assert agent.max_iter == 15 + + def test_load_agent_accepts_public_agent_config_fields(self, tmp_path: Path): + agent_def = { + "role": "Analyst", + "goal": "Analyze data", + "backstory": "Data expert.", + "max_execution_time": 30, + "use_system_prompt": False, + "system_template": "system: {{ .System }}", + "prompt_template": "prompt: {{ .Prompt }}", + "response_template": "response: {{ .Response }}", + "inject_date": True, + "date_format": "%Y", + "guardrail": "Only return concise answers.", + "guardrail_max_retries": 1, + "security_config": {"fingerprint": "agent-seed"}, + } + agent_file = tmp_path / "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + agent = load_agent(agent_file) + assert agent.max_execution_time == 30 + assert agent.use_system_prompt is False + assert agent.system_template == "system: {{ .System }}" + assert agent.inject_date is True + assert agent.guardrail == "Only return concise answers." 
+ + def test_load_agent_accepts_serialized_tool_dict( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + module = tmp_path / "test_tools.py" + module.write_text( + "from crewai.tools.base_tool import BaseTool\n" + "class EchoTool(BaseTool):\n" + " name: str = 'echo'\n" + " description: str = 'Echo input'\n" + " def _run(self, value: str = '') -> str:\n" + " return value\n" + ) + monkeypatch.syspath_prepend(str(tmp_path)) + sys.modules.pop("test_tools", None) + + agent_def = { + "role": "Tool User", + "goal": "Use tools", + "backstory": "Tool expert.", + "tools": [ + { + "tool_type": "test_tools.EchoTool", + "name": "echo", + "description": "Echo input", + } + ], + } + agent_file = tmp_path / "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + agent = load_agent(agent_file) + assert len(agent.tools or []) == 1 + assert agent.tools[0].name == "echo" + + def test_load_agent_rejects_runtime_fields(self, tmp_path: Path): + agent_def = { + "id": "00000000-0000-4000-8000-000000000000", + "role": "Analyst", + "goal": "Analyze data", + "backstory": "Data expert.", + } + agent_file = tmp_path / "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + with pytest.raises(JSONProjectValidationError, match="runtime-only"): + load_agent(agent_file) + + def test_settings_block_takes_precedence(self, tmp_path: Path): + agent_def = { + "role": "Analyst", + "goal": "Analyze data", + "backstory": "Data expert.", + "verbose": False, + "settings": { + "verbose": True, + }, + } + agent_file = tmp_path / "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + agent = load_agent(agent_file) + assert agent.verbose is True + + def test_load_agent_from_jsonc(self, tmp_path: Path): + jsonc_content = """{ + // This is a JSONC file with comments + "role": "Writer", + "goal": "Write articles", + "backstory": "Expert writer.", + /* multi-line + comment */ +}""" + agent_file = tmp_path / "agent.jsonc" + agent_file.write_text(jsonc_content) + + agent = 
load_agent(agent_file) + assert agent.role == "Writer" + + def test_load_agent_missing_required_fields(self, tmp_path: Path): + agent_def = {"role": "Incomplete"} + agent_file = tmp_path / "agent.json" + agent_file.write_text(json.dumps(agent_def)) + + with pytest.raises(Exception): + load_agent(agent_file) + + def test_load_agent_file_not_found(self): + with pytest.raises(FileNotFoundError): + load_agent(Path("/nonexistent/agent.json")) + + +class TestResolveTools: + def test_unknown_tool_raises_with_guidance(self): + from crewai.project.json_loader import JSONProjectError, _resolve_tools + + with pytest.raises(JSONProjectError, match="Unknown tool 'NotARealToolXYZ'"): + _resolve_tools(["NotARealToolXYZ"]) + + def test_missing_custom_tool_raises(self, tmp_path, monkeypatch): + from crewai.project.json_loader import JSONProjectError, _resolve_tools + + monkeypatch.chdir(tmp_path) + with pytest.raises(JSONProjectError, match="custom:missing"): + _resolve_tools(["custom:missing"]) + + def test_custom_tool_without_basetool_subclass_raises(self, tmp_path, monkeypatch): + from crewai.project.json_loader import JSONProjectError, _resolve_tools + + monkeypatch.chdir(tmp_path) + tools_dir = tmp_path / "tools" + tools_dir.mkdir() + (tools_dir / "empty.py").write_text("x = 1\n") + + with pytest.raises(JSONProjectError, match="No BaseTool subclass"): + _resolve_tools(["custom:empty"]) + + def test_custom_tool_resolves(self, tmp_path, monkeypatch): + from crewai.project.json_loader import _resolve_tools + + monkeypatch.chdir(tmp_path) + tools_dir = tmp_path / "tools" + tools_dir.mkdir() + (tools_dir / "echo.py").write_text( + "from crewai.tools.base_tool import BaseTool\n" + "\n" + "class EchoTool(BaseTool):\n" + " name: str = 'echo'\n" + " description: str = 'echo input'\n" + "\n" + " def _run(self, text: str) -> str:\n" + " return text\n" + ) + + tools = _resolve_tools(["custom:echo"]) + + assert len(tools) == 1 + assert tools[0].name == "echo" + + def 
test_serialized_tool_dicts_pass_through(self): + from crewai.project.json_loader import _resolve_tools + + spec = {"tool_type": "some.module.Tool"} + assert _resolve_tools([spec]) == [spec] + + +class TestValidationDoesNotExecuteTools: + def _write_project(self, root, tool_line='"custom:landmine"'): + agents_dir = root / "agents" + agents_dir.mkdir() + (agents_dir / "worker.jsonc").write_text( + "{\n" + ' "role": "Worker",\n' + ' "goal": "Work",\n' + ' "backstory": "Works hard",\n' + f' "tools": [{tool_line}]\n' + "}\n" + ) + crew_path = root / "crew.jsonc" + crew_path.write_text( + "{\n" + ' "agents": ["worker"],\n' + ' "tasks": [\n' + ' {"name": "t1", "description": "Do work", ' + '"expected_output": "Done", "agent": "worker"}\n' + " ]\n" + "}\n" + ) + return crew_path + + def test_validate_does_not_execute_custom_tool_code(self, tmp_path): + from crewai.project.json_loader import validate_crew_project + + sentinel = tmp_path / "executed.txt" + tools_dir = tmp_path / "tools" + tools_dir.mkdir() + (tools_dir / "landmine.py").write_text( + f"open({str(sentinel)!r}, 'w').write('boom')\n" + ) + crew_path = self._write_project(tmp_path) + + project = validate_crew_project(crew_path, tmp_path / "agents") + + assert not sentinel.exists(), "validation must not execute tools/.py" + assert project.agent_names == ["worker"] + + def test_validate_reports_missing_custom_tool_file(self, tmp_path): + from crewai.project.json_loader import ( + JSONProjectValidationError, + validate_crew_project, + ) + + crew_path = self._write_project(tmp_path) + + with pytest.raises(JSONProjectValidationError) as exc_info: + validate_crew_project(crew_path, tmp_path / "agents") + + assert "custom:landmine" in str(exc_info.value) + assert "not found" in str(exc_info.value) + + def test_validate_reports_path_escaping_custom_tool(self, tmp_path): + from crewai.project.json_loader import ( + JSONProjectValidationError, + validate_crew_project, + ) + + crew_path = self._write_project(tmp_path, 
tool_line='"custom:../evil"') + + with pytest.raises(JSONProjectValidationError) as exc_info: + validate_crew_project(crew_path, tmp_path / "agents") + + assert "Invalid custom tool name" in str(exc_info.value) + + +class TestCustomToolPathSafety: + @pytest.mark.parametrize( + "bad_name", + ["../evil", "..", "sub/inner", "/etc/passwd", "a-b", "", "name.py"], + ) + def test_unsafe_names_rejected_at_runtime(self, bad_name, tmp_path, monkeypatch): + from crewai.project.json_loader import JSONProjectError, _resolve_tools + + monkeypatch.chdir(tmp_path) + with pytest.raises(JSONProjectError, match="Invalid custom tool name"): + _resolve_tools([f"custom:{bad_name}"]) + + def test_resolves_relative_to_project_root_not_cwd(self, tmp_path, monkeypatch): + from crewai.project.json_loader import _resolve_tools + + project_root = tmp_path / "project" + tools_dir = project_root / "tools" + tools_dir.mkdir(parents=True) + (tools_dir / "echo.py").write_text( + "from crewai.tools.base_tool import BaseTool\n" + "\n" + "class EchoTool(BaseTool):\n" + " name: str = 'echo'\n" + " description: str = 'echo input'\n" + "\n" + " def _run(self, text: str) -> str:\n" + " return text\n" + ) + elsewhere = tmp_path / "elsewhere" + elsewhere.mkdir() + monkeypatch.chdir(elsewhere) + + tools = _resolve_tools(["custom:echo"], project_root=project_root) + + assert len(tools) == 1 + assert tools[0].name == "echo" diff --git a/lib/crewai/tests/rag/embeddings/test_backward_compatibility.py b/lib/crewai/tests/rag/embeddings/test_backward_compatibility.py index d10a75cde..09c1e233d 100644 --- a/lib/crewai/tests/rag/embeddings/test_backward_compatibility.py +++ b/lib/crewai/tests/rag/embeddings/test_backward_compatibility.py @@ -46,6 +46,30 @@ class TestModelKeyBackwardCompatibility: ) assert provider.model_name == "text-embedding-3-large" + def test_openai_provider_ignores_chat_model_env(self, monkeypatch): + """Test OpenAI embeddings don't inherit the chat model env var.""" + 
monkeypatch.setenv("OPENAI_MODEL_NAME", "gpt-5.5") + monkeypatch.setenv("MODEL", "gpt-5.5") + monkeypatch.delenv("EMBEDDINGS_OPENAI_MODEL_NAME", raising=False) + + provider = OpenAIProvider(api_key="test-key") + + assert provider.model_name == "text-embedding-3-large" + + def test_azure_provider_ignores_openai_chat_model_env(self, monkeypatch): + """Test Azure embeddings don't inherit the OpenAI chat model env var.""" + monkeypatch.setenv("OPENAI_MODEL_NAME", "gpt-5.5") + monkeypatch.setenv("MODEL", "gpt-5.5") + monkeypatch.delenv("EMBEDDINGS_OPENAI_MODEL_NAME", raising=False) + monkeypatch.delenv("AZURE_OPENAI_MODEL_NAME", raising=False) + + provider = AzureProvider( + api_key="test-key", + deployment_id="test-deployment", + ) + + assert provider.model_name == "text-embedding-3-large" + def test_cohere_provider_accepts_model_key(self): """Test Cohere provider accepts 'model' as alias for 'model_name'.""" provider = CohereProvider( @@ -361,4 +385,4 @@ class TestLegacyConfigurationFormats: deployment_id="test-deployment", model="text-embedding-3-large", ) - assert provider.model_name == "text-embedding-3-large" \ No newline at end of file + assert provider.model_name == "text-embedding-3-large" diff --git a/lib/crewai/tests/telemetry/test_telemetry.py b/lib/crewai/tests/telemetry/test_telemetry.py index e0da60c37..a37bac6c7 100644 --- a/lib/crewai/tests/telemetry/test_telemetry.py +++ b/lib/crewai/tests/telemetry/test_telemetry.py @@ -5,7 +5,6 @@ from unittest.mock import patch import pytest from crewai import Agent, Crew, Task from crewai.telemetry import Telemetry -from opentelemetry import trace from opentelemetry.sdk.trace import TracerProvider @@ -85,9 +84,8 @@ def test_telemetry_fails_due_connect_timeout(export_mock, logger_mock): os.environ, {"CREWAI_DISABLE_TELEMETRY": "false", "OTEL_SDK_DISABLED": "false"} ): telemetry = Telemetry() - telemetry.set_tracer() - tracer = trace.get_tracer(__name__) + tracer = telemetry.provider.get_tracer(__name__) with 
tracer.start_as_current_span("test-span"): agent = Agent( role="agent", @@ -103,7 +101,7 @@ def test_telemetry_fails_due_connect_timeout(export_mock, logger_mock): crew = Crew(agents=[agent], tasks=[task], name="TestCrew") crew.kickoff() - trace.get_tracer_provider().force_flush() + telemetry.provider.force_flush() assert export_mock.called assert logger_mock.call_count == export_mock.call_count diff --git a/lib/crewai/tests/test_async_human_feedback.py b/lib/crewai/tests/test_async_human_feedback.py index 95eed50b7..74b17f6b2 100644 --- a/lib/crewai/tests/test_async_human_feedback.py +++ b/lib/crewai/tests/test_async_human_feedback.py @@ -1066,7 +1066,11 @@ class TestLLMObjectPreservedInContext: persistence = SQLiteFlowPersistence(db_path) from crewai.llm import LLM - mock_llm_obj = LLM(model="gemini-2.0-flash", provider="gemini") + mock_llm_obj = LLM( + model="llama3", + provider="ollama", + base_url="http://localhost:11434", + ) class PausingProvider: def __init__(self, persistence: SQLiteFlowPersistence): @@ -1116,19 +1120,19 @@ class TestLLMObjectPreservedInContext: assert provider.captured_context is not None assert isinstance(provider.captured_context.llm, dict) - assert provider.captured_context.llm["model"] == "gemini/gemini-2.0-flash" + assert provider.captured_context.llm["model"] == "ollama/llama3" flow_id = result.context.flow_id loaded = persistence.load_pending_feedback(flow_id) assert loaded is not None _, loaded_context = loaded assert isinstance(loaded_context.llm, dict) - assert loaded_context.llm["model"] == "gemini/gemini-2.0-flash" + assert loaded_context.llm["model"] == "ollama/llama3" flow2 = TestFlow.from_pending(flow_id, persistence) assert flow2._pending_feedback_context is not None assert isinstance(flow2._pending_feedback_context.llm, dict) - assert flow2._pending_feedback_context.llm["model"] == "gemini/gemini-2.0-flash" + assert flow2._pending_feedback_context.llm["model"] == "ollama/llama3" with patch.object(flow2, 
"_collapse_to_outcome", return_value="approved") as mock_collapse: flow2.resume("this looks good, proceed!") @@ -1140,7 +1144,7 @@ class TestLLMObjectPreservedInContext: assert call_kwargs.kwargs["outcomes"] == ["needs_changes", "approved"] # LLM should be a live object (from _human_feedback_llm) or reconstructed, not None assert call_kwargs.kwargs["llm"] is not None - assert getattr(call_kwargs.kwargs["llm"], "model", None) == "gemini-2.0-flash" + assert getattr(call_kwargs.kwargs["llm"], "model", None) == "llama3" assert flow2.last_human_feedback.outcome == "approved" assert flow2.result_path == "approved" @@ -1172,20 +1176,24 @@ class TestLLMObjectPreservedInContext: from crewai.flow.human_feedback import _serialize_llm_for_context from crewai.llm import LLM - llm = LLM(model="gemini-2.0-flash", provider="gemini") + llm = LLM( + model="llama3", + provider="ollama", + base_url="http://localhost:11434", + ) result = _serialize_llm_for_context(llm) assert isinstance(result, dict) - assert result["model"] == "gemini/gemini-2.0-flash" + assert result["model"] == "ollama/llama3" def test_provider_prefix_not_doubled_when_already_present(self) -> None: """Test that provider prefix is not added when model already has a slash.""" from crewai.flow.human_feedback import _serialize_llm_for_context from crewai.llm import LLM - llm = LLM(model="gemini/gemini-2.0-flash") + llm = LLM(model="ollama/llama3", base_url="http://localhost:11434") result = _serialize_llm_for_context(llm) assert isinstance(result, dict) - assert result["model"] == "gemini/gemini-2.0-flash" + assert result["model"] == "ollama/llama3" def test_no_provider_attr_falls_back_to_bare_model(self) -> None: """Test that objects without to_config_dict fall back to model string.""" diff --git a/lib/crewai/tests/test_human_feedback_decorator.py b/lib/crewai/tests/test_human_feedback_decorator.py index 2cb6c104d..d3dac0a22 100644 --- a/lib/crewai/tests/test_human_feedback_decorator.py +++ 
b/lib/crewai/tests/test_human_feedback_decorator.py @@ -597,7 +597,7 @@ class TestHumanFeedbackLearn: flow.memory.remember_many.assert_not_called() def test_learn_true_uses_default_llm(self): - """When learn=True and llm is not explicitly set, the default gpt-4o-mini is used.""" + """When learn=True and llm is not explicitly set, the default gpt-5.4-mini is used.""" @human_feedback(message="Review:", learn=True) def test_method(self): @@ -606,8 +606,8 @@ class TestHumanFeedbackLearn: config = test_method.__human_feedback_config__ assert config is not None assert config.learn is True - # llm defaults to "gpt-4o-mini" at the function level - assert config.llm == "gpt-4o-mini" + # llm defaults to "gpt-5.4-mini" at the function level + assert config.llm == "gpt-5.4-mini" def test_pre_review_failure_logs_and_returns_raw_output(self, caplog): """Pre-review LLM failure falls back to raw output AND logs a warning.""" diff --git a/lib/crewai/tests/test_human_feedback_integration.py b/lib/crewai/tests/test_human_feedback_integration.py index 5c07243e3..db978fedd 100644 --- a/lib/crewai/tests/test_human_feedback_integration.py +++ b/lib/crewai/tests/test_human_feedback_integration.py @@ -850,24 +850,22 @@ class TestLLMConfigPreservation: assert _deserialize_llm_from_context(None) is None def test_serialize_llm_preserves_provider_specific_fields(self): - """Test that provider-specific fields like project/location are serialized.""" + """Test that provider-specific fields like base_url are serialized.""" from crewai.flow.human_feedback import _serialize_llm_for_context from crewai.llm import LLM - # Create a Gemini-style LLM with project and non-default location llm = LLM( - model="gemini-2.0-flash", - provider="gemini", - project="my-project", - location="europe-west1", + model="llama3", + provider="ollama", + base_url="http://localhost:11434", temperature=0.3, ) serialized = _serialize_llm_for_context(llm) assert isinstance(serialized, dict) - assert serialized.get("project") 
== "my-project" - assert serialized.get("location") == "europe-west1" + assert serialized.get("model") == "ollama/llama3" + assert serialized.get("base_url") == "http://localhost:11434/v1" assert serialized.get("temperature") == 0.3 def test_config_preserved_through_full_flow_execution(self): diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py index a386fd3cc..1c98d751e 100644 --- a/lib/crewai/tests/test_llm.py +++ b/lib/crewai/tests/test_llm.py @@ -463,6 +463,7 @@ def test_anthropic_message_formatting(anthropic_llm, system_message, user_messag anthropic_llm._format_messages_for_anthropic([{"invalid": "message"}]) +@pytest.mark.vcr() def test_deepseek_r1_with_open_router(): if not os.getenv("OPEN_ROUTER_API_KEY"): pytest.skip("OPEN_ROUTER_API_KEY not set; skipping test.") diff --git a/lib/crewai/tests/test_streaming_integration.py b/lib/crewai/tests/test_streaming_integration.py index f89fe7ff7..718720e98 100644 --- a/lib/crewai/tests/test_streaming_integration.py +++ b/lib/crewai/tests/test_streaming_integration.py @@ -247,8 +247,9 @@ class TestStreamingFlowIntegration: result = streaming.result assert result is not None - @pytest.mark.vcr() @pytest.mark.asyncio + @pytest.mark.timeout(180) + @pytest.mark.vcr() async def test_async_flow_streaming_from_docs(self) -> None: """Test async flow streaming example from documentation.""" diff --git a/lib/crewai/tests/utilities/test_llm_utils.py b/lib/crewai/tests/utilities/test_llm_utils.py index f6e17265a..34263452f 100644 --- a/lib/crewai/tests/utilities/test_llm_utils.py +++ b/lib/crewai/tests/utilities/test_llm_utils.py @@ -23,6 +23,49 @@ def test_create_llm_with_valid_model_string() -> None: assert llm.model == "gpt-4o" +def test_create_llm_with_config_dict() -> None: + with patch.dict(os.environ, {}, clear=True): + llm = create_llm( + llm_value={ + "model": "llama3", + "provider": "ollama", + "temperature": 0.2, + "base_url": "http://localhost:11434", + } + ) + + assert isinstance(llm, BaseLLM) + 
assert llm.model == "llama3" + assert llm.provider == "ollama" + assert llm.temperature == 0.2 + assert llm.base_url == "http://localhost:11434/v1" + + +@pytest.mark.parametrize( + ("model_key", "model_value"), + [ + ("model_name", "llama3"), + ("deployment_name", "custom-deployment"), + ], +) +def test_create_llm_with_config_dict_model_aliases( + model_key: str, + model_value: str, +) -> None: + with patch.dict(os.environ, {}, clear=True): + llm = create_llm( + llm_value={ + model_key: model_value, + "provider": "ollama", + "base_url": "http://localhost:11434", + } + ) + + assert isinstance(llm, BaseLLM) + assert llm.model == model_value + assert llm.provider == "ollama" + + def test_create_llm_with_invalid_model_string() -> None: with patch.dict(os.environ, {"OPENAI_API_KEY": "fake-key"}, clear=True): # For invalid model strings, create_llm succeeds but call() fails with API error diff --git a/lib/crewai/tests/utilities/test_planning_handler.py b/lib/crewai/tests/utilities/test_planning_handler.py index 045c8ba52..3bd2c8fc9 100644 --- a/lib/crewai/tests/utilities/test_planning_handler.py +++ b/lib/crewai/tests/utilities/test_planning_handler.py @@ -66,7 +66,7 @@ class TestInternalCrewPlanner: ), ) result = crew_planner._handle_crew_planning() - assert crew_planner.planning_agent_llm == "gpt-4o-mini" + assert crew_planner.planning_agent_llm == "gpt-5.4-mini" assert isinstance(result, PlannerTaskPydanticOutput) assert len(result.list_of_plans_per_task) == len(crew_planner.tasks) execute.assert_called_once()