Compare commits

...

4 Commits

Author SHA1 Message Date
Iris Clawd
3bd055fcf3 feat: add command allowlist validation for MCP stdio transport
Add an optional allowed_commands parameter to StdioTransport that
validates the command basename against an allowlist before spawning
a subprocess. This provides defense-in-depth against configuration-
driven command injection as MCP server discovery becomes more dynamic.

- DEFAULT_ALLOWED_COMMANDS includes common runtimes: python, python3,
  node, npx, uvx, uv, deno, docker
- Validation checks os.path.basename(command) for cross-platform support
- Users can extend the allowlist, pass a custom set, or set
  allowed_commands=None to disable the check entirely
- No breaking change: all currently documented MCP server examples use
  commands in the default allowlist
- MCPServerStdio config model updated with allowed_commands field
- tool_resolver passes allowed_commands through to StdioTransport

Closes #5080
2026-03-30 22:08:51 +00:00
Lorenze Jay
bb9bcd6823 refactor: remove unused `_extract_tool_name` and `check_native_todo_completion` methods from `AgentExecutor` (#5172)
This commit cleans up the `AgentExecutor` class by removing the `_extract_tool_name` and `check_native_todo_completion` methods, which are no longer needed. The changes help streamline the code and improve maintainability.
2026-03-30 15:01:58 -07:00
Lucas Gomide
ac14b9127e fix: handle GPT-5.x models not supporting the stop API parameter (#5144)
Some checks failed
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
GPT-5.x models reject the `stop` parameter at the API level with "Unsupported parameter: 'stop' is not supported with this model". This breaks CrewAI executions when routing through LiteLLM (e.g. via
OpenAI-compatible gateways like Asimov), because the LiteLLM fallback path always includes `stop` in the API request params.

The native OpenAI provider was unaffected because it never sends `stop` to the API — it applies stop words client-side via `_apply_stop_words()`. However, when the request goes through LiteLLM (custom endpoints, proxy gateways),
`stop` is sent as an API parameter and GPT-5.x rejects it.

Additionally, the existing retry logic that catches this error only matched the OpenAI API error format ("Unsupported parameter") but missed
LiteLLM's own pre-validation error format ("does not support parameters"), so the self-healing retry never triggered for LiteLLM-routed calls.
2026-03-30 11:36:51 -04:00
Thiago Moretto
98b7626784 feat: extract and publish tool metadata to AMP (#4298)
* Exporting tool's metadata to AMP - initial work

* Fix payload (nest under `tools` key)

* Remove debug message + code simplification

* Printing out detected tools

* Extract module name

* fix: address PR review feedback for tool metadata extraction

- Use sha256 instead of md5 for module name hashing (lint S324)
- Filter required list to match filtered properties in JSON schema

* fix: Use sha256 instead of md5 for module name hashing (lint S324)

- Add missing mocks to metadata extraction failure test

* style: fix ruff formatting

* fix: resolve mypy type errors in utils.py

* fix: address bot review feedback on tool metadata

- Use `is not None` instead of truthiness check so empty tools list
  is sent to the API rather than being silently dropped as None
- Strip __init__ suffix from module path for tools in __init__.py files
- Extend _unwrap_schema to handle function-before, function-wrap, and
  definitions wrapper types

* fix: capture env_vars declared with Field(default_factory=...)

When env_vars uses default_factory, pydantic stores a callable in the
schema instead of a static default value. Fall back to calling the
factory when no static default is present.

---------

Co-authored-by: Greyson LaLonde <greyson.r.lalonde@gmail.com>
2026-03-30 09:21:53 -04:00
19 changed files with 1246 additions and 91 deletions

View File

@@ -73,6 +73,7 @@ class PlusAPI:
description: str | None,
encoded_file: str,
available_exports: list[dict[str, Any]] | None = None,
tools_metadata: list[dict[str, Any]] | None = None,
) -> httpx.Response:
params = {
"handle": handle,
@@ -81,6 +82,9 @@ class PlusAPI:
"file": encoded_file,
"description": description,
"available_exports": available_exports,
"tools_metadata": {"package": handle, "tools": tools_metadata}
if tools_metadata is not None
else None,
}
return self._make_request("POST", f"{self.TOOLS_RESOURCE}", json=params)

View File

@@ -17,6 +17,7 @@ from crewai.cli.constants import DEFAULT_CREWAI_ENTERPRISE_URL
from crewai.cli.utils import (
build_env_with_tool_repository_credentials,
extract_available_exports,
extract_tools_metadata,
get_project_description,
get_project_name,
get_project_version,
@@ -101,6 +102,18 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
console.print(
f"[green]Found these tools to publish: {', '.join([e['name'] for e in available_exports])}[/green]"
)
console.print("[bold blue]Extracting tool metadata...[/bold blue]")
try:
tools_metadata = extract_tools_metadata()
except Exception as e:
console.print(
f"[yellow]Warning: Could not extract tool metadata: {e}[/yellow]\n"
f"Publishing will continue without detailed metadata."
)
tools_metadata = []
self._print_tools_preview(tools_metadata)
self._print_current_organization()
with tempfile.TemporaryDirectory() as temp_build_dir:
@@ -118,7 +131,7 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
"Project build failed. Please ensure that the command `uv build --sdist` completes successfully.",
style="bold red",
)
raise SystemExit
raise SystemExit(1)
tarball_path = os.path.join(temp_build_dir, tarball_filename)
with open(tarball_path, "rb") as file:
@@ -134,6 +147,7 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
description=project_description,
encoded_file=f"data:application/x-gzip;base64,{encoded_tarball}",
available_exports=available_exports,
tools_metadata=tools_metadata,
)
self._validate_response(publish_response)
@@ -246,6 +260,55 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
)
raise SystemExit
def _print_tools_preview(self, tools_metadata: list[dict[str, Any]]) -> None:
"""
Print a console preview of the extracted tool metadata.

Prints, per tool: its name, its module (when set), its humanized name, a
description cut to 80 characters, its init parameters and its environment
variables. A short notice is printed instead when the list is empty.

Args:
tools_metadata: Tool metadata dictionaries to preview.
"""
# Nothing was extracted: say so and stop.
if not tools_metadata:
console.print("[yellow]No tool metadata extracted.[/yellow]")
return
console.print(
f"\n[bold]Tools to be published ({len(tools_metadata)}):[/bold]\n"
)
for tool in tools_metadata:
console.print(f" [bold cyan]{tool.get('name', 'Unknown')}[/bold cyan]")
if tool.get("module"):
console.print(f" Module: {tool.get('module')}")
console.print(f" Name: {tool.get('humanized_name', 'N/A')}")
# Only the first 80 characters are shown; "..." marks a truncated description.
console.print(
f" Description: {tool.get('description', 'N/A')[:80]}{'...' if len(tool.get('description', '')) > 80 else ''}"
)
# Init parameters come from the "properties" of the tool's init_params_schema.
init_params = tool.get("init_params_schema", {}).get("properties", {})
if init_params:
required = tool.get("init_params_schema", {}).get("required", [])
console.print(" Init parameters:")
for param_name, param_info in init_params.items():
param_type = param_info.get("type", "any")
is_required = param_name in required
# Parameters listed under "required" get a red asterisk.
req_marker = "[red]*[/red]" if is_required else ""
default = (
f" = {param_info['default']}" if "default" in param_info else ""
)
console.print(
f" - {param_name}: {param_type}{default} {req_marker}"
)
env_vars = tool.get("env_vars", [])
if env_vars:
console.print(" Environment variables:")
for env_var in env_vars:
req_marker = "[red]*[/red]" if env_var.get("required") else ""
# The default is shown only when it is truthy.
default = (
f" (default: {env_var['default']})"
if env_var.get("default")
else ""
)
console.print(
f" - {env_var['name']}: {env_var.get('description', 'N/A')}{default} {req_marker}"
)
console.print()
def _print_current_organization(self) -> None:
settings = Settings()
if settings.org_uuid:

View File

@@ -1,10 +1,15 @@
from functools import reduce
from collections.abc import Generator, Mapping
from contextlib import contextmanager
from functools import lru_cache, reduce
import hashlib
import importlib.util
import inspect
from inspect import getmro, isclass, isfunction, ismethod
import os
from pathlib import Path
import shutil
import sys
import types
from typing import Any, cast, get_type_hints
import click
@@ -544,43 +549,62 @@ def build_env_with_tool_repository_credentials(
return env
@contextmanager
def _load_module_from_file(
init_file: Path, module_name: str | None = None
) -> Generator[types.ModuleType | None, None, None]:
"""
Context manager for loading a module from file with automatic cleanup.
Yields the loaded module or None if loading fails.
"""
if module_name is None:
module_name = (
f"temp_module_{hashlib.sha256(str(init_file).encode()).hexdigest()[:8]}"
)
spec = importlib.util.spec_from_file_location(module_name, init_file)
if not spec or not spec.loader:
yield None
return
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
try:
spec.loader.exec_module(module)
yield module
finally:
sys.modules.pop(module_name, None)
def _load_tools_from_init(init_file: Path) -> list[dict[str, Any]]:
"""
Load and validate tools from a given __init__.py file.
"""
spec = importlib.util.spec_from_file_location("temp_module", init_file)
if not spec or not spec.loader:
return []
module = importlib.util.module_from_spec(spec)
sys.modules["temp_module"] = module
try:
spec.loader.exec_module(module)
with _load_module_from_file(init_file) as module:
if module is None:
return []
if not hasattr(module, "__all__"):
console.print(
f"Warning: No __all__ defined in {init_file}",
style="bold yellow",
)
raise SystemExit(1)
return [
{
"name": name,
}
for name in module.__all__
if hasattr(module, name) and is_valid_tool(getattr(module, name))
]
if not hasattr(module, "__all__"):
console.print(
f"Warning: No __all__ defined in {init_file}",
style="bold yellow",
)
raise SystemExit(1)
return [
{"name": name}
for name in module.__all__
if hasattr(module, name) and is_valid_tool(getattr(module, name))
]
except SystemExit:
raise
except Exception as e:
console.print(f"[red]Warning: Could not load {init_file}: {e!s}[/red]")
raise SystemExit(1) from e
finally:
sys.modules.pop("temp_module", None)
def _print_no_tools_warning() -> None:
"""
@@ -610,3 +634,242 @@ def _print_no_tools_warning() -> None:
" # ... implementation\n"
" return result\n"
)
def extract_tools_metadata(dir_path: str = "src") -> list[dict[str, Any]]:
    """
    Gather rich metadata for the tool classes found under ``dir_path``.

    Every ``__init__.py`` below ``dir_path`` (at any depth) is inspected and
    the metadata extracted from each one is concatenated into a single list.

    Each entry describes one tool with these keys:
    - name: Class name
    - humanized_name: From name field default
    - description: From description field default
    - run_params_schema: JSON Schema for _run() params (from args_schema)
    - init_params_schema: JSON Schema for __init__ params (filtered)
    - env_vars: List of environment variable dicts
    """
    return [
        tool
        for init_file in Path(dir_path).glob("**/__init__.py")
        for tool in _extract_tool_metadata_from_init(init_file)
    ]
def _extract_tool_metadata_from_init(init_file: Path) -> list[dict[str, Any]]:
    """
    Import ``init_file`` and describe each ``BaseTool`` subclass it exports.

    Only names listed in the module's ``__all__`` are considered. Any error
    raised while loading or inspecting the module is reported as a warning
    on the console and results in an empty list.
    """
    from crewai.tools.base_tool import BaseTool

    def _is_tool_class(candidate: Any) -> bool:
        # ``None`` (a missing attribute) is not a class, so it is rejected too.
        return inspect.isclass(candidate) and issubclass(candidate, BaseTool)

    try:
        with _load_module_from_file(init_file) as loaded:
            if loaded is None:
                return []

            exports = getattr(loaded, "__all__", None)
            if not exports:
                return []

            collected = []
            for export_name in exports:
                candidate = getattr(loaded, export_name, None)
                if not _is_tool_class(candidate):
                    continue
                # Tools whose metadata could not be extracted are left out.
                details = _extract_single_tool_metadata(candidate)
                if details:
                    collected.append(details)
            return collected
    except Exception as e:
        console.print(
            f"[yellow]Warning: Could not extract metadata from {init_file}: {e}[/yellow]"
        )
        return []
def _extract_single_tool_metadata(tool_class: type) -> dict[str, Any] | None:
    """
    Build the metadata dictionary for one tool class.

    Returns ``None`` when the class has no (or an empty) pydantic core schema,
    or when anything goes wrong while reading it.
    """
    try:
        core_schema = cast(Any, tool_class).__pydantic_core_schema__
        if not core_schema:
            return None

        fields = _unwrap_schema(core_schema).get("schema", {}).get("fields", {})

        # Prefer a dotted module path derived from the source file location
        # relative to the working directory; fall back to ``__module__`` when
        # the file cannot be determined or lies outside the working directory.
        try:
            source_file = Path(inspect.getfile(tool_class))
            dotted = source_file.relative_to(Path.cwd()).with_suffix("")
            if dotted.parts[0] == "src":
                dotted = Path(*dotted.parts[1:])
            if dotted.name == "__init__":
                dotted = dotted.parent
            module = ".".join(dotted.parts)
        except (TypeError, ValueError):
            module = tool_class.__module__

        class_name = tool_class.__name__
        humanized_name = _extract_field_default(
            fields.get("name"), fallback=tool_class.__name__
        )
        description = str(_extract_field_default(fields.get("description"))).strip()
        run_params_schema = _extract_run_params_schema(fields.get("args_schema"))
        init_params_schema = _extract_init_params_schema(tool_class)
        env_vars = _extract_env_vars(fields.get("env_vars"))

        return {
            "name": class_name,
            "module": module,
            "humanized_name": humanized_name,
            "description": description,
            "run_params_schema": run_params_schema,
            "init_params_schema": init_params_schema,
            "env_vars": env_vars,
        }
    except Exception:
        return None
def _unwrap_schema(schema: Mapping[str, Any] | dict[str, Any]) -> dict[str, Any]:
"""
Unwrap nested schema structures to get to the actual schema definition.
"""
result: dict[str, Any] = dict(schema)
while (
result.get("type")
in {"function-after", "function-before", "function-wrap", "default"}
and "schema" in result
):
result = dict(result["schema"])
if result.get("type") == "definitions" and "schema" in result:
result = dict(result["schema"])
return result
def _extract_field_default(
field: dict[str, Any] | None, fallback: str | list[Any] = ""
) -> str | list[Any] | int:
"""
Extract the default value from a field schema.
"""
if not field:
return fallback
schema = field.get("schema", {})
default = schema.get("default")
return default if isinstance(default, (list, str, int)) else fallback
@lru_cache(maxsize=1)
def _get_schema_generator() -> type:
    """
    Return a ``GenerateJsonSchema`` subclass that omits invalid schema parts.

    The subclass is created on the first call and the cached class is handed
    back on later calls.
    """
    from pydantic.json_schema import GenerateJsonSchema
    from pydantic_core import PydanticOmit

    class SchemaGenerator(GenerateJsonSchema):
        # Instead of producing a schema for a value that cannot be expressed
        # as JSON Schema, signal that it should be omitted.
        def handle_invalid_for_json_schema(
            self, schema: Any, error_info: Any
        ) -> dict[str, Any]:
            raise PydanticOmit

    return SchemaGenerator
def _extract_run_params_schema(
    args_schema_field: dict[str, Any] | None,
) -> dict[str, Any]:
    """
    Produce the JSON Schema of a tool's run parameters.

    The schema is generated from the pydantic model stored as the default of
    the ``args_schema`` field. An empty dict is returned when the field is
    missing, when its default is not a ``BaseModel`` subclass, or when schema
    generation fails.
    """
    from pydantic import BaseModel

    if not args_schema_field:
        return {}

    model_cls = args_schema_field.get("schema", {}).get("default")
    is_model = inspect.isclass(model_cls) and issubclass(model_cls, BaseModel)
    if not is_model:
        return {}

    try:
        generator = _get_schema_generator()
        return model_cls.model_json_schema(schema_generator=generator)
    except Exception:
        return {}
# Property names that are dropped from a tool's init-parameter JSON Schema.
_IGNORED_INIT_PARAMS = frozenset(
    (
        "name",
        "description",
        "env_vars",
        "args_schema",
        "description_updated",
        "cache_function",
        "result_as_answer",
        "max_usage_count",
        "current_usage_count",
        "package_dependencies",
    )
)
def _extract_init_params_schema(tool_class: type) -> dict[str, Any]:
    """
    Produce the JSON Schema of a tool's ``__init__`` parameters.

    The class's serialization-mode JSON Schema is generated, the properties
    named in ``_IGNORED_INIT_PARAMS`` are removed, and the ``required`` list
    (when present) is narrowed to the properties that remain. An empty dict
    is returned if anything fails.
    """
    try:
        schema: dict[str, Any] = cast(Any, tool_class).model_json_schema(
            schema_generator=_get_schema_generator(), mode="serialization"
        )

        kept: dict[str, Any] = {}
        for prop_name, prop_schema in schema.get("properties", {}).items():
            if prop_name not in _IGNORED_INIT_PARAMS:
                kept[prop_name] = prop_schema
        schema["properties"] = kept

        # Keep ``required`` consistent with the filtered properties.
        if "required" in schema:
            schema["required"] = [
                prop_name for prop_name in schema["required"] if prop_name in kept
            ]
        return schema
    except Exception:
        return {}
def _extract_env_vars(env_vars_field: dict[str, Any] | None) -> list[dict[str, Any]]:
    """
    List the environment variables declared on the ``env_vars`` field.

    The static default of the field is used when present; otherwise the
    field's ``default_factory`` is called (a failing factory counts as an
    empty list). Only ``EnvVar`` instances are reported, each as a dict with
    ``name``, ``description``, ``required`` and ``default``.
    """
    from crewai.tools.base_tool import EnvVar

    if not env_vars_field:
        return []

    field_schema = env_vars_field.get("schema", {})
    declared = field_schema.get("default")
    if declared is None:
        factory = field_schema.get("default_factory")
        if callable(factory):
            try:
                declared = factory()
            except Exception:
                declared = []

    if not isinstance(declared, list):
        return []

    entries: list[dict[str, Any]] = []
    for item in declared:
        if not isinstance(item, EnvVar):
            continue
        entries.append(
            {
                "name": item.name,
                "description": item.description,
                "required": item.required,
                "default": item.default,
            }
        )
    return entries

View File

@@ -1966,37 +1966,6 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
"original_tool": original_tool,
}
def _extract_tool_name(self, tool_call: Any) -> str:
"""Extract tool name from various tool call formats."""
if hasattr(tool_call, "function"):
return sanitize_tool_name(tool_call.function.name)
if hasattr(tool_call, "function_call") and tool_call.function_call:
return sanitize_tool_name(tool_call.function_call.name)
if hasattr(tool_call, "name"):
return sanitize_tool_name(tool_call.name)
if isinstance(tool_call, dict):
func_info = tool_call.get("function", {})
return sanitize_tool_name(
func_info.get("name", "") or tool_call.get("name", "unknown")
)
return "unknown"
@router(execute_native_tool)
def check_native_todo_completion(
self,
) -> Literal["todo_satisfied", "todo_not_satisfied"]:
"""Check if the native tool execution satisfied the active todo.
Similar to check_todo_completion but for native tool execution path.
"""
current_todo = self.state.todos.current_todo
if not current_todo:
return "todo_not_satisfied"
# For native tools, any tool execution satisfies the todo
return "todo_satisfied"
@listen("initialized")
def continue_iteration(self) -> Literal["check_iteration"]:
"""Bridge listener that connects iteration loop back to iteration check."""

View File

@@ -753,7 +753,7 @@ class LLM(BaseLLM):
"temperature": self.temperature,
"top_p": self.top_p,
"n": self.n,
"stop": self.stop or None,
"stop": (self.stop or None) if self.supports_stop_words() else None,
"max_tokens": self.max_tokens or self.max_completion_tokens,
"presence_penalty": self.presence_penalty,
"frequency_penalty": self.frequency_penalty,
@@ -1825,9 +1825,11 @@ class LLM(BaseLLM):
# whether to summarize the content or abort based on the respect_context_window flag
raise
except Exception as e:
unsupported_stop = "Unsupported parameter" in str(
e
) and "'stop'" in str(e)
error_str = str(e)
unsupported_stop = "'stop'" in error_str and (
"Unsupported parameter" in error_str
or "does not support parameters" in error_str
)
if unsupported_stop:
if (
@@ -1961,9 +1963,11 @@ class LLM(BaseLLM):
except LLMContextLengthExceededError:
raise
except Exception as e:
unsupported_stop = "Unsupported parameter" in str(
e
) and "'stop'" in str(e)
error_str = str(e)
unsupported_stop = "'stop'" in error_str and (
"Unsupported parameter" in error_str
or "does not support parameters" in error_str
)
if unsupported_stop:
if (
@@ -2263,6 +2267,10 @@ class LLM(BaseLLM):
Note: This method is only used by the litellm fallback path.
Native providers override this method with their own implementation.
"""
model_lower = self.model.lower() if self.model else ""
if "gpt-5" in model_lower:
return False
if not LITELLM_AVAILABLE or get_supported_openai_params is None:
# When litellm is not available, assume stop words are supported
return True

View File

@@ -2245,6 +2245,9 @@ class OpenAICompletion(BaseLLM):
def supports_stop_words(self) -> bool:
"""Check if the model supports stop words."""
model_lower = self.model.lower() if self.model else ""
if "gpt-5" in model_lower:
return False
return not self.is_o1_model
def get_context_window_size(self) -> int:

View File

@@ -7,6 +7,7 @@ various transport types, similar to OpenAI's Agents SDK.
from pydantic import BaseModel, Field
from crewai.mcp.filters import ToolFilter
from crewai.mcp.transports.stdio import DEFAULT_ALLOWED_COMMANDS
class MCPServerStdio(BaseModel):
@@ -44,6 +45,14 @@ class MCPServerStdio(BaseModel):
default=None,
description="Optional tool filter for filtering available tools.",
)
allowed_commands: frozenset[str] | None = Field(
default=DEFAULT_ALLOWED_COMMANDS,
description=(
"Optional frozenset of allowed command basenames for security validation. "
"Defaults to common runtimes (python, node, npx, uvx, uv, deno, docker). "
"Set to None to disable the allowlist check."
),
)
cache_tools_list: bool = Field(
default=False,
description="Whether to cache the tool list for faster subsequent access.",

View File

@@ -292,6 +292,7 @@ class MCPToolResolver:
command=mcp_config.command,
args=mcp_config.args,
env=mcp_config.env,
allowed_commands=mcp_config.allowed_commands,
)
server_name = f"{mcp_config.command}_{'_'.join(mcp_config.args)}"
elif isinstance(mcp_config, MCPServerHTTP):

View File

@@ -3,11 +3,12 @@
from crewai.mcp.transports.base import BaseTransport, TransportType
from crewai.mcp.transports.http import HTTPTransport
from crewai.mcp.transports.sse import SSETransport
from crewai.mcp.transports.stdio import StdioTransport
from crewai.mcp.transports.stdio import DEFAULT_ALLOWED_COMMANDS, StdioTransport
__all__ = [
"BaseTransport",
"DEFAULT_ALLOWED_COMMANDS",
"HTTPTransport",
"SSETransport",
"StdioTransport",

View File

@@ -9,6 +9,22 @@ from typing_extensions import Self
from crewai.mcp.transports.base import BaseTransport, TransportType
# Command basenames accepted by default when launching an MCP server over
# stdio; these are the runtimes such servers are commonly started with.
# Validation is switched off entirely by passing ``allowed_commands=None``.
DEFAULT_ALLOWED_COMMANDS: frozenset[str] = frozenset(
    ("python", "python3", "node", "npx", "uvx", "uv", "deno", "docker")
)
class StdioTransport(BaseTransport):
"""Stdio transport for connecting to local MCP servers.
@@ -34,6 +50,7 @@ class StdioTransport(BaseTransport):
command: str,
args: list[str] | None = None,
env: dict[str, str] | None = None,
allowed_commands: frozenset[str] | None = DEFAULT_ALLOWED_COMMANDS,
**kwargs: Any,
) -> None:
"""Initialize stdio transport.
@@ -42,9 +59,24 @@ class StdioTransport(BaseTransport):
command: Command to execute (e.g., "python", "node", "npx").
args: Command arguments (e.g., ["server.py"] or ["-y", "@mcp/server"]).
env: Environment variables to pass to the process.
allowed_commands: Optional frozenset of allowed command basenames.
Defaults to ``DEFAULT_ALLOWED_COMMANDS`` which includes common
runtimes (python, node, npx, uvx, uv, deno, docker). Pass
``None`` to disable the check entirely.
**kwargs: Additional transport options.
"""
super().__init__(**kwargs)
if allowed_commands is not None:
base_command = os.path.basename(command)
if base_command not in allowed_commands:
raise ValueError(
f"Command '{command}' is not in the allowed commands list: "
f"{sorted(allowed_commands)}. "
f"To allow this command, add it to allowed_commands or pass "
f"allowed_commands=None to disable this check."
)
self.command = command
self.args = args or []
self.env = env or {}

View File

@@ -879,30 +879,6 @@ class TestNativeToolExecution:
assert len(tool_messages) == 1
assert tool_messages[0]["tool_call_id"] == "call_1"
def test_check_native_todo_completion_requires_current_todo(
self, mock_dependencies
):
from crewai.utilities.planning_types import TodoList
executor = AgentExecutor(**mock_dependencies)
# No current todo → not satisfied
executor.state.todos = TodoList(items=[])
assert executor.check_native_todo_completion() == "todo_not_satisfied"
# With a current todo that has tool_to_use → satisfied
running = TodoItem(
step_number=1,
description="Use the expected tool",
tool_to_use="expected_tool",
status="running",
)
executor.state.todos = TodoList(items=[running])
assert executor.check_native_todo_completion() == "todo_satisfied"
# With a current todo without tool_to_use → still satisfied
running.tool_to_use = None
assert executor.check_native_todo_completion() == "todo_satisfied"
class TestPlannerObserver:

View File

@@ -0,0 +1,110 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"What is the capital of France?"}],"model":"gpt-5"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '89'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.2
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DO4LcSpy72yIXCYSIVOQEXWNXydgn\",\n \"object\":
\"chat.completion\",\n \"created\": 1774628956,\n \"model\": \"gpt-5-2025-08-07\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"Paris.\",\n \"refusal\": null,\n
\ \"annotations\": []\n },\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 13,\n \"completion_tokens\":
11,\n \"total_tokens\": 24,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": null\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-Ray:
- 9e2fc5dce85582fb-GIG
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 27 Mar 2026 16:29:17 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
content-length:
- '772'
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1343'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
set-cookie:
- SET-COOKIE-XXX
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -136,6 +136,7 @@ class TestPlusAPI(unittest.TestCase):
"file": encoded_file,
"description": description,
"available_exports": None,
"tools_metadata": None,
}
mock_make_request.assert_called_once_with(
"POST", "/crewai_plus/api/v1/tools", json=params
@@ -173,6 +174,7 @@ class TestPlusAPI(unittest.TestCase):
"file": encoded_file,
"description": description,
"available_exports": None,
"tools_metadata": None,
}
self.assert_request_with_org_id(
@@ -201,6 +203,48 @@ class TestPlusAPI(unittest.TestCase):
"file": encoded_file,
"description": description,
"available_exports": None,
"tools_metadata": None,
}
mock_make_request.assert_called_once_with(
"POST", "/crewai_plus/api/v1/tools", json=params
)
self.assertEqual(response, mock_response)
@patch("crewai.cli.plus_api.PlusAPI._make_request")
def test_publish_tool_with_tools_metadata(self, mock_make_request):
mock_response = MagicMock()
mock_make_request.return_value = mock_response
handle = "test_tool_handle"
public = True
version = "1.0.0"
description = "Test tool description"
encoded_file = "encoded_test_file"
available_exports = [{"name": "MyTool"}]
tools_metadata = [
{
"name": "MyTool",
"humanized_name": "my_tool",
"description": "A test tool",
"run_params_schema": {"type": "object", "properties": {}},
"init_params_schema": {"type": "object", "properties": {}},
"env_vars": [{"name": "API_KEY", "description": "API key", "required": True, "default": None}],
}
]
response = self.api.publish_tool(
handle, public, version, description, encoded_file,
available_exports=available_exports,
tools_metadata=tools_metadata,
)
params = {
"handle": handle,
"public": public,
"version": version,
"file": encoded_file,
"description": description,
"available_exports": available_exports,
"tools_metadata": {"package": handle, "tools": tools_metadata},
}
mock_make_request.assert_called_once_with(
"POST", "/crewai_plus/api/v1/tools", json=params

View File

@@ -363,3 +363,290 @@ def test_get_crews_ignores_template_directories(
utils.get_crews()
assert not template_crew_detected
# Tests for extract_tools_metadata
def test_extract_tools_metadata_empty_project(temp_project_dir):
    """An empty project directory yields no tool metadata."""
    project_root = str(temp_project_dir)
    assert utils.extract_tools_metadata(dir_path=project_root) == []
def test_extract_tools_metadata_no_init_file(temp_project_dir):
    """A project with Python files but no __init__.py yields no tool metadata."""
    stray_module = temp_project_dir / "some_file.py"
    stray_module.write_text("print('hello')")

    found = utils.extract_tools_metadata(dir_path=str(temp_project_dir))

    assert found == []
def test_extract_tools_metadata_empty_init_file(temp_project_dir):
    """An __init__.py with no content yields no tool metadata."""
    create_init_file(temp_project_dir, "")

    found = utils.extract_tools_metadata(dir_path=str(temp_project_dir))

    assert found == []
def test_extract_tools_metadata_no_all_variable(temp_project_dir):
    """A tool class that is not exported through __all__ yields no metadata."""
    init_source = "from crewai.tools import BaseTool\n\nclass MyTool(BaseTool):\n pass"
    create_init_file(temp_project_dir, init_source)

    found = utils.extract_tools_metadata(dir_path=str(temp_project_dir))

    assert found == []
def test_extract_tools_metadata_valid_base_tool_class(temp_project_dir):
"""Test that extract_tools_metadata extracts metadata from a valid BaseTool class."""
# Package source: one BaseTool subclass with literal name/description, exported via __all__.
# NOTE(review): create_init_file presumably writes this source to an __init__.py under temp_project_dir; confirm against the helper.
create_init_file(
temp_project_dir,
"""from crewai.tools import BaseTool
class MyTool(BaseTool):
name: str = "my_tool"
description: str = "A test tool"
__all__ = ['MyTool']
""",
)
metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir))
# Exactly one tool is reported; "name" is the class name and "humanized_name" is the name field's default.
assert len(metadata) == 1
assert metadata[0]["name"] == "MyTool"
assert metadata[0]["humanized_name"] == "my_tool"
assert metadata[0]["description"] == "A test tool"
def test_extract_tools_metadata_with_args_schema(temp_project_dir):
"""Test that extract_tools_metadata extracts run_params_schema from args_schema."""
# Package source: a tool whose args_schema defaults to a pydantic model with fields "query" and "limit".
create_init_file(
temp_project_dir,
"""from crewai.tools import BaseTool
from pydantic import BaseModel
class MyToolInput(BaseModel):
query: str
limit: int = 10
class MyTool(BaseTool):
name: str = "my_tool"
description: str = "A test tool"
args_schema: type[BaseModel] = MyToolInput
__all__ = ['MyTool']
""",
)
metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir))
assert len(metadata) == 1
assert metadata[0]["name"] == "MyTool"
# Both fields of the input model must show up as properties of the run-params schema.
run_params = metadata[0]["run_params_schema"]
assert "properties" in run_params
assert "query" in run_params["properties"]
assert "limit" in run_params["properties"]
def test_extract_tools_metadata_with_env_vars(temp_project_dir):
"""Test that extract_tools_metadata extracts env_vars."""
# Package source: a tool declaring two EnvVar entries as a plain list default
# (one required, one optional with a default value).
create_init_file(
temp_project_dir,
"""from crewai.tools import BaseTool
from crewai.tools.base_tool import EnvVar
class MyTool(BaseTool):
name: str = "my_tool"
description: str = "A test tool"
env_vars: list[EnvVar] = [
EnvVar(name="MY_API_KEY", description="API key for service", required=True),
EnvVar(name="MY_OPTIONAL_VAR", description="Optional var", required=False, default="default_value"),
]
__all__ = ['MyTool']
""",
)
metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir))
assert len(metadata) == 1
# Entries are expected in declaration order with their attributes carried over.
env_vars = metadata[0]["env_vars"]
assert len(env_vars) == 2
assert env_vars[0]["name"] == "MY_API_KEY"
assert env_vars[0]["description"] == "API key for service"
assert env_vars[0]["required"] is True
assert env_vars[1]["name"] == "MY_OPTIONAL_VAR"
assert env_vars[1]["required"] is False
assert env_vars[1]["default"] == "default_value"
def test_extract_tools_metadata_with_env_vars_field_default_factory(temp_project_dir):
"""Test that extract_tools_metadata extracts env_vars declared with Field(default_factory=...)."""
# Package source: env_vars is supplied through Field(default_factory=...) rather than a static
# list, so the extractor has to call the factory to see the entries.
create_init_file(
temp_project_dir,
"""from crewai.tools import BaseTool
from crewai.tools.base_tool import EnvVar
from pydantic import Field
class MyTool(BaseTool):
name: str = "my_tool"
description: str = "A test tool"
env_vars: list[EnvVar] = Field(
default_factory=lambda: [
EnvVar(name="MY_TOOL_API", description="API token for my tool", required=True),
]
)
__all__ = ['MyTool']
""",
)
metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir))
assert len(metadata) == 1
# The single EnvVar produced by the factory must be reported.
env_vars = metadata[0]["env_vars"]
assert len(env_vars) == 1
assert env_vars[0]["name"] == "MY_TOOL_API"
assert env_vars[0]["description"] == "API token for my tool"
assert env_vars[0]["required"] is True
def test_extract_tools_metadata_with_custom_init_params(temp_project_dir):
    """Verify init_params_schema contains only the tool's custom fields.

    Fields added by the tool subclass (api_endpoint, timeout) should appear in
    the schema properties, while the base fields name and description should
    be filtered out.
    """
    # The embedded module must be valid Python, so the class body is indented.
    create_init_file(
        temp_project_dir,
        """from crewai.tools import BaseTool

class MyTool(BaseTool):
    name: str = "my_tool"
    description: str = "A test tool"
    api_endpoint: str = "https://api.example.com"
    timeout: int = 30

__all__ = ['MyTool']
""",
    )

    metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir))

    assert len(metadata) == 1
    init_params = metadata[0]["init_params_schema"]
    assert "properties" in init_params
    # Custom params should be included
    assert "api_endpoint" in init_params["properties"]
    assert "timeout" in init_params["properties"]
    # Base params should be filtered out
    assert "name" not in init_params["properties"]
    assert "description" not in init_params["properties"]
def test_extract_tools_metadata_multiple_tools(temp_project_dir):
    """Verify that every tool exported from one __init__.py gets a metadata entry."""
    # The embedded module must be valid Python, so both class bodies are
    # indented.
    create_init_file(
        temp_project_dir,
        """from crewai.tools import BaseTool

class FirstTool(BaseTool):
    name: str = "first_tool"
    description: str = "First test tool"

class SecondTool(BaseTool):
    name: str = "second_tool"
    description: str = "Second test tool"

__all__ = ['FirstTool', 'SecondTool']
""",
    )

    metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir))

    assert len(metadata) == 2
    # Order of the returned entries is not asserted, only membership.
    names = [m["name"] for m in metadata]
    assert "FirstTool" in names
    assert "SecondTool" in names
def test_extract_tools_metadata_multiple_init_files(temp_project_dir):
    """Verify that tools from several __init__.py files are all collected.

    One tool lives in the project root package and another in a nested
    sub-package; both are expected in the extracted metadata.
    """
    # Create tool in root __init__.py (embedded source must be valid Python,
    # hence the indented class body).
    create_init_file(
        temp_project_dir,
        """from crewai.tools import BaseTool

class RootTool(BaseTool):
    name: str = "root_tool"
    description: str = "Root tool"

__all__ = ['RootTool']
""",
    )

    # Create nested package with another tool
    nested_dir = temp_project_dir / "nested"
    nested_dir.mkdir()
    create_init_file(
        nested_dir,
        """from crewai.tools import BaseTool

class NestedTool(BaseTool):
    name: str = "nested_tool"
    description: str = "Nested tool"

__all__ = ['NestedTool']
""",
    )

    metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir))

    assert len(metadata) == 2
    # Order of the returned entries is not asserted, only membership.
    names = [m["name"] for m in metadata]
    assert "RootTool" in names
    assert "NestedTool" in names
def test_extract_tools_metadata_ignores_non_tool_exports(temp_project_dir):
    """Verify that exports which are not BaseTool subclasses are ignored.

    __all__ lists a tool class, a plain function and a string constant; only
    the tool class is expected to produce a metadata entry.
    """
    # The embedded module must be valid Python, so the class and function
    # bodies are indented.
    create_init_file(
        temp_project_dir,
        """from crewai.tools import BaseTool

class MyTool(BaseTool):
    name: str = "my_tool"
    description: str = "A test tool"

def not_a_tool():
    pass

SOME_CONSTANT = "value"

__all__ = ['MyTool', 'not_a_tool', 'SOME_CONSTANT']
""",
    )

    metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir))

    assert len(metadata) == 1
    assert metadata[0]["name"] == "MyTool"
def test_extract_tools_metadata_import_error_returns_empty(temp_project_dir):
    """Verify that a module which fails to import yields an empty metadata list."""
    # The class body is indented so the module is syntactically valid and the
    # failure exercised here is the unresolved import on its first line, not
    # an indentation error.
    create_init_file(
        temp_project_dir,
        """from nonexistent_module import something

class MyTool(BaseTool):
    pass

__all__ = ['MyTool']
""",
    )

    # Should not raise, just return empty list
    metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir))
    assert metadata == []
def test_extract_tools_metadata_syntax_error_returns_empty(temp_project_dir):
    """Verify that a module with a syntax error yields an empty metadata list."""
    # Indentation is kept correct so that the only defect in the embedded
    # module is the deliberately unclosed parenthesis in __init__'s signature.
    create_init_file(
        temp_project_dir,
        """from crewai.tools import BaseTool

class MyTool(BaseTool):
    # Missing closing parenthesis
    def __init__(self, name:
        pass

__all__ = ['MyTool']
""",
    )

    # Should not raise, just return empty list
    metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir))
    assert metadata == []

View File

@@ -185,9 +185,14 @@ def test_publish_when_not_in_sync(mock_is_synced, capsys, tool_command):
"crewai.cli.tools.main.extract_available_exports",
return_value=[{"name": "SampleTool"}],
)
@patch(
"crewai.cli.tools.main.extract_tools_metadata",
return_value=[{"name": "SampleTool", "humanized_name": "sample_tool", "description": "A sample tool", "run_params_schema": {}, "init_params_schema": {}, "env_vars": []}],
)
@patch("crewai.cli.tools.main.ToolCommand._print_current_organization")
def test_publish_when_not_in_sync_and_force(
mock_print_org,
mock_tools_metadata,
mock_available_exports,
mock_is_synced,
mock_publish,
@@ -222,6 +227,7 @@ def test_publish_when_not_in_sync_and_force(
description="A sample tool",
encoded_file=unittest.mock.ANY,
available_exports=[{"name": "SampleTool"}],
tools_metadata=[{"name": "SampleTool", "humanized_name": "sample_tool", "description": "A sample tool", "run_params_schema": {}, "init_params_schema": {}, "env_vars": []}],
)
mock_print_org.assert_called_once()
@@ -242,7 +248,12 @@ def test_publish_when_not_in_sync_and_force(
"crewai.cli.tools.main.extract_available_exports",
return_value=[{"name": "SampleTool"}],
)
@patch(
"crewai.cli.tools.main.extract_tools_metadata",
return_value=[{"name": "SampleTool", "humanized_name": "sample_tool", "description": "A sample tool", "run_params_schema": {}, "init_params_schema": {}, "env_vars": []}],
)
def test_publish_success(
mock_tools_metadata,
mock_available_exports,
mock_is_synced,
mock_publish,
@@ -277,6 +288,7 @@ def test_publish_success(
description="A sample tool",
encoded_file=unittest.mock.ANY,
available_exports=[{"name": "SampleTool"}],
tools_metadata=[{"name": "SampleTool", "humanized_name": "sample_tool", "description": "A sample tool", "run_params_schema": {}, "init_params_schema": {}, "env_vars": []}],
)
@@ -295,7 +307,12 @@ def test_publish_success(
"crewai.cli.tools.main.extract_available_exports",
return_value=[{"name": "SampleTool"}],
)
@patch(
"crewai.cli.tools.main.extract_tools_metadata",
return_value=[{"name": "SampleTool", "humanized_name": "sample_tool", "description": "A sample tool", "run_params_schema": {}, "init_params_schema": {}, "env_vars": []}],
)
def test_publish_failure(
mock_tools_metadata,
mock_available_exports,
mock_publish,
mock_open,
@@ -336,7 +353,12 @@ def test_publish_failure(
"crewai.cli.tools.main.extract_available_exports",
return_value=[{"name": "SampleTool"}],
)
@patch(
"crewai.cli.tools.main.extract_tools_metadata",
return_value=[{"name": "SampleTool", "humanized_name": "sample_tool", "description": "A sample tool", "run_params_schema": {}, "init_params_schema": {}, "env_vars": []}],
)
def test_publish_api_error(
mock_tools_metadata,
mock_available_exports,
mock_publish,
mock_open,
@@ -362,6 +384,63 @@ def test_publish_api_error(
mock_publish.assert_called_once()
@patch("crewai.cli.tools.main.get_project_name", return_value="sample-tool")
@patch("crewai.cli.tools.main.get_project_version", return_value="1.0.0")
@patch("crewai.cli.tools.main.get_project_description", return_value="A sample tool")
@patch("crewai.cli.tools.main.subprocess.run")
@patch("crewai.cli.tools.main.os.listdir", return_value=["sample-tool-1.0.0.tar.gz"])
@patch(
    "crewai.cli.tools.main.open",
    new_callable=unittest.mock.mock_open,
    read_data=b"sample tarball content",
)
@patch("crewai.cli.plus_api.PlusAPI.publish_tool")
@patch("crewai.cli.tools.main.git.Repository.is_synced", return_value=True)
@patch(
    "crewai.cli.tools.main.extract_available_exports",
    return_value=[{"name": "SampleTool"}],
)
@patch(
    "crewai.cli.tools.main.extract_tools_metadata",
    side_effect=Exception("Failed to extract metadata"),
)
def test_publish_metadata_extraction_failure_continues_with_warning(
    mock_tools_metadata,
    mock_available_exports,
    mock_is_synced,
    mock_publish,
    mock_open,
    mock_listdir,
    mock_subprocess_run,
    mock_get_project_description,
    mock_get_project_version,
    mock_get_project_name,
    capsys,
    tool_command,
):
    """Publishing proceeds, with warnings, when metadata extraction raises.

    extract_tools_metadata is patched to raise; the test expects the warning
    lines on stdout and a publish_tool call made with an empty
    tools_metadata list.
    """
    # Stub a successful publish API response.
    publish_response = MagicMock(status_code=200)
    publish_response.json.return_value = {"handle": "sample-tool"}
    mock_publish.return_value = publish_response

    tool_command.publish(is_public=True)

    captured_stdout = capsys.readouterr().out
    expected_messages = (
        "Warning: Could not extract tool metadata",
        "Publishing will continue without detailed metadata",
        "No tool metadata extracted",
    )
    for expected_message in expected_messages:
        assert expected_message in captured_stdout

    # The upload must still happen exactly once, with no tool metadata.
    expected_publish_kwargs = {
        "handle": "sample-tool",
        "is_public": True,
        "version": "1.0.0",
        "description": "A sample tool",
        "encoded_file": unittest.mock.ANY,
        "available_exports": [{"name": "SampleTool"}],
        "tools_metadata": [],
    }
    mock_publish.assert_called_once_with(**expected_publish_kwargs)
@patch("crewai.cli.tools.main.Settings")
def test_print_current_organization_with_org(mock_settings, capsys, tool_command):
mock_settings_instance = MagicMock()

View File

@@ -1523,6 +1523,69 @@ def test_openai_stop_words_not_applied_to_structured_output():
assert "Observation:" in result.observation
def test_openai_gpt5_models_do_not_support_stop_words():
    """
    Test that GPT-5 family models do not support stop words via the API.

    GPT-5 models reject the 'stop' parameter, so stop words must be
    applied client-side only.
    """
    gpt5_models = [
        "gpt-5",
        "gpt-5-mini",
        "gpt-5-nano",
        "gpt-5-pro",
        "gpt-5.1",
        "gpt-5.1-chat",
        "gpt-5.2",
        "gpt-5.2-chat",
    ]

    for model_name in gpt5_models:
        llm = OpenAICompletion(model=model_name)
        # Identity check instead of `== False`: requires a real boolean and
        # avoids the equality-to-singleton comparison.
        assert llm.supports_stop_words() is False, (
            f"Expected {model_name} to NOT support stop words"
        )
def test_openai_non_gpt5_models_support_stop_words():
    """
    Test that non-GPT-5 models still support stop words normally.
    """
    supported_models = [
        "gpt-4o",
        "gpt-4o-mini",
        "gpt-4.1",
        "gpt-4.1-mini",
        "gpt-4-turbo",
    ]

    for model_name in supported_models:
        llm = OpenAICompletion(model=model_name)
        # Identity check instead of `== True`: requires a real boolean and
        # avoids the equality-to-singleton comparison.
        assert llm.supports_stop_words() is True, (
            f"Expected {model_name} to support stop words"
        )
def test_openai_gpt5_still_applies_stop_words_client_side():
    """
    Test that GPT-5 models still truncate responses at stop words client-side
    via _apply_stop_words(), even though they don't send 'stop' to the API.
    """
    llm = OpenAICompletion(
        model="gpt-5.2",
        stop=["Observation:", "Final Answer:"],
    )

    # Identity check instead of `== False` (equality-to-singleton comparison).
    assert llm.supports_stop_words() is False

    response = "I need to search.\n\nAction: search\nObservation: Found results"
    result = llm._apply_stop_words(response)

    # Everything from the first stop word onwards is dropped; the text before
    # it is kept.
    assert "Observation:" not in result
    assert "Found results" not in result
    assert "I need to search" in result
def test_openai_stop_words_still_applied_to_regular_responses():
"""
Test that stop words ARE still applied for regular (non-structured) responses.

View File

@@ -0,0 +1,30 @@
"""Tests for MCPServerStdio allowed_commands config integration."""
import pytest
from crewai.mcp.config import MCPServerStdio
from crewai.mcp.transports.stdio import DEFAULT_ALLOWED_COMMANDS
class TestMCPServerStdioConfig:
    """Checks how MCPServerStdio stores its allowed_commands setting."""

    def test_default_allowed_commands(self):
        """Without an explicit value the field equals DEFAULT_ALLOWED_COMMANDS."""
        server_config = MCPServerStdio(command="python", args=["server.py"])

        assert server_config.allowed_commands == DEFAULT_ALLOWED_COMMANDS

    def test_custom_allowed_commands(self):
        """A caller-supplied allowlist is stored on the config as given."""
        only_my_runtime = frozenset({"my-runtime"})

        server_config = MCPServerStdio(
            command="my-runtime",
            args=[],
            allowed_commands=only_my_runtime,
        )

        assert server_config.allowed_commands == only_my_runtime

    def test_none_allowed_commands(self):
        """Passing None is accepted and preserved as None."""
        server_config = MCPServerStdio(
            command="anything",
            args=[],
            allowed_commands=None,
        )

        assert server_config.allowed_commands is None

View File

@@ -0,0 +1,93 @@
"""Tests for StdioTransport command allowlist validation."""
import pytest
from crewai.mcp.transports.stdio import DEFAULT_ALLOWED_COMMANDS, StdioTransport
class TestStdioTransportAllowlist:
    """Checks StdioTransport's command allowlist validation at construction."""

    def test_default_allowed_commands_contains_common_runtimes(self):
        """DEFAULT_ALLOWED_COMMANDS equals the expected set of runtimes."""
        common_runtimes = {
            "python",
            "python3",
            "node",
            "npx",
            "uvx",
            "uv",
            "deno",
            "docker",
        }
        assert common_runtimes == DEFAULT_ALLOWED_COMMANDS

    def test_allowed_command_passes_validation(self):
        """Every command in the default allowlist constructs successfully."""
        for runtime in DEFAULT_ALLOWED_COMMANDS:
            stdio = StdioTransport(command=runtime, args=["server.py"])
            assert stdio.command == runtime

    def test_allowed_command_with_full_path(self):
        """An absolute path whose basename is allowed is accepted unchanged."""
        python_path = "/usr/bin/python3"
        stdio = StdioTransport(command=python_path, args=["server.py"])
        assert stdio.command == "/usr/bin/python3"

    def test_disallowed_command_raises_value_error(self):
        """A command outside the allowlist is rejected with ValueError."""
        with pytest.raises(ValueError, match="not in the allowed commands list"):
            StdioTransport(command="malicious-binary", args=["--evil"])

    def test_disallowed_command_with_full_path_raises(self):
        """An absolute path to a non-allowlisted command is rejected too."""
        with pytest.raises(ValueError, match="not in the allowed commands list"):
            StdioTransport(command="/tmp/evil/script", args=[])

    def test_allowed_commands_none_disables_validation(self):
        """With allowed_commands=None an arbitrary command is accepted."""
        unchecked = StdioTransport(
            command="any-custom-binary",
            args=["--flag"],
            allowed_commands=None,
        )
        assert unchecked.command == "any-custom-binary"

    def test_custom_allowlist(self):
        """A custom allowlist replaces the defaults rather than extending them."""
        custom_allowlist = frozenset({"my-server", "python"})

        # Allowed: present in the custom set.
        accepted = StdioTransport(
            command="my-server",
            args=[],
            allowed_commands=custom_allowlist,
        )
        assert accepted.command == "my-server"

        # Not allowed: "node" is a default runtime but absent from the custom set.
        with pytest.raises(ValueError, match="not in the allowed commands list"):
            StdioTransport(command="node", args=[], allowed_commands=custom_allowlist)

    def test_extended_allowlist(self):
        """The default allowlist can be unioned with extra commands."""
        widened = DEFAULT_ALLOWED_COMMANDS | frozenset({"my-custom-runtime"})

        custom_transport = StdioTransport(
            command="my-custom-runtime",
            args=[],
            allowed_commands=widened,
        )
        assert custom_transport.command == "my-custom-runtime"

        # Original defaults still work
        default_transport = StdioTransport(
            command="python",
            args=["server.py"],
            allowed_commands=widened,
        )
        assert default_transport.command == "python"

    def test_error_message_includes_sorted_allowed_commands(self):
        """The rejection message names the command and the opt-out setting."""
        # NOTE(review): despite the test name, neither the presence nor the
        # ordering of the allowed commands in the message is asserted here.
        with pytest.raises(ValueError) as raised:
            StdioTransport(command="bad-cmd", args=[])

        message = str(raised.value)
        assert "bad-cmd" in message
        assert "allowed_commands=None" in message

    def test_args_and_env_still_work(self):
        """args and env are stored as given for an allowlisted command."""
        stdio = StdioTransport(
            command="python",
            args=["server.py", "--port", "8080"],
            env={"API_KEY": "test123"},
        )

        assert stdio.command == "python"
        assert stdio.args == ["server.py", "--port", "8080"]
        assert stdio.env == {"API_KEY": "test123"}

View File

@@ -682,6 +682,126 @@ def test_llm_call_when_stop_is_unsupported_when_additional_drop_params_is_provid
assert "Paris" in result
@pytest.mark.vcr()
def test_litellm_gpt5_call_succeeds_without_stop_error():
    """
    Integration test: a GPT-5 call with stop words configured returns a
    non-empty string, because stop is omitted from API params and applied
    client-side.
    """
    gpt5_llm = LLM(model="gpt-5", stop=["Observation:"], is_litellm=True)

    answer = gpt5_llm.call("What is the capital of France?")

    assert isinstance(answer, str)
    assert len(answer) > 0
def test_litellm_gpt5_does_not_send_stop_in_params():
    """
    The LiteLLM fallback path must leave 'stop' out of the prepared API
    params for GPT-5.x models, since they reject it at the API level.
    """
    gpt5_llm = LLM(model="openai/gpt-5.2", stop=["Observation:"], is_litellm=True)
    user_messages = [{"role": "user", "content": "Hello"}]

    completion_params = gpt5_llm._prepare_completion_params(messages=user_messages)

    # A missing key and an explicit None both satisfy this check.
    assert completion_params.get("stop") is None, (
        "GPT-5.x models should not have 'stop' in API params"
    )
def test_litellm_non_gpt5_sends_stop_in_params():
    """
    The LiteLLM fallback path must keep 'stop' in the prepared API params
    for models that support it.
    """
    gpt4o_llm = LLM(model="gpt-4o", stop=["Observation:"], is_litellm=True)
    user_messages = [{"role": "user", "content": "Hello"}]

    completion_params = gpt4o_llm._prepare_completion_params(messages=user_messages)

    assert completion_params.get("stop") == ["Observation:"], (
        "Non-GPT-5 models should have 'stop' in API params"
    )
def test_litellm_retry_catches_litellm_unsupported_params_error(caplog):
    """
    Test that the retry logic catches LiteLLM's UnsupportedParamsError format
    ("does not support parameters") in addition to the OpenAI API format.

    The first patched litellm.completion call raises the error; the second
    returns a stubbed response. The test then expects the retry log line and
    'stop' recorded in the LLM's additional_drop_params.
    """
    # Check for the optional dependency before any LiteLLM-backed object is
    # built, so that a missing litellm produces a skip rather than a possible
    # setup error.
    pytest.importorskip(
        "litellm",
        reason="litellm is not installed; skipping LiteLLM retry test",
    )

    llm = LLM(model="openai/gpt-5.2", stop=["Observation:"], is_litellm=True)

    litellm_error = Exception(
        "litellm.UnsupportedParamsError: openai does not support parameters: "
        "['stop'], for model=openai/gpt-5.2."
    )

    call_count = 0

    def mock_completion(*args, **kwargs):
        """Fail on the first invocation, then return a canned completion."""
        nonlocal call_count
        call_count += 1
        if call_count == 1:
            raise litellm_error
        return MagicMock(
            choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
            usage=MagicMock(
                prompt_tokens=10,
                completion_tokens=5,
                total_tokens=15,
            ),
        )

    with patch("litellm.completion", side_effect=mock_completion):
        with caplog.at_level(logging.INFO):
            # The return value is not inspected; only the retry side effects are.
            llm.call("What is the capital of France?")

    assert "Retrying LLM call without the unsupported 'stop'" in caplog.text
    assert "stop" in llm.additional_params.get("additional_drop_params", [])
def test_litellm_retry_catches_openai_api_stop_error(caplog):
    """
    Test that the retry logic still catches the OpenAI API error format
    ("Unsupported parameter: 'stop'").

    The first patched litellm.completion call raises the error; the second
    returns a stubbed response. The test then expects the retry log line and
    'stop' recorded in the LLM's additional_drop_params.
    """
    # patch("litellm.completion") needs litellm to be importable; skip when
    # the optional dependency is absent, as the sibling LiteLLM retry test
    # does.
    pytest.importorskip(
        "litellm",
        reason="litellm is not installed; skipping LiteLLM retry test",
    )

    llm = LLM(model="openai/gpt-5.2", stop=["Observation:"], is_litellm=True)

    api_error = Exception(
        "Unsupported parameter: 'stop' is not supported with this model."
    )

    call_count = 0

    def mock_completion(*args, **kwargs):
        """Fail on the first invocation, then return a canned completion."""
        nonlocal call_count
        call_count += 1
        if call_count == 1:
            raise api_error
        return MagicMock(
            choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
            usage=MagicMock(
                prompt_tokens=10,
                completion_tokens=5,
                total_tokens=15,
            ),
        )

    with patch("litellm.completion", side_effect=mock_completion):
        with caplog.at_level(logging.INFO):
            llm.call("What is the capital of France?")

    assert "Retrying LLM call without the unsupported 'stop'" in caplog.text
    assert "stop" in llm.additional_params.get("additional_drop_params", [])
@pytest.fixture
def ollama_llm():
    """Provide an LLM for the "ollama/llama3.2:3b" model, built with is_litellm=True."""
    ollama_model = LLM(model="ollama/llama3.2:3b", is_litellm=True)
    return ollama_model