diff --git a/lib/crewai/src/crewai/project/crew_loader.py b/lib/crewai/src/crewai/project/crew_loader.py index f4ee0d077..1afa1e828 100644 --- a/lib/crewai/src/crewai/project/crew_loader.py +++ b/lib/crewai/src/crewai/project/crew_loader.py @@ -119,7 +119,11 @@ def _load_crew_project( for index, task_defn in enumerate(project.task_definitions): task_source = f"{source_label}: tasks[{index}]" - task_class = _task_class_from_definition(task_defn, f"{task_source}: type") + task_class = _task_class_from_definition( + task_defn, + f"{task_source}: type", + project_root=project_root, + ) task_kwargs = _task_kwargs_from_definition( task_defn, agents_map=agents_map, @@ -147,6 +151,7 @@ def _load_crew_project( tasks=tasks_list, agents_map=agents_map, source=source_label, + project_root=project_root, ) try: diff --git a/lib/crewai/src/crewai/project/json_loader.py b/lib/crewai/src/crewai/project/json_loader.py index ab13d881e..107eb8c0c 100644 --- a/lib/crewai/src/crewai/project/json_loader.py +++ b/lib/crewai/src/crewai/project/json_loader.py @@ -4,11 +4,15 @@ from __future__ import annotations from collections.abc import Callable from dataclasses import dataclass +import importlib +import inspect import json import logging -from pathlib import Path +from pathlib import Path, PureWindowsPath import re +import sys from typing import Any, cast +from urllib.parse import unquote, urlparse from pydantic import BaseModel, ValidationError @@ -93,6 +97,9 @@ _CONDITIONAL_TASK_TYPE_ALIASES = { "crewai.tasks.conditional_task.ConditionalTask", } _URI_RE = re.compile(r"^[A-Za-z][A-Za-z0-9+.-]*:") +_WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:") +_WINDOWS_UNC_PATH_RE = re.compile(r"^(?:\\\\|//)[^\\/]+[\\/][^\\/]+(?:[\\/]|$)") +_MAX_PYTHON_REF_DEPTH = 64 _AGENT_CALLABLE_FIELDS = {"guardrail", "step_callback"} _AGENT_CALLABLE_LIST_FIELDS = {"callbacks"} @@ -204,8 +211,12 @@ def load_agent(source: str | Path) -> Any: """Load an existing ``Agent`` from a ``.json`` / ``.jsonc`` definition file.""" path = Path(source) defn = _expect_object(load_jsonc_file(path), path) - root = path.parent.parent if path.parent.name == "agents" else Path.cwd() - agent_class = _agent_class_from_definition(defn, f"{path}: type") + root = path.parent.parent if path.parent.name == "agents" else path.parent + agent_class = _agent_class_from_definition( + defn, + f"{path}: type", + project_root=root, + ) agent_kwargs = _agent_kwargs_from_definition( defn, path, @@ -343,6 +354,7 @@ def _load_json_crew_project_definition( agent_defn, f"{agent_source}: type", resolve_python_refs=not collect_errors, + project_root=project_root, ) agent_kwargs = _agent_kwargs_from_definition( agent_defn, @@ -399,6 +411,7 @@ def _load_json_crew_project_definition( task_defn, task_path, resolve_python_refs=not collect_errors, + project_root=project_root, ) ) missing_required = [ @@ -567,11 +580,17 @@ def _python_ref_errors(value: Any, source: str | Path) -> list[str]: path = value.get(PYTHON_REF_KEY) if not isinstance(path, str) or not path.strip(): return [f"{source}: Python reference '{PYTHON_REF_KEY}' must be a string"] + path = path.strip() if "." not in path: return [ f"{source}: Python reference '{path}' must be a dotted import path " "like 'module.attribute'" ] + if not all(part.isidentifier() for part in path.split(".")): + return [ + f"{source}: Python reference '{path}' must contain only valid " + "Python identifiers separated by dots" + ] return [] @@ -588,17 +607,37 @@ def _resolve_python_ref( source: str | Path, *, expected: str, + project_root: Path | None, ) -> Any: - from crewai.utilities.import_utils import import_and_validate_definition - path = _python_ref_path(value, source) try: - resolved = import_and_validate_definition(path) + resolved = _import_project_python_reference(path, source, project_root) + except JSONProjectError: + raise except Exception as exc: - raise JSONProjectError(f"{source}: failed to import '{path}': {exc}") from exc + logger.debug( + "Failed to resolve JSON Python reference %r from %s", + path, + source, + exc_info=True, + ) + raise JSONProjectError( + f"{source}: failed to import Python reference '{path}'" + ) from exc if expected == "any": return resolved + if expected == "object": + if ( + isinstance(resolved, type) + or inspect.ismodule(resolved) + or inspect.isroutine(resolved) + ): + raise JSONProjectError( + f"{source}: Python reference '{path}' is not a supported object " + "reference" + ) + return resolved if expected == "callable" and not callable(resolved): raise JSONProjectError(f"{source}: Python reference '{path}' is not callable") if expected == "class" and not isinstance(resolved, type): @@ -606,13 +645,172 @@ def _resolve_python_ref( return resolved +def _import_project_python_reference( + path: str, + source: str | Path, + project_root: Path | None, +) -> Any: + module_path, _, attr = path.rpartition(".") + root = _project_root(project_root) + _project_module_file(module_path, root, source) + displaced_modules = _evict_external_cached_modules(module_path, root) + + logger.info( + "Resolving JSON Python reference '%s' for %s from project root %s", + path, + source, + root, + ) + + inserted_sys_path = False + root_str = str(root) + if not sys.path or sys.path[0] != root_str: + sys.path.insert(0, root_str) + inserted_sys_path = True + + try: + try: + module = importlib.import_module(module_path) + except Exception as exc: + logger.debug( + "Failed to import JSON Python reference module %r from %s", + module_path, + source, + exc_info=True, + ) + raise JSONProjectError( + f"{source}: failed to import Python reference '{path}'" + ) from exc + + if not _module_is_project_local(module, root): + raise JSONProjectError( + f"{source}: Python reference '{path}' resolved outside project root" + ) + if not hasattr(module, attr): + raise JSONProjectError( + f"{source}: Python reference '{path}' could not be resolved" + ) + return getattr(module, attr) + finally: + if inserted_sys_path: + try: + sys.path.remove(root_str) + except ValueError: + logger.debug( + "Project root %s was already removed from sys.path while " + "resolving JSON Python reference %r", + root, + path, + ) + _restore_external_cached_modules(displaced_modules, root) + + +def _project_root(project_root: Path | None) -> Path: + return (project_root or Path.cwd()).resolve() + + +def _project_module_file( + module_path: str, + project_root: Path, + source: str | Path, +) -> Path: + module_rel = Path(*module_path.split(".")) + candidates = [ + project_root / module_rel.with_suffix(".py"), + project_root / module_rel / "__init__.py", + ] + for candidate in candidates: + resolved = candidate.resolve() + if resolved.is_file() and _is_relative_to(resolved, project_root): + return resolved + raise JSONProjectError( + f"{source}: Python references in JSON configs must point to modules inside the " + f"project root; '{module_path}' was not found under {project_root}" + ) + + +def _evict_external_cached_modules( + module_path: str, + project_root: Path, +) -> dict[str, Any]: + displaced_modules: dict[str, Any] = {} + parts = module_path.split(".") + for index in range(len(parts), 0, -1): + prefix = ".".join(parts[:index]) + module = sys.modules.get(prefix) + if module is None or _module_is_project_local(module, project_root): + continue + displaced_modules[prefix] = module + logger.debug( + "Evicting cached module %r before resolving JSON Python reference " + "from project root %s", + prefix, + project_root, + ) + sys.modules.pop(prefix, None) + return displaced_modules + + +def _restore_external_cached_modules( + displaced_modules: dict[str, Any], + project_root: Path, +) -> None: + if not displaced_modules: + return + + displaced_prefixes = tuple(displaced_modules) + for name, module in list(sys.modules.items()): + if not any( + name == prefix or name.startswith(f"{prefix}.") + for prefix in displaced_prefixes + ): + continue + if _module_is_project_local(module, project_root): + logger.debug( + "Removing project-local module %r before restoring cached module", + name, + ) + sys.modules.pop(name, None) + + for name in sorted(displaced_modules, key=lambda value: value.count(".")): + logger.debug( + "Restoring cached module %r after JSON Python reference import", + name, + ) + sys.modules[name] = displaced_modules[name] + + +def _module_is_project_local(module: Any, project_root: Path) -> bool: + module_file = getattr(module, "__file__", None) + if module_file and _is_relative_to(Path(module_file).resolve(), project_root): + return True + module_paths = getattr(module, "__path__", None) + if module_paths is not None: + return any( + _is_relative_to(Path(path).resolve(), project_root) for path in module_paths + ) + return False + + +def _is_relative_to(path: Path, root: Path) -> bool: + try: + path.relative_to(root) + except ValueError: + return False + return True + + def _resolve_python_class( value: Any, source: str | Path, *, base_class: type[Any] | None = None, + project_root: Path | None, ) -> type[Any]: - cls = cast(type[Any], _resolve_python_ref(value, source, expected="class")) + cls = cast( + type[Any], + _resolve_python_ref(value, source, expected="class", project_root=project_root), + ) if base_class is not None and not issubclass(cls, base_class): raise JSONProjectError( f"{source}: Python reference '{_python_ref_path(value, source)}' " @@ -626,6 +824,7 @@ def _agent_class_from_definition( source: str | Path, *, resolve_python_refs: bool = True, + project_root: Path | None = None, ) -> type[Any]: from crewai import Agent @@ -643,7 +842,12 @@ def _agent_class_from_definition( return agent_class from crewai.agents.agent_builder.base_agent import BaseAgent - return _resolve_python_class(type_value, source, base_class=BaseAgent) + return _resolve_python_class( + type_value, + source, + base_class=BaseAgent, + project_root=project_root, + ) if isinstance(type_value, str): raise JSONProjectError( f"{source}: unsupported agent type '{type_value}'. Use 'Agent' or " @@ -657,6 +861,7 @@ def _task_class_from_definition( source: str | Path, *, resolve_python_refs: bool = True, + project_root: Path | None = None, ) -> type[Any]: from crewai import Task @@ -676,7 +881,12 @@ def _task_class_from_definition( if errors: raise JSONProjectValidationError(errors) return task_class - return _resolve_python_class(type_value, source, base_class=task_class) + return _resolve_python_class( + type_value, + source, + base_class=task_class, + project_root=project_root, + ) if isinstance(type_value, str): raise JSONProjectError( f"{source}: unsupported task type '{type_value}'. Use 'Task', " @@ -713,6 +923,7 @@ def _agent_kwargs_from_definition( defn, f"{path}: type", resolve_python_refs=resolve_python_refs, + project_root=project_root, ) allowed_fields = _agent_allowed_fields(agent_class) extra_allowed = {"settings", "type"} @@ -757,8 +968,8 @@ def _agent_kwargs_from_definition( agent_kwargs = {key: value for key, value in defn.items() if key in allowed_fields} agent_kwargs.update(settings) if resolve_tools: - _resolve_tool_fields(agent_kwargs, project_root=project_root) - _resolve_agent_python_refs(agent_kwargs, path) + _resolve_tool_fields(agent_kwargs, path, project_root=project_root) + _resolve_agent_python_refs(agent_kwargs, path, project_root) else: # Validation/deploy mode: check tool declarations structurally without # importing or instantiating anything — custom: tools execute @@ -778,7 +989,11 @@ def _task_kwargs_from_definition( source: str, project_root: Path | None = None, ) -> dict[str, Any]: - task_class = _task_class_from_definition(task_defn, f"{source}: type") + task_class = _task_class_from_definition( + task_defn, + f"{source}: type", + project_root=project_root, + ) allowed_fields = _task_allowed_fields(task_class) errors = _field_errors( task_defn, @@ -815,8 +1030,8 @@ def _task_kwargs_from_definition( context_tasks.append(task_name_map[ctx_name]) task_kwargs["context"] = context_tasks - _resolve_tool_fields(task_kwargs, project_root=project_root) - _resolve_task_python_refs(task_kwargs, source) + _resolve_tool_fields(task_kwargs, source, project_root=project_root) + _resolve_task_python_refs(task_kwargs, source, project_root) if "input_files" in task_kwargs: task_kwargs["input_files"] = _normalize_input_files( task_kwargs["input_files"], @@ -832,6 +1047,7 @@ def _crew_kwargs_from_definition( tasks: list[Any], agents_map: dict[str, Any], source: Path | str, + project_root: Path | None = None, ) -> dict[str, Any]: errors = _field_errors( defn, @@ -857,19 +1073,25 @@ def _crew_kwargs_from_definition( ) crew_kwargs["manager_agent"] = agents_map[manager_agent] - _resolve_crew_python_refs(crew_kwargs, source) + _resolve_crew_python_refs(crew_kwargs, source, project_root) return crew_kwargs def _resolve_tool_fields( - kwargs: dict[str, Any], project_root: Path | None = None + kwargs: dict[str, Any], + source: str | Path, + project_root: Path | None = None, ) -> None: tools = kwargs.get("tools") if tools is not None: kwargs["tools"] = _resolve_tools(tools, project_root=project_root) if "mcps" in kwargs: - kwargs["mcps"] = _resolve_mcp_python_refs(kwargs["mcps"]) + kwargs["mcps"] = _resolve_mcp_python_refs( + kwargs["mcps"], + f"{source}: mcps", + project_root, + ) def _field_errors( @@ -924,6 +1146,7 @@ def _task_definition_errors( source: str | Path, *, resolve_python_refs: bool, + project_root: Path | None, ) -> list[str]: skip_unknown = _definition_has_python_type(task_defn) and not resolve_python_refs try: @@ -931,6 +1154,7 @@ def _task_definition_errors( task_defn, f"{source}: type", resolve_python_refs=resolve_python_refs, + project_root=project_root, ) except JSONProjectValidationError as exc: return exc.errors @@ -946,6 +1170,13 @@ def _task_definition_errors( skip_unknown=skip_unknown, ) errors.extend(_python_reference_definition_errors(task_defn, source)) + errors.extend( + _input_files_definition_errors( + task_defn.get("input_files"), + f"{source}: input_files", + project_root, + ) + ) return errors @@ -1001,20 +1232,33 @@ def _python_reference_value_errors(value: Any, source: str | Path) -> list[str]: def _python_reference_value_errors_recursive( - value: Any, source: str | Path + value: Any, source: str | Path, depth: int = 0 ) -> list[str]: + if depth > _MAX_PYTHON_REF_DEPTH: + return [ + f"{source}: Python reference nesting exceeds maximum depth " + f"{_MAX_PYTHON_REF_DEPTH}" + ] if _is_python_ref(value): return _python_ref_errors(value, source) errors: list[str] = [] if isinstance(value, list): for index, item in enumerate(value): errors.extend( - _python_reference_value_errors_recursive(item, f"{source}[{index}]") + _python_reference_value_errors_recursive( + item, + f"{source}[{index}]", + depth + 1, + ) ) elif isinstance(value, dict): for key, item in value.items(): errors.extend( - _python_reference_value_errors_recursive(item, f"{source}.{key}") + _python_reference_value_errors_recursive( + item, + f"{source}.{key}", + depth + 1, + ) ) return errors @@ -1069,34 +1313,54 @@ def _mcp_python_ref_errors(value: Any, source: str | Path) -> list[str]: return errors -def _resolve_agent_python_refs(kwargs: dict[str, Any], source: str | Path) -> None: +def _resolve_agent_python_refs( + kwargs: dict[str, Any], + source: str | Path, + project_root: Path | None, +) -> None: _resolve_callable_fields( kwargs, source, scalar_fields=_AGENT_CALLABLE_FIELDS, list_fields=_AGENT_CALLABLE_LIST_FIELDS, + project_root=project_root, ) if _is_python_ref(kwargs.get("executor_class")): kwargs["executor_class"] = _resolve_python_class( - kwargs["executor_class"], f"{source}: executor_class" + kwargs["executor_class"], + f"{source}: executor_class", + project_root=project_root, ) if "embedder" in kwargs: - kwargs["embedder"] = _resolve_embedder_python_refs(kwargs["embedder"], source) + kwargs["embedder"] = _resolve_embedder_python_refs( + kwargs["embedder"], source, project_root + ) if "a2a" in kwargs: - kwargs["a2a"] = _resolve_a2a_python_refs(kwargs["a2a"], source) - _resolve_object_reference_fields(kwargs, source, _AGENT_OBJECT_REF_FIELDS) + kwargs["a2a"] = _resolve_a2a_python_refs(kwargs["a2a"], source, project_root) + _resolve_object_reference_fields( + kwargs, source, _AGENT_OBJECT_REF_FIELDS, project_root + ) -def _resolve_task_python_refs(kwargs: dict[str, Any], source: str | Path) -> None: +def _resolve_task_python_refs( + kwargs: dict[str, Any], + source: str | Path, + project_root: Path | None, +) -> None: _resolve_callable_fields( kwargs, source, scalar_fields=_TASK_CALLABLE_FIELDS, list_fields=_TASK_CALLABLE_LIST_FIELDS, + project_root=project_root, ) for field in _TASK_MODEL_CLASS_FIELDS: if _is_python_ref(kwargs.get(field)): - kwargs[field] = _resolve_model_class(kwargs[field], f"{source}: {field}") + kwargs[field] = _resolve_model_class( + kwargs[field], + f"{source}: {field}", + project_root, + ) if _is_python_ref(kwargs.get("converter_cls")): from crewai.utilities.converter import Converter @@ -1104,51 +1368,93 @@ def _resolve_task_python_refs(kwargs: dict[str, Any], source: str | Path) -> Non kwargs["converter_cls"], f"{source}: converter_cls", base_class=Converter, + project_root=project_root, ) elif isinstance(kwargs.get("converter_cls"), str): raise JSONProjectError( f"{source}: converter_cls must use " f'{{"{PYTHON_REF_KEY}": "module.ConverterSubclass"}}' ) - _resolve_object_reference_fields(kwargs, source, _TASK_OBJECT_REF_FIELDS) + _resolve_object_reference_fields( + kwargs, source, _TASK_OBJECT_REF_FIELDS, project_root + ) -def _resolve_crew_python_refs(kwargs: dict[str, Any], source: str | Path) -> None: +def _resolve_crew_python_refs( + kwargs: dict[str, Any], + source: str | Path, + project_root: Path | None, +) -> None: _resolve_callable_fields( kwargs, source, scalar_fields=_CREW_CALLABLE_FIELDS, list_fields=_CREW_CALLABLE_LIST_FIELDS, + project_root=project_root, ) if "embedder" in kwargs: - kwargs["embedder"] = _resolve_embedder_python_refs(kwargs["embedder"], source) - _resolve_object_reference_fields(kwargs, source, _CREW_OBJECT_REF_FIELDS) + kwargs["embedder"] = _resolve_embedder_python_refs( + kwargs["embedder"], source, project_root + ) + _resolve_object_reference_fields( + kwargs, source, _CREW_OBJECT_REF_FIELDS, project_root + ) def _resolve_object_reference_fields( kwargs: dict[str, Any], source: str | Path, fields: set[str], + project_root: Path | None, ) -> None: for field in fields: if field not in kwargs: continue kwargs[field] = _resolve_python_refs_recursively( - kwargs[field], f"{source}: {field}" + kwargs[field], + f"{source}: {field}", + project_root, ) -def _resolve_python_refs_recursively(value: Any, source: str | Path) -> Any: +def _resolve_python_refs_recursively( + value: Any, + source: str | Path, + project_root: Path | None, + depth: int = 0, +) -> Any: + if depth > _MAX_PYTHON_REF_DEPTH: + raise JSONProjectValidationError( + [ + f"{source}: Python reference nesting exceeds maximum depth " + f"{_MAX_PYTHON_REF_DEPTH}" + ] + ) if _is_python_ref(value): - return _resolve_python_ref(value, source, expected="any") + return _resolve_python_ref( + value, + source, + expected="object", + project_root=project_root, + ) if isinstance(value, list): return [ - _resolve_python_refs_recursively(item, f"{source}[{index}]") + _resolve_python_refs_recursively( + item, + f"{source}[{index}]", + project_root, + depth + 1, + ) for index, item in enumerate(value) ] if isinstance(value, dict): return { - key: _resolve_python_refs_recursively(item, f"{source}.{key}") + key: _resolve_python_refs_recursively( + item, + f"{source}.{key}", + project_root, + depth + 1, + ) for key, item in value.items() } return value @@ -1160,6 +1466,7 @@ def _resolve_callable_fields( *, scalar_fields: set[str], list_fields: set[str], + project_root: Path | None, ) -> None: for field in scalar_fields: if _is_python_ref(kwargs.get(field)): @@ -1167,6 +1474,7 @@ def _resolve_callable_fields( kwargs[field], f"{source}: {field}", expected="callable", + project_root=project_root, ) for field in list_fields: value = kwargs.get(field) @@ -1174,7 +1482,10 @@ def _resolve_callable_fields( continue kwargs[field] = [ _resolve_python_ref( - item, f"{source}: {field}[{index}]", expected="callable" + item, + f"{source}: {field}[{index}]", + expected="callable", + project_root=project_root, ) if _is_python_ref(item) else item @@ -1182,11 +1493,24 @@ def _resolve_callable_fields( ] -def _resolve_model_class(value: Any, source: str | Path) -> type[BaseModel]: - return _resolve_python_class(value, source, base_class=BaseModel) +def _resolve_model_class( + value: Any, + source: str | Path, + project_root: Path | None, +) -> type[BaseModel]: + return _resolve_python_class( + value, + source, + base_class=BaseModel, + project_root=project_root, + ) -def _resolve_embedder_python_refs(value: Any, source: str | Path) -> Any: +def _resolve_embedder_python_refs( + value: Any, + source: str | Path, + project_root: Path | None, +) -> Any: if not isinstance(value, dict): return value config = value.get("config") @@ -1206,15 +1530,24 @@ def _resolve_embedder_python_refs(value: Any, source: str | Path) -> Any: embedding_callable, f"{source}: embedder.config.embedding_callable", base_class=CustomEmbeddingFunction, + project_root=project_root, ) normalized["config"] = normalized_config return normalized -def _resolve_a2a_python_refs(value: Any, source: str | Path) -> Any: +def _resolve_a2a_python_refs( + value: Any, + source: str | Path, + project_root: Path | None, +) -> Any: if isinstance(value, list): return [ - _resolve_a2a_python_refs(item, f"{source}: a2a[{index}]") + _resolve_a2a_python_refs( + item, + f"{source}: a2a[{index}]", + project_root, + ) for index, item in enumerate(value) ] if not isinstance(value, dict): @@ -1228,6 +1561,7 @@ def _resolve_a2a_python_refs(value: Any, source: str | Path) -> Any: normalized["response_model"] = _resolve_model_class( response_model, f"{source}: a2a.response_model", + project_root, ) elif isinstance(response_model, dict): from crewai.utilities.pydantic_schema_utils import create_model_from_schema @@ -1236,11 +1570,19 @@ def _resolve_a2a_python_refs(value: Any, source: str | Path) -> Any: return normalized -def _resolve_mcp_python_refs(value: Any) -> Any: +def _resolve_mcp_python_refs( + value: Any, + source: str | Path, + project_root: Path | None, +) -> Any: if not isinstance(value, list): return value return [ - _resolve_mcp_config_python_refs(config, index) + _resolve_mcp_config_python_refs( + config, + f"{source}[{index}]", + project_root, + ) if isinstance(config, dict) else config for index, config in enumerate(value) @@ -1248,7 +1590,9 @@ def _resolve_mcp_python_refs(value: Any) -> Any: def _resolve_mcp_config_python_refs( - config: dict[str, Any], index: int + config: dict[str, Any], + source: str | Path, + project_root: Path | None, ) -> dict[str, Any]: tool_filter = config.get("tool_filter") if tool_filter is None: @@ -1257,8 +1601,9 @@ def _resolve_mcp_config_python_refs( if _is_python_ref(tool_filter): normalized["tool_filter"] = _resolve_python_ref( tool_filter, - f"mcps[{index}].tool_filter", + f"{source}.tool_filter", expected="callable", + project_root=project_root, ) elif isinstance(tool_filter, dict) and tool_filter.get("type") == "static": from crewai.mcp.filters import create_static_tool_filter @@ -1267,15 +1612,11 @@ def _resolve_mcp_config_python_refs( blocked_tool_names = tool_filter.get("blocked_tool_names") if allowed_tool_names is not None and not _is_string_list(allowed_tool_names): raise JSONProjectValidationError( - [ - f"mcps[{index}].tool_filter.allowed_tool_names must be a list of strings" - ] + [f"{source}.tool_filter.allowed_tool_names must be a list of strings"] ) if blocked_tool_names is not None and not _is_string_list(blocked_tool_names): raise JSONProjectValidationError( - [ - f"mcps[{index}].tool_filter.blocked_tool_names must be a list of strings" - ] + [f"{source}.tool_filter.blocked_tool_names must be a list of strings"] ) normalized["tool_filter"] = create_static_tool_filter( allowed_tool_names=allowed_tool_names, @@ -1304,7 +1645,11 @@ def _normalize_input_files( for name, file_spec in value.items(): if isinstance(file_spec, str): normalized[name] = { - "source": _resolve_project_path(file_spec, project_root) + "source": _resolve_project_path( + file_spec, + project_root, + f"{source}: input_files.{name}", + ) } continue if isinstance(file_spec, dict): @@ -1313,7 +1658,9 @@ def _normalize_input_files( field_value = normalized_spec.get(field) if isinstance(field_value, str): normalized_spec[field] = _resolve_project_path( - field_value, project_root + field_value, + project_root, + f"{source}: input_files.{name}.{field}", ) normalized[name] = normalized_spec continue @@ -1321,13 +1668,89 @@ def _normalize_input_files( return normalized -def _resolve_project_path(value: str, project_root: Path | None) -> str: - if not value or _URI_RE.match(value): +def _input_files_definition_errors( + value: Any, + source: str | Path, + project_root: Path | None, +) -> list[str]: + if value is None: + return [] + if not isinstance(value, dict): + return [f"{source} must be an object mapping names to file specs"] + + errors: list[str] = [] + for name, file_spec in value.items(): + if isinstance(file_spec, str): + try: + _resolve_project_path(file_spec, project_root, f"{source}.{name}") + except JSONProjectValidationError as exc: + errors.extend(exc.errors) + continue + if isinstance(file_spec, dict): + for field in ("source", "path"): + field_value = file_spec.get(field) + if not isinstance(field_value, str): + continue + try: + _resolve_project_path( + field_value, + project_root, + f"{source}.{name}.{field}", + ) + except JSONProjectValidationError as exc: + errors.extend(exc.errors) + return errors + + +def _resolve_project_path( + value: str, + project_root: Path | None, + source: str | Path, +) -> str: + if not value: return value - path = Path(value) - if path.is_absolute(): + root = _project_root(project_root) + parsed = urlparse(value) + path_value = value + if ( + parsed.scheme + and parsed.scheme.lower() != "file" + and not _WINDOWS_DRIVE_PATH_RE.match(value) + ): return value - return str(((project_root or Path.cwd()) / path).resolve()) + if parsed.scheme.lower() == "file": + if parsed.netloc not in {"", "localhost"}: + raise JSONProjectValidationError( + [f"{source}: file URI '{value}' must point to a local project path"] + ) + path_value = unquote(parsed.path) + if re.match(r"^/[A-Za-z]:", path_value): + path_value = path_value[1:] + path = Path(path_value) + elif _URI_RE.match(value): + path = Path(path_value) + else: + path = Path(path_value) + if ( + _looks_like_windows_absolute_path(path_value) + or _WINDOWS_DRIVE_PATH_RE.match(path_value) + ) and not path.is_absolute(): + raise JSONProjectValidationError( + [f"{source}: path '{value}' resolves outside the project root {root}"] + ) + resolved = path.resolve() if path.is_absolute() else (root / path).resolve() + if not _is_relative_to(resolved, root): + raise JSONProjectValidationError( + [f"{source}: path '{value}' resolves outside the project root {root}"] + ) + return str(resolved) + + +def _looks_like_windows_absolute_path(value: str) -> bool: + if _WINDOWS_UNC_PATH_RE.match(value): + return True + windows_path = PureWindowsPath(value) + return windows_path.is_absolute() def _format_validation_error(path: str | Path, exc: ValidationError) -> str: diff --git a/lib/crewai/tests/project/test_crew_loader.py b/lib/crewai/tests/project/test_crew_loader.py index 367bdbd30..f8cb806ad 100644 --- a/lib/crewai/tests/project/test_crew_loader.py +++ b/lib/crewai/tests/project/test_crew_loader.py @@ -4,6 +4,8 @@ from __future__ import annotations import json from pathlib import Path +import sys +import types import pytest @@ -560,6 +562,98 @@ class TestLoadCrew: assert "summary" in task.output_json.model_fields assert task.converter_cls.__name__ == "SpecialConverter" + def test_crew_rejects_stdlib_python_ref_for_agent_callback( + self, tmp_path: Path + ): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent( + agents_dir, + "worker", + step_callback={"python": "os.system"}, + ) + + crew_def = { + "name": "unsafe_callback_crew", + "agents": ["worker"], + "tasks": [ + { + "name": "work", + "description": "Do work", + "expected_output": "Work done", + "agent": "worker", + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + with pytest.raises(JSONProjectError, match="project root"): + load_crew(crew_file) + + def test_crew_rejects_stdlib_python_ref_for_mcp_tool_filter( + self, tmp_path: Path + ): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent( + agents_dir, + "worker", + mcps=[ + { + "command": "python", + "args": ["server.py"], + "tool_filter": {"python": "os.system"}, + } + ], + ) + + crew_def = { + "name": "unsafe_mcp_filter_crew", + "agents": ["worker"], + "tasks": [ + { + "name": "work", + "description": "Do work", + "expected_output": "Work done", + "agent": "worker", + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + with pytest.raises(JSONProjectError, match="project root"): + load_crew(crew_file) + + def test_crew_rejects_callable_python_ref_for_object_field( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + _write_python_defs(tmp_path) + monkeypatch.syspath_prepend(str(tmp_path)) + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent( + agents_dir, + "worker", + security_config={"python": "json_refs.always_true"}, + ) + + crew_def = { + "name": "unsafe_object_ref_crew", + "agents": ["worker"], + "tasks": [ + { + "name": "work", + "description": "Do work", + "expected_output": "Work done", + "agent": "worker", + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + with pytest.raises(JSONProjectError, match="supported object reference"): + load_crew(crew_file) + def test_crew_loads_project_relative_input_files(self, tmp_path: Path): agents_dir = tmp_path / "agents" agents_dir.mkdir() @@ -595,6 +689,147 @@ class TestLoadCrew: assert _input_file_path(input_files["brief"]) == brief_path assert _input_file_path(input_files["spec"]) == spec_path + def test_crew_rejects_relative_input_file_outside_project(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "reader") + + crew_def = { + "name": "unsafe_input_files_crew", + "agents": ["reader"], + "tasks": [ + { + "name": "read", + "description": "Read files", + "expected_output": "File summary", + "agent": "reader", + "input_files": {"secret": "../secret.txt"}, + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + with pytest.raises(JSONProjectValidationError, match="outside the project root"): + load_crew(crew_file) + + def test_crew_rejects_absolute_input_file_outside_project(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "reader") + outside_path = tmp_path.parent / "secret.txt" + + crew_def = { + "name": "unsafe_absolute_input_files_crew", + "agents": ["reader"], + "tasks": [ + { + "name": "read", + "description": "Read files", + "expected_output": "File summary", + "agent": "reader", + "input_files": {"secret": str(outside_path)}, + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + with pytest.raises(JSONProjectValidationError, match="outside the project root"): + load_crew(crew_file) + + def test_crew_rejects_file_uri_input_file_outside_project(self, tmp_path: Path): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "reader") + outside_uri = (tmp_path.parent / "secret.txt").as_uri() + + crew_def = { + "name": "unsafe_file_uri_input_files_crew", + "agents": ["reader"], + "tasks": [ + { + "name": "read", + "description": "Read files", + "expected_output": "File summary", + "agent": "reader", + "input_files": {"secret": outside_uri}, + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + with pytest.raises(JSONProjectValidationError, match="outside the project root"): + load_crew(crew_file) + + @pytest.mark.parametrize( + "outside_path", + [ + r"C:\Users\alice\.ssh\id_rsa", + "C:/Users/alice/.ssh/id_rsa", + r"\\server\share\secret.txt", + "//server/share/secret.txt", + ], + ) + def test_crew_rejects_windows_input_file_outside_project( + self, tmp_path: Path, outside_path: str + ): + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent(agents_dir, "reader") + + crew_def = { + "name": "unsafe_windows_input_files_crew", + "agents": ["reader"], + "tasks": [ + { + "name": "read", + "description": "Read files", + "expected_output": "File summary", + "agent": "reader", + "input_files": {"secret": outside_path}, + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + with pytest.raises(JSONProjectValidationError, match="outside the project root"): + load_crew(crew_file) + + def test_crew_restores_external_module_cache_after_project_ref( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ): + _write_python_defs(tmp_path) + external_module = types.ModuleType("json_refs") + external_module.__file__ = str(tmp_path.parent / "json_refs.py") + external_module.marker = "external" + monkeypatch.setitem(sys.modules, "json_refs", external_module) + + agents_dir = tmp_path / "agents" + agents_dir.mkdir() + _write_agent( + agents_dir, + "worker", + step_callback={"python": "json_refs.task_callback"}, + ) + + crew_def = { + "name": "cache_restore_crew", + "agents": ["worker"], + "tasks": [ + { + "name": "work", + "description": "Do work", + "expected_output": "Work done", + "agent": "worker", + } + ], + } + crew_file = _write_crew(tmp_path, crew_def) + + crew, _ = load_crew(crew_file) + + assert crew.agents[0].step_callback.__name__ == "task_callback" + assert sys.modules["json_refs"] is external_module + def test_missing_agent_file_raises(self, tmp_path: Path): agents_dir = tmp_path / "agents" agents_dir.mkdir() diff --git a/lib/crewai/tests/project/test_json_loader.py b/lib/crewai/tests/project/test_json_loader.py index 0da719c5a..3acfabf5d 100644 --- a/lib/crewai/tests/project/test_json_loader.py +++ b/lib/crewai/tests/project/test_json_loader.py @@ -11,6 +11,7 @@ import pytest from crewai.llms.base_llm import BaseLLM from crewai.project.json_loader import ( JSONProjectValidationError, + _looks_like_windows_absolute_path, find_json_project_file, load_agent, strip_jsonc_comments, @@ -74,6 +75,31 @@ def test_find_json_project_file_prefers_jsonc(tmp_path: Path): assert find_json_project_file(tmp_path, "agent") == jsonc_path +@pytest.mark.parametrize( + "path_value", + [ + r"C:\Users\alice\.ssh\id_rsa", + "C:/Users/alice/.ssh/id_rsa", + r"\\server\share\secret.txt", + "//server/share/secret.txt", + ], +) +def test_windows_absolute_path_detection(path_value: str): + assert _looks_like_windows_absolute_path(path_value) + + +@pytest.mark.parametrize( + "path_value", + [ + r"folder\file.txt", + "folder/file.txt", + r"\server\share\secret.txt", + ], +) +def test_windows_absolute_path_detection_ignores_relative_paths(path_value: str): + assert not _looks_like_windows_absolute_path(path_value) + + class TestLoadAgent: def test_load_minimal_agent(self, tmp_path: Path): agent_def = { @@ -480,6 +506,28 @@ class TestValidationDoesNotExecuteTools: assert "Invalid custom tool name" in str(exc_info.value) + def test_validate_rejects_deep_python_ref_nesting(self, tmp_path): + from crewai.project.json_loader import validate_crew_project + + crew_path = self._write_project( + tmp_path, + tool_line='{"tool_type": "some.module.Tool"}', + ) + agent_file = tmp_path / "agents" / "worker.jsonc" + agent_def = json.loads(agent_file.read_text()) + nested: dict[str, object] = {} + current = nested + for _ in range(70): + child: dict[str, object] = {} + current["nested"] = child + current = child + current["ref"] = {"python": "callbacks.step_callback"} + agent_def["security_config"] = nested + agent_file.write_text(json.dumps(agent_def)) + + with pytest.raises(JSONProjectValidationError, match="maximum depth"): + validate_crew_project(crew_path, tmp_path / "agents") + class TestCustomToolPathSafety: @pytest.mark.parametrize( diff --git a/lib/crewai/tests/test_crew.py b/lib/crewai/tests/test_crew.py index 8ce25774e..5b06685d8 100644 --- a/lib/crewai/tests/test_crew.py +++ b/lib/crewai/tests/test_crew.py @@ -3830,7 +3830,6 @@ def test_crew_testing_function(researcher): assert isinstance(received_events[1], CrewTestCompletedEvent) -@pytest.mark.vcr() def test_hierarchical_verbose_manager_agent(researcher, writer): task = Task( description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.", @@ -3845,13 +3844,18 @@ def test_hierarchical_verbose_manager_agent(researcher, writer): verbose=True, ) - crew.kickoff() + mock_task_output = TaskOutput( + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] + ) + task.output = mock_task_output + + with patch.object(Task, "execute_sync", return_value=mock_task_output): + crew.kickoff() assert crew.manager_agent is not None assert crew.manager_agent.verbose -@pytest.mark.vcr() def test_hierarchical_verbose_false_manager_agent(researcher, writer): task = Task( description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.", @@ -3866,7 +3870,13 @@ def test_hierarchical_verbose_false_manager_agent(researcher, writer): verbose=False, ) - crew.kickoff() + mock_task_output = TaskOutput( + description="Mock description", raw="mocked output", agent="mocked agent", messages=[] + ) + task.output = mock_task_output + + with patch.object(Task, "execute_sync", return_value=mock_task_output): + crew.kickoff() assert crew.manager_agent is not None assert not crew.manager_agent.verbose