Enhance invoke method with input sanitization, size limits, and nested dictionary depth validation

Co-Authored-By: Joe Moura <joao@crewai.com>
Enhance invoke method with better error handling, logging, and input validation
2026-01-28 17:48:13 +00:00 · 2025-03-17 02:55:38 +00:00 · 2025-03-17 02:51:46 +00:00 · 2025-03-17 02:49:10 +00:00
3 changed files with 216 additions and 1 deletions
--- a/src/crewai/tools/base_tool.py
+++ b/src/crewai/tools/base_tool.py
@@ -1,7 +1,11 @@
 import json
 import logging
 import warnings
 from abc import ABC, abstractmethod
 from inspect import signature
-from typing import Any, Callable, Type, get_args, get_origin
+from typing import Any, Callable, Dict, Optional, Type, Union, get_args, get_origin
 logger = logging.getLogger(__name__)
 from pydantic import (
    BaseModel,
@@ -75,6 +79,93 @@ class BaseTool(BaseModel, ABC):
        **kwargs: Any,
    ) -> Any:
        """Here goes the actual implementation of the tool."""
    def invoke(
        self, input: Union[str, dict], config: Optional[dict] = None, **kwargs: Any
    ) -> Any:
        """Parse ``input`` and dispatch it to ``_run``.

        This method provides a fallback implementation for models that don't
        support function calling natively (like QwQ-32B-Preview and
        deepseek-chat).

        Dispatch rules:

        * A string is first tried as JSON. If it is not valid JSON it is
          passed to ``_run`` unchanged as a single positional argument.
        * A value that is not a dict after parsing (for example a JSON number
          or list) is passed to ``_run`` as a single positional argument.
        * A dict is depth-checked, reduced to the keys declared in
          ``args_schema`` (when the schema can be read) and unpacked into
          ``_run`` as keyword arguments. Keys that are not declared are
          dropped with a warning.

        ``_run`` is called exactly once; any exception it raises propagates
        to the caller unchanged.

        Args:
            input: Either a string (raw or JSON) or a dictionary of arguments.
            config: Optional configuration dictionary. Accepted but currently
                unused; it is not forwarded to ``_run``.
            **kwargs: Additional keyword arguments. Accepted but currently
                unused; they are not forwarded to ``_run``.

        Returns:
            The result of calling the tool's ``_run`` method.

        Raises:
            ValueError: If input is neither a string nor a dictionary.
            ValueError: If a string input is larger than 100KB once encoded
                as UTF-8.
            ValueError: If a dictionary input nests dicts/lists/tuples more
                than 5 levels deep.
        """
        # Input type validation
        if not isinstance(input, (str, dict)):
            raise ValueError(f"Input must be string or dict, got {type(input)}")

        max_input_size = 100 * 1024  # 100KB, measured on the UTF-8 encoding
        max_depth = 5  # deepest allowed level of container nesting

        if isinstance(input, str):
            if len(input.encode("utf-8")) > max_input_size:
                message = f"Input string exceeds maximum size of {max_input_size} bytes"
                logger.warning(message)
                raise ValueError(message)
            try:
                input = json.loads(input)
            except json.JSONDecodeError as e:
                # Not JSON: hand the raw string to the tool as its only argument.
                logger.debug("Input string is not JSON format: %s", e)
                return self._run(input)
            logger.debug("Successfully parsed JSON input: %s", input)

        if not isinstance(input, dict):
            # Valid JSON that is not an object (number, list, string, ...).
            logger.debug("Using non-dict input directly: %s", input)
            return self._run(input)

        def within_depth(obj: Any, depth: int = 1) -> bool:
            """Return True when no container sits deeper than ``max_depth``."""
            if isinstance(obj, dict):
                children = obj.values()
            elif isinstance(obj, (list, tuple)):
                children = obj
            else:
                # Scalars are leaves; they do not add a nesting level.
                return True
            if depth > max_depth:
                return False
            return all(within_depth(child, depth + 1) for child in children)

        if not within_depth(input):
            message = f"Input contains nested structures beyond maximum depth of {max_depth}"
            logger.warning(message)
            raise ValueError(message)

        # Read the declared argument names. Only the schema lookup is guarded:
        # the tool call itself must stay outside the try block, otherwise a
        # failure inside the tool would be mistaken for a schema problem and
        # the tool would be executed a second time with the raw input.
        arg_names = None
        if hasattr(self, "args_schema") and self.args_schema is not None:
            try:
                arg_names = set(self.args_schema.model_json_schema()["properties"])
            except Exception as e:
                logger.warning("Schema parsing failed, using raw input: %s", e)

        if arg_names is None:
            # No usable schema: pass the whole dict through.
            logger.debug("Calling _run with unfiltered arguments: %s", input)
            return self._run(**input)

        filtered_args = {}
        for key, value in input.items():
            if key in arg_names:
                filtered_args[key] = value
            else:
                logger.warning("Ignoring unexpected argument: %s", key)
        logger.debug("Calling _run with filtered arguments: %s", filtered_args)
        return self._run(**filtered_args)
    def to_structured_tool(self) -> CrewStructuredTool:
        """Convert this tool to a CrewStructuredTool instance."""
--- a/tests/tools/test_invoke_method.py
+++ b/tests/tools/test_invoke_method.py
@@ -0,0 +1,55 @@
 from typing import Type
 import pytest
 from pydantic import BaseModel, Field
 from crewai.tools import BaseTool
 # Argument schema for TestTool: declares the single string field ``param``.
 class TestToolInput(BaseModel):
    param: str = Field(description="A test parameter")
 # Minimal BaseTool subclass used by the tests in this module.
 class TestTool(BaseTool):
    name: str = "Test Tool"
    description: str = "A tool for testing the invoke method"
    # Schema listing the arguments this tool declares (only ``param``).
    args_schema: Type[BaseModel] = TestToolInput
    def _run(self, param: str) -> str:
        # Echo the argument back so a test can see which value reached _run.
        return f"Tool executed with: {param}"
 def test_invoke_with_dict():
    """A dict input is unpacked into ``_run`` as keyword arguments."""
    payload = {"param": "test value"}
    outcome = TestTool().invoke(input=payload)
    assert outcome == "Tool executed with: test value"
 def test_invoke_with_json_string():
    """A JSON object string is parsed and then handled like a dict input."""
    json_payload = '{"param": "test value"}'
    outcome = TestTool().invoke(input=json_payload)
    assert outcome == "Tool executed with: test value"
 def test_invoke_with_raw_string():
    """A string that is not JSON reaches ``_run`` as its single argument."""
    raw_text = "test value"
    outcome = TestTool().invoke(input=raw_text)
    assert outcome == "Tool executed with: test value"
 def test_invoke_with_empty_dict():
    """An empty dict supplies no ``param``, so the call must raise."""
    tool = TestTool()
    no_arguments = {}
    # ``param`` is required, so invoking without it is expected to fail
    with pytest.raises(Exception):
        tool.invoke(input=no_arguments)
 def test_invoke_with_extra_args():
    """Keys that the schema does not declare are dropped before ``_run``."""
    payload = {"param": "test value", "extra": "ignored"}
    outcome = TestTool().invoke(input=payload)
    assert outcome == "Tool executed with: test value"
--- a/tests/tools/test_invoke_method_additional.py
+++ b/tests/tools/test_invoke_method_additional.py
@@ -0,0 +1,69 @@
 from typing import Type
 import pytest
 from pydantic import BaseModel, Field
 from crewai.tools import BaseTool
 # Argument schema for TestTool: declares the single string field ``param``.
 class TestToolInput(BaseModel):
    param: str = Field(description="A test parameter")
 # Minimal BaseTool subclass used by the tests in this module.
 class TestTool(BaseTool):
    name: str = "Test Tool"
    description: str = "A tool for testing the invoke method"
    # Schema listing the arguments this tool declares (only ``param``).
    args_schema: Type[BaseModel] = TestToolInput
    def _run(self, param: str) -> str:
        # Echo the argument back so a test can see which value reached _run.
        return f"Tool executed with: {param}"
 def test_invoke_with_invalid_type():
    """Inputs that are neither ``str`` nor ``dict`` are rejected with ValueError."""
    tool = TestTool()
    unsupported_inputs = (123, ["list", "not", "allowed"], None)
    for bad_input in unsupported_inputs:
        with pytest.raises(ValueError, match="Input must be string or dict"):
            tool.invoke(input=bad_input)
 def test_invoke_with_config():
    """Supplying a ``config`` dictionary does not change the tool's result."""
    payload = {"param": "test with config"}
    options = {"timeout": 30}
    # invoke accepts config; the result must be the same as without it
    outcome = TestTool().invoke(input=payload, config=options)
    assert outcome == "Tool executed with: test with config"
 def test_invoke_with_malformed_json():
    """Text that fails JSON parsing falls back to raw-string handling."""
    malformed = "{param: this is not valid JSON}"
    # The unparsable text should reach the tool unchanged
    outcome = TestTool().invoke(input=malformed)
    assert "this is not valid JSON" in outcome
 def test_invoke_with_nested_dict():
    """A dict argument that itself contains dicts reaches ``_run`` intact."""
    # Schema with one dict-typed field so a nested structure can be passed.
    class NestedToolInput(BaseModel):
        config: dict = Field(description="A nested configuration dictionary")
    # Tool that renders the received dict into its return string.
    class NestedTool(BaseTool):
        name: str = "Nested Tool"
        description: str = "A tool for testing nested dictionaries"
        args_schema: Type[BaseModel] = NestedToolInput
        def _run(self, config: dict) -> str:
            return f"Tool executed with nested config: {config}"
    payload = {"config": {"key1": "value1", "key2": {"nested": "value"}}}
    outcome = NestedTool().invoke(input=payload)
    # Prefix, a first-level key and a second-level key must all show up.
    for expected_fragment in ("Tool executed with nested config", "key1", "nested"):
        assert expected_fragment in outcome
Author	SHA1	Message	Date
Devin AI	945a1346a3	Enhance invoke method with input sanitization, size limits, and nested dictionary depth validation Co-Authored-By: Joe Moura <joao@crewai.com>	2025-03-17 02:55:38 +00:00
Devin AI	c0386b73b9	Enhance invoke method with better error handling, logging, and input validation Co-Authored-By: Joe Moura <joao@crewai.com>	2025-03-17 02:51:46 +00:00
Devin AI	3f25e535f4	Fix issue #2383 : Add invoke method to BaseTool for models without function calling support Co-Authored-By: Joe Moura <joao@crewai.com>	2025-03-17 02:49:10 +00:00