From acd5aadfd15d69b26c1e11cc70f7c4ea27bd1c20 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 31 May 2025 19:41:21 +0000 Subject: [PATCH] Fix pandas DataFrame input support in crew.kickoff() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add automatic serialization of unsupported types in interpolate_only - Support pandas DataFrames and other complex objects via to_serializable - Add comprehensive tests for DataFrame inputs - Maintain backward compatibility with existing input types Fixes #2925 Co-Authored-By: João --- src/crewai/utilities/string_utils.py | 55 ++++++++++------ tests/crew_test.py | 93 ++++++++++++++++++++++++++++ tests/utilities/test_string_utils.py | 80 +++++++++++++++++++++++- 3 files changed, 207 insertions(+), 21 deletions(-) diff --git a/src/crewai/utilities/string_utils.py b/src/crewai/utilities/string_utils.py index 255e66a0b..630512b04 100644 --- a/src/crewai/utilities/string_utils.py +++ b/src/crewai/utilities/string_utils.py @@ -25,27 +25,42 @@ def interpolate_only( ValueError: If a value contains unsupported types or a template variable is missing """ - # Validation function for recursive type checking - def validate_type(value: Any) -> None: - if value is None: - return - if isinstance(value, (str, int, float, bool)): - return - if isinstance(value, (dict, list)): - for item in value.values() if isinstance(value, dict) else value: - validate_type(item) - return - raise ValueError( - f"Unsupported type {type(value).__name__} in inputs. " - "Only str, int, float, bool, dict, and list are allowed." - ) - - # Validate all input values + from crewai.utilities.serialization import to_serializable + + processed_inputs = {} + supported_types = (str, int, float, bool, dict, list) + for key, value in inputs.items(): - try: - validate_type(value) - except ValueError as e: - raise ValueError(f"Invalid value for key '{key}': {str(e)}") from e + if value is None or isinstance(value, supported_types): + def validate_type(val: Any) -> None: + if val is None: + return + if isinstance(val, (str, int, float, bool)): + return + if isinstance(val, (dict, list)): + for item in val.values() if isinstance(val, dict) else val: + validate_type(item) + return + raise ValueError( + f"Unsupported type {type(val).__name__} in inputs. " + "Only str, int, float, bool, dict, and list are allowed." + ) + + try: + validate_type(value) + processed_inputs[key] = value + except ValueError as e: + raise ValueError(f"Invalid value for key '{key}': {str(e)}") from e + else: + try: + processed_inputs[key] = to_serializable(value) + except Exception as e: + raise ValueError( + f"Invalid value for key '{key}': Unable to serialize {type(value).__name__}. " + f"Serialization error: {str(e)}" + ) + + inputs = processed_inputs if input_string is None or not input_string: return "" diff --git a/tests/crew_test.py b/tests/crew_test.py index 62b934883..6b428c0f5 100644 --- a/tests/crew_test.py +++ b/tests/crew_test.py @@ -4566,3 +4566,96 @@ def test_reset_agent_knowledge_with_only_agent_knowledge(researcher,writer): mock_reset_agent_knowledge.assert_called_once_with([mock_ks_research,mock_ks_writer]) +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_crew_kickoff_with_pandas_dataframe(): + """Test that crew.kickoff works with pandas DataFrame inputs.""" + import pandas as pd + + df = pd.DataFrame({ + "name": ["Alice", "Bob", "Charlie"], + "age": [25, 30, 35], + "city": ["New York", "London", "Tokyo"] + }) + + agent = Agent( + role="Data Analyst", + goal="Analyze the provided data", + backstory="You are an expert data analyst", + ) + + task = Task( + description="Analyze this dataset: {data}", + expected_output="A brief summary of the data", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + + result = crew.kickoff(inputs={"data": df}) + assert result is not None + assert "Alice" in str(result) or "Bob" in str(result) + + +def test_crew_inputs_interpolate_with_dataframe(): + """Test that input interpolation works with pandas DataFrames.""" + import pandas as pd + + df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) + + agent = Agent( + role="Analyst", + goal="Process {data_type} data", + backstory="Expert in {data_type} analysis", + ) + + task = Task( + description="Process this data: {dataset}", + expected_output="Analysis of {dataset}", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + inputs = {"data_type": "tabular", "dataset": df} + + crew._interpolate_inputs(inputs=inputs) + + assert "tabular" in crew.agents[0].goal + assert "tabular" in crew.agents[0].backstory + assert str(df) in crew.tasks[0].description + assert str(df) in crew.tasks[0].expected_output + + +def test_crew_inputs_interpolate_mixed_types_with_dataframe(): + """Test input interpolation with mixed types including DataFrames.""" + import pandas as pd + + df = pd.DataFrame({"values": [10, 20, 30]}) + + agent = Agent( + role="{role_name}", + goal="Analyze {count} records", + backstory="Expert with {dataset}", + ) + + task = Task( + description="Process {dataset} with {count} records", + expected_output="{count} insights from {dataset}", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + inputs = { + "role_name": "Data Scientist", + "count": 3, + "dataset": df + } + + crew._interpolate_inputs(inputs=inputs) + + assert crew.agents[0].role == "Data Scientist" + assert "3" in crew.agents[0].goal + assert str(df) in crew.agents[0].backstory + assert str(df) in crew.tasks[0].description + assert "3" in crew.tasks[0].expected_output + + diff --git a/tests/utilities/test_string_utils.py b/tests/utilities/test_string_utils.py index 441aae8c0..27ba2125c 100644 --- a/tests/utilities/test_string_utils.py +++ b/tests/utilities/test_string_utils.py @@ -1,6 +1,7 @@ from typing import Any, Dict, List, Union import pytest +import pandas as pd from crewai.utilities.string_utils import interpolate_only @@ -184,4 +185,81 @@ class TestInterpolateOnly: with pytest.raises(ValueError) as excinfo: interpolate_only(template, inputs) - assert "inputs dictionary cannot be empty" in str(excinfo.value).lower() + + def test_interpolate_only_with_dataframe(self): + """Test that interpolate_only handles pandas DataFrames correctly.""" + df = pd.DataFrame({"name": ["Alice", "Bob"], "age": [25, 30]}) + + result = interpolate_only("Data: {data}", {"data": df}) + + assert "Alice" in result + assert "Bob" in result + assert "25" in result + assert "30" in result + + def test_interpolate_only_mixed_types_with_dataframe(self): + """Test interpolate_only with mixed input types including DataFrame.""" + df = pd.DataFrame({"col": [1, 2, 3]}) + + inputs = { + "text": "hello", + "number": 42, + "flag": True, + "data": df, + "items": [1, 2, 3] + } + + template = "Text: {text}, Number: {number}, Flag: {flag}, Data: {data}, Items: {items}" + result = interpolate_only(template, inputs) + + assert "hello" in result + assert "42" in result + assert "True" in result + assert "col" in result + assert "[1, 2, 3]" in result + + def test_interpolate_only_unsupported_type_error(self): + """Test that interpolate_only handles unsupported types gracefully.""" + class CustomObject: + def __str__(self): + raise Exception("Cannot serialize") + + with pytest.raises(ValueError, match="Unable to serialize CustomObject"): + interpolate_only("Value: {obj}", {"obj": CustomObject()}) + + def test_interpolate_only_complex_dataframe(self): + """Test interpolate_only with more complex DataFrame structures.""" + df = pd.DataFrame({ + "product": ["Widget A", "Widget B", "Widget C"], + "sales": [100, 150, 200], + "region": ["North", "South", "East"] + }) + + result = interpolate_only("Sales report: {report}", {"report": df}) + + assert "Widget A" in result + assert "100" in result + assert "North" in result + assert "sales" in result + assert "product" in result + + def test_interpolate_only_backward_compatibility(self): + """Test that existing supported types still work correctly.""" + inputs = { + "text": "hello", + "number": 42, + "float_val": 3.14, + "flag": True, + "nested": {"key": "value"}, + "items": [1, 2, 3] + } + + template = "Text: {text}, Number: {number}, Float: {float_val}, Flag: {flag}, Nested: {nested}, Items: {items}" + result = interpolate_only(template, inputs) + + assert "hello" in result + assert "42" in result + assert "3.14" in result + assert "True" in result + assert "key" in result + assert "[1, 2, 3]" in result