diff --git a/src/crewai/agents/parser.py b/src/crewai/agents/parser.py index b4629a8d2..71444a20a 100644 --- a/src/crewai/agents/parser.py +++ b/src/crewai/agents/parser.py @@ -94,6 +94,13 @@ class CrewAgentParser: elif includes_answer: final_answer = text.split(FINAL_ANSWER_ACTION)[-1].strip() + # Check whether the final answer ends with triple backticks. + if final_answer.endswith("```"): + # Count occurrences of triple backticks in the final answer. + count = final_answer.count("```") + # If count is odd then it's an unmatched trailing set; remove it. + if count % 2 != 0: + final_answer = final_answer[:-3].rstrip() return AgentFinish(thought, final_answer, text) if not re.search(r"Action\s*\d*\s*:[\s]*(.*?)", text, re.DOTALL): @@ -120,7 +127,10 @@ class CrewAgentParser: regex = r"(.*?)(?:\n\nAction|\n\nFinal Answer)" thought_match = re.search(regex, text, re.DOTALL) if thought_match: - return thought_match.group(1).strip() + thought = thought_match.group(1).strip() + # Remove any triple backticks from the thought string + thought = thought.replace("```", "").strip() + return thought return "" def _clean_action(self, text: str) -> str: diff --git a/src/crewai/crew.py b/src/crewai/crew.py index 55a24640c..7bb11321a 100644 --- a/src/crewai/crew.py +++ b/src/crewai/crew.py @@ -281,12 +281,26 @@ class Crew(BaseModel): if self.entity_memory else EntityMemory(crew=self, embedder_config=self.embedder) ) - if hasattr(self, "memory_config") and self.memory_config is not None: - self._user_memory = ( - self.user_memory if self.user_memory else UserMemory(crew=self) - ) + if ( + self.memory_config and "user_memory" in self.memory_config + ): # Check for user_memory in config + user_memory_config = self.memory_config["user_memory"] + if isinstance( + user_memory_config, UserMemory + ): # Check if it is already an instance + self._user_memory = user_memory_config + elif isinstance( + user_memory_config, dict + ): # Check if it's a configuration dict + self._user_memory = UserMemory( + crew=self, **user_memory_config + ) # Initialize with config + else: + raise TypeError( + "user_memory must be a UserMemory instance or a configuration dictionary" + ) else: - self._user_memory = None + self._user_memory = None # No user memory if not in config return self @model_validator(mode="after") @@ -1182,7 +1196,7 @@ class Crew(BaseModel): def test( self, n_iterations: int, - openai_model_name: Optional[str] = None, + eval_llm: Union[str, InstanceOf[LLM]], inputs: Optional[Dict[str, Any]] = None, ) -> None: """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures.""" @@ -1192,12 +1206,12 @@ class Crew(BaseModel): CrewTestStartedEvent( crew_name=self.name or "crew", n_iterations=n_iterations, - openai_model_name=openai_model_name, + eval_llm=eval_llm, inputs=inputs, ), ) test_crew = self.copy() - evaluator = CrewEvaluator(test_crew, openai_model_name or "gpt-4o-mini") + evaluator = CrewEvaluator(test_crew, eval_llm) for i in range(1, n_iterations + 1): evaluator.set_iteration(i) diff --git a/src/crewai/flow/state_utils.py b/src/crewai/flow/state_utils.py new file mode 100644 index 000000000..40bc81162 --- /dev/null +++ b/src/crewai/flow/state_utils.py @@ -0,0 +1,52 @@ +from datetime import date, datetime +from typing import Any + +from pydantic import BaseModel + +from crewai.flow import Flow + + +def export_state(flow: Flow) -> dict[str, Any]: + """Exports the Flow's internal state as JSON-compatible data structures. 
+ + Performs a one-way transformation of a Flow's state into basic Python types + that can be safely serialized to JSON. To prevent infinite recursion with + circular references, the conversion is limited to a depth of 5 levels. + + Args: + flow: The Flow object whose state needs to be exported + + Returns: + dict[str, Any]: The transformed state using JSON-compatible Python + types. + """ + return _to_serializable(flow._state) + + +def _to_serializable(obj: Any, max_depth: int = 5, _current_depth: int = 0) -> Any: + if _current_depth >= max_depth: + return repr(obj) + + if isinstance(obj, (str, int, float, bool, type(None))): + return obj + elif isinstance(obj, (date, datetime)): + return obj.isoformat() + elif isinstance(obj, (list, tuple, set)): + return [_to_serializable(item, max_depth, _current_depth + 1) for item in obj] + elif isinstance(obj, dict): + return { + _to_serializable_key(key): _to_serializable( + value, max_depth, _current_depth + 1 + ) + for key, value in obj.items() + } + elif isinstance(obj, BaseModel): + return _to_serializable(obj.model_dump(), max_depth, _current_depth + 1) + else: + return repr(obj) + + +def _to_serializable_key(key: Any) -> str: + if isinstance(key, (str, int)): + return str(key) + return f"key_{id(key)}_{repr(key)}" diff --git a/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/src/crewai/utilities/evaluators/crew_evaluator_handler.py index ef9b908e1..9fcd2886d 100644 --- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py +++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py @@ -1,11 +1,12 @@ from collections import defaultdict -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, InstanceOf from rich.box import HEAVY_EDGE from rich.console import Console from rich.table import Table from crewai.agent import Agent +from crewai.llm import LLM from crewai.task import Task from crewai.tasks.task_output import TaskOutput from crewai.telemetry import Telemetry @@ -23,7 +24,7 @@ class CrewEvaluator: Attributes: crew (Crew): The crew of agents to evaluate. - openai_model_name (str): The model to use for evaluating the performance of the agents (for now ONLY OpenAI accepted). + eval_llm (LLM): Language model instance to use for evaluations tasks_scores (defaultdict): A dictionary to store the scores of the agents for each task. iteration (int): The current iteration of the evaluation. 
""" @@ -32,9 +33,9 @@ class CrewEvaluator: run_execution_times: defaultdict = defaultdict(list) iteration: int = 0 - def __init__(self, crew, openai_model_name: str): + def __init__(self, crew, eval_llm: InstanceOf[LLM]): self.crew = crew - self.openai_model_name = openai_model_name + self.llm = eval_llm self._telemetry = Telemetry() self._setup_for_evaluating() @@ -51,7 +52,7 @@ class CrewEvaluator: ), backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed", verbose=False, - llm=self.openai_model_name, + llm=self.llm, ) def _evaluation_task( @@ -181,7 +182,7 @@ class CrewEvaluator: self.crew, evaluation_result.pydantic.quality, current_task.execution_duration, - self.openai_model_name, + self.llm.model, ) self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality) self.run_execution_times[self.iteration].append( diff --git a/tests/crew_test.py b/tests/crew_test.py index f15f5ac47..6cd0370ae 100644 --- a/tests/crew_test.py +++ b/tests/crew_test.py @@ -14,6 +14,7 @@ from crewai.agents.cache import CacheHandler from crewai.crew import Crew from crewai.crews.crew_output import CrewOutput from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource +from crewai.llm import LLM from crewai.memory.contextual.contextual_memory import ContextualMemory from crewai.process import Process from crewai.task import Task @@ -3375,6 +3376,7 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator): copy_mock.return_value = crew n_iterations = 2 + llm_instance = LLM("gpt-4o-mini") received_events = [] @@ -3386,7 +3388,7 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator): def on_crew_test_completed(source, event: CrewTestCompletedEvent): received_events.append(event) - crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"}) + crew.test(n_iterations, llm_instance, inputs={"topic": "AI"}) # Ensure kickoff is called on the copied crew kickoff_mock.assert_has_calls( @@ -3395,7 +3397,7 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator): crew_evaluator.assert_has_calls( [ - mock.call(crew, "gpt-4o-mini"), + mock.call(crew, llm_instance), mock.call().set_iteration(1), mock.call().set_iteration(2), mock.call().print_crew_evaluation_result(), diff --git a/tests/flow/test_state_utils.py b/tests/flow/test_state_utils.py new file mode 100644 index 000000000..1f71cd981 --- /dev/null +++ b/tests/flow/test_state_utils.py @@ -0,0 +1,156 @@ +from datetime import date, datetime +from typing import List +from unittest.mock import Mock + +import pytest +from pydantic import BaseModel + +from crewai.flow import Flow +from crewai.flow.state_utils import export_state + + +class Address(BaseModel): + street: str + city: str + country: str + + +class Person(BaseModel): + name: str + age: int + address: Address + birthday: date + skills: List[str] + + +@pytest.fixture +def mock_flow(): + def create_flow(state): + flow = Mock(spec=Flow) + flow._state = state + return flow + + return create_flow + + +@pytest.mark.parametrize( + "test_input,expected", + [ + ({"text": "hello world"}, {"text": "hello world"}), + ({"number": 42}, {"number": 42}), + ({"decimal": 3.14}, {"decimal": 3.14}), + ({"flag": True}, {"flag": True}), + ({"empty": None}, {"empty": None}), + ({"list": [1, 2, 3]}, {"list": [1, 2, 3]}), + ({"tuple": (1, 2, 3)}, {"tuple": [1, 2, 3]}), + ({"set": {1, 2, 3}}, {"set": [1, 2, 3]}), + 
({"nested": [1, [2, 3], {4, 5}]}, {"nested": [1, [2, 3], [4, 5]]}), + ], +) +def test_basic_serialization(mock_flow, test_input, expected): + flow = mock_flow(test_input) + result = export_state(flow) + assert result == expected + + +@pytest.mark.parametrize( + "input_date,expected", + [ + (date(2024, 1, 1), "2024-01-01"), + (datetime(2024, 1, 1, 12, 30), "2024-01-01T12:30:00"), + ], +) +def test_temporal_serialization(mock_flow, input_date, expected): + flow = mock_flow({"date": input_date}) + result = export_state(flow) + assert result["date"] == expected + + +@pytest.mark.parametrize( + "key,value,expected_key_type", + [ + (("tuple", "key"), "value", str), + (None, "value", str), + (123, "value", str), + ("normal", "value", str), + ], +) +def test_dictionary_key_serialization(mock_flow, key, value, expected_key_type): + flow = mock_flow({key: value}) + result = export_state(flow) + assert len(result) == 1 + result_key = next(iter(result.keys())) + assert isinstance(result_key, expected_key_type) + assert result[result_key] == value + + +@pytest.mark.parametrize( + "callable_obj,expected_in_result", + [ + (lambda x: x * 2, "lambda"), + (str.upper, "upper"), + ], +) +def test_callable_serialization(mock_flow, callable_obj, expected_in_result): + flow = mock_flow({"func": callable_obj}) + result = export_state(flow) + assert isinstance(result["func"], str) + assert expected_in_result in result["func"].lower() + + +def test_pydantic_model_serialization(mock_flow): + address = Address(street="123 Main St", city="Tech City", country="Pythonia") + + person = Person( + name="John Doe", + age=30, + address=address, + birthday=date(1994, 1, 1), + skills=["Python", "Testing"], + ) + + flow = mock_flow( + { + "single_model": address, + "nested_model": person, + "model_list": [address, address], + "model_dict": {"home": address}, + } + ) + + result = export_state(flow) + + assert result["single_model"]["street"] == "123 Main St" + + assert result["nested_model"]["name"] == "John Doe" + assert result["nested_model"]["address"]["city"] == "Tech City" + assert result["nested_model"]["birthday"] == "1994-01-01" + + assert len(result["model_list"]) == 2 + assert all(m["street"] == "123 Main St" for m in result["model_list"]) + assert result["model_dict"]["home"]["city"] == "Tech City" + + +def test_depth_limit(mock_flow): + """Test max depth handling with a deeply nested structure""" + + def create_nested(depth): + if depth == 0: + return "value" + return {"next": create_nested(depth - 1)} + + deep_structure = create_nested(10) + flow = mock_flow(deep_structure) + result = export_state(flow) + + assert result == { + "next": { + "next": { + "next": { + "next": { + "next": "{'next': {'next': {'next': {'next': {'next': 'value'}}}}}" + } + } + } + } + }