Merge branch 'main' of github.com:crewAIInc/crewAI into better/event-emitter

Lorenze Jay
2025-02-18 14:18:24 -08:00
6 changed files with 252 additions and 17 deletions

View File

@@ -94,6 +94,13 @@ class CrewAgentParser:
         elif includes_answer:
             final_answer = text.split(FINAL_ANSWER_ACTION)[-1].strip()
+            # Check whether the final answer ends with triple backticks.
+            if final_answer.endswith("```"):
+                # Count occurrences of triple backticks in the final answer.
+                count = final_answer.count("```")
+                # If count is odd then it's an unmatched trailing set; remove it.
+                if count % 2 != 0:
+                    final_answer = final_answer[:-3].rstrip()
             return AgentFinish(thought, final_answer, text)

         if not re.search(r"Action\s*\d*\s*:[\s]*(.*?)", text, re.DOTALL):
@@ -120,7 +127,10 @@ class CrewAgentParser:
         regex = r"(.*?)(?:\n\nAction|\n\nFinal Answer)"
         thought_match = re.search(regex, text, re.DOTALL)
         if thought_match:
-            return thought_match.group(1).strip()
+            thought = thought_match.group(1).strip()
+            # Remove any triple backticks from the thought string
+            thought = thought.replace("```", "").strip()
+            return thought
         return ""

     def _clean_action(self, text: str) -> str:
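Taken together, these two parser tweaks strip markdown fence artifacts from LLM output: an unmatched trailing ``` is dropped from the final answer, and all ``` sequences are removed from the thought. A rough sketch of the final-answer case, using a made-up model reply (the string below is illustrative, not from this diff):

# Hypothetical raw output whose closing fence is followed by a stray ``` .
final_answer = 'Here you go:\n```json\n{"score": 10}\n```\n```'

if final_answer.endswith("```"):
    count = final_answer.count("```")   # 3 here: opener, closer, stray trailer
    if count % 2 != 0:                  # odd -> the trailing fence is unmatched
        final_answer = final_answer[:-3].rstrip()

# final_answer now ends with the properly closed ```json ... ``` block.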

View File

@@ -281,12 +281,26 @@ class Crew(BaseModel):
                 if self.entity_memory
                 else EntityMemory(crew=self, embedder_config=self.embedder)
             )
-            if hasattr(self, "memory_config") and self.memory_config is not None:
-                self._user_memory = (
-                    self.user_memory if self.user_memory else UserMemory(crew=self)
-                )
+            if (
+                self.memory_config and "user_memory" in self.memory_config
+            ):  # Check for user_memory in config
+                user_memory_config = self.memory_config["user_memory"]
+                if isinstance(
+                    user_memory_config, UserMemory
+                ):  # Check if it is already an instance
+                    self._user_memory = user_memory_config
+                elif isinstance(
+                    user_memory_config, dict
+                ):  # Check if it's a configuration dict
+                    self._user_memory = UserMemory(
+                        crew=self, **user_memory_config
+                    )  # Initialize with config
+                else:
+                    raise TypeError(
+                        "user_memory must be a UserMemory instance or a configuration dictionary"
+                    )
             else:
-                self._user_memory = None
+                self._user_memory = None  # No user memory if not in config
         return self

     @model_validator(mode="after")
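For reference, the new branch accepts either of the shapes below under "user_memory" in Crew's memory_config; this is a minimal sketch of the expected forms (the empty kwargs dict is just a placeholder, any UserMemory keyword arguments would go there):

# Option A: a plain dict -> Crew builds UserMemory(crew=self, **cfg) itself.
memory_config = {"user_memory": {}}

# Option B: a pre-built UserMemory instance is stored as-is.
# memory_config = {"user_memory": my_user_memory}  # my_user_memory: an existing UserMemory

# Anything else under "user_memory" now raises TypeError.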
@@ -1182,7 +1196,7 @@ class Crew(BaseModel):
     def test(
         self,
         n_iterations: int,
-        openai_model_name: Optional[str] = None,
+        eval_llm: Union[str, InstanceOf[LLM]],
         inputs: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures."""
@@ -1192,12 +1206,12 @@ class Crew(BaseModel):
             CrewTestStartedEvent(
                 crew_name=self.name or "crew",
                 n_iterations=n_iterations,
-                openai_model_name=openai_model_name,
+                eval_llm=eval_llm,
                 inputs=inputs,
             ),
         )
         test_crew = self.copy()
-        evaluator = CrewEvaluator(test_crew, openai_model_name or "gpt-4o-mini")
+        evaluator = CrewEvaluator(test_crew, eval_llm)

         for i in range(1, n_iterations + 1):
             evaluator.set_iteration(i)
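Callers of Crew.test now pass the evaluation LLM directly, either as a model-name string or an LLM instance; a minimal sketch mirroring the updated test further down (assumes crew is an already-configured Crew):

from crewai.llm import LLM

eval_llm = LLM("gpt-4o-mini")  # a plain string like "gpt-4o-mini" also works
crew.test(n_iterations=2, eval_llm=eval_llm, inputs={"topic": "AI"})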

View File

@@ -0,0 +1,52 @@
from datetime import date, datetime
from typing import Any

from pydantic import BaseModel

from crewai.flow import Flow


def export_state(flow: Flow) -> dict[str, Any]:
    """Exports the Flow's internal state as JSON-compatible data structures.

    Performs a one-way transformation of a Flow's state into basic Python types
    that can be safely serialized to JSON. To prevent infinite recursion with
    circular references, the conversion is limited to a depth of 5 levels.

    Args:
        flow: The Flow object whose state needs to be exported

    Returns:
        dict[str, Any]: The transformed state using JSON-compatible Python
            types.
    """
    return _to_serializable(flow._state)


def _to_serializable(obj: Any, max_depth: int = 5, _current_depth: int = 0) -> Any:
    if _current_depth >= max_depth:
        return repr(obj)

    if isinstance(obj, (str, int, float, bool, type(None))):
        return obj
    elif isinstance(obj, (date, datetime)):
        return obj.isoformat()
    elif isinstance(obj, (list, tuple, set)):
        return [_to_serializable(item, max_depth, _current_depth + 1) for item in obj]
    elif isinstance(obj, dict):
        return {
            _to_serializable_key(key): _to_serializable(
                value, max_depth, _current_depth + 1
            )
            for key, value in obj.items()
        }
    elif isinstance(obj, BaseModel):
        return _to_serializable(obj.model_dump(), max_depth, _current_depth + 1)
    else:
        return repr(obj)


def _to_serializable_key(key: Any) -> str:
    if isinstance(key, (str, int)):
        return str(key)
    return f"key_{id(key)}_{repr(key)}"
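A quick usage sketch of the new helper, mirroring the mock-based tests further down (a real Flow subclass would normally supply _state; the sample state values are illustrative):

from datetime import date
from unittest.mock import Mock

from crewai.flow import Flow
from crewai.flow.state_utils import export_state

flow = Mock(spec=Flow)
flow._state = {"run_date": date(2024, 1, 1), "attempts": 3, "tags": ("a", "b")}

print(export_state(flow))
# {'run_date': '2024-01-01', 'attempts': 3, 'tags': ['a', 'b']}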

View File

@@ -1,11 +1,12 @@
 from collections import defaultdict

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, InstanceOf
 from rich.box import HEAVY_EDGE
 from rich.console import Console
 from rich.table import Table

 from crewai.agent import Agent
+from crewai.llm import LLM
 from crewai.task import Task
 from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry import Telemetry
@@ -23,7 +24,7 @@ class CrewEvaluator:
     Attributes:
         crew (Crew): The crew of agents to evaluate.
-        openai_model_name (str): The model to use for evaluating the performance of the agents (for now ONLY OpenAI accepted).
+        eval_llm (LLM): Language model instance to use for evaluations
         tasks_scores (defaultdict): A dictionary to store the scores of the agents for each task.
         iteration (int): The current iteration of the evaluation.
     """
@@ -32,9 +33,9 @@ class CrewEvaluator:
     run_execution_times: defaultdict = defaultdict(list)
     iteration: int = 0

-    def __init__(self, crew, openai_model_name: str):
+    def __init__(self, crew, eval_llm: InstanceOf[LLM]):
         self.crew = crew
-        self.openai_model_name = openai_model_name
+        self.llm = eval_llm
         self._telemetry = Telemetry()
         self._setup_for_evaluating()
@@ -51,7 +52,7 @@ class CrewEvaluator:
             ),
             backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed",
             verbose=False,
-            llm=self.openai_model_name,
+            llm=self.llm,
         )

     def _evaluation_task(
@@ -181,7 +182,7 @@ class CrewEvaluator:
                 self.crew,
                 evaluation_result.pydantic.quality,
                 current_task.execution_duration,
-                self.openai_model_name,
+                self.llm.model,
             )
             self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
             self.run_execution_times[self.iteration].append(
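CrewEvaluator is now constructed with an LLM instance rather than a model-name string (its .model attribute is what gets reported to telemetry). A minimal sketch, assuming an existing crew object; the evaluator import path shown is assumed from the crewAI layout and is not part of this diff:

from crewai.llm import LLM
from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator

evaluator = CrewEvaluator(crew, LLM("gpt-4o-mini"))  # crew: an existing Crew
evaluator.set_iteration(1)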

View File

@@ -14,6 +14,7 @@ from crewai.agents.cache import CacheHandler
 from crewai.crew import Crew
 from crewai.crews.crew_output import CrewOutput
 from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
+from crewai.llm import LLM
 from crewai.memory.contextual.contextual_memory import ContextualMemory
 from crewai.process import Process
 from crewai.task import Task
@@ -3375,6 +3376,7 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
     copy_mock.return_value = crew

     n_iterations = 2
+    llm_instance = LLM("gpt-4o-mini")

     received_events = []
@@ -3386,7 +3388,7 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
     def on_crew_test_completed(source, event: CrewTestCompletedEvent):
         received_events.append(event)

-    crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})
+    crew.test(n_iterations, llm_instance, inputs={"topic": "AI"})

     # Ensure kickoff is called on the copied crew
     kickoff_mock.assert_has_calls(
@@ -3395,7 +3397,7 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
     crew_evaluator.assert_has_calls(
         [
-            mock.call(crew, "gpt-4o-mini"),
+            mock.call(crew, llm_instance),
             mock.call().set_iteration(1),
             mock.call().set_iteration(2),
             mock.call().print_crew_evaluation_result(),

View File

@@ -0,0 +1,156 @@
from datetime import date, datetime
from typing import List
from unittest.mock import Mock

import pytest
from pydantic import BaseModel

from crewai.flow import Flow
from crewai.flow.state_utils import export_state


class Address(BaseModel):
    street: str
    city: str
    country: str


class Person(BaseModel):
    name: str
    age: int
    address: Address
    birthday: date
    skills: List[str]


@pytest.fixture
def mock_flow():
    def create_flow(state):
        flow = Mock(spec=Flow)
        flow._state = state
        return flow

    return create_flow


@pytest.mark.parametrize(
    "test_input,expected",
    [
        ({"text": "hello world"}, {"text": "hello world"}),
        ({"number": 42}, {"number": 42}),
        ({"decimal": 3.14}, {"decimal": 3.14}),
        ({"flag": True}, {"flag": True}),
        ({"empty": None}, {"empty": None}),
        ({"list": [1, 2, 3]}, {"list": [1, 2, 3]}),
        ({"tuple": (1, 2, 3)}, {"tuple": [1, 2, 3]}),
        ({"set": {1, 2, 3}}, {"set": [1, 2, 3]}),
        ({"nested": [1, [2, 3], {4, 5}]}, {"nested": [1, [2, 3], [4, 5]]}),
    ],
)
def test_basic_serialization(mock_flow, test_input, expected):
    flow = mock_flow(test_input)
    result = export_state(flow)
    assert result == expected


@pytest.mark.parametrize(
    "input_date,expected",
    [
        (date(2024, 1, 1), "2024-01-01"),
        (datetime(2024, 1, 1, 12, 30), "2024-01-01T12:30:00"),
    ],
)
def test_temporal_serialization(mock_flow, input_date, expected):
    flow = mock_flow({"date": input_date})
    result = export_state(flow)
    assert result["date"] == expected


@pytest.mark.parametrize(
    "key,value,expected_key_type",
    [
        (("tuple", "key"), "value", str),
        (None, "value", str),
        (123, "value", str),
        ("normal", "value", str),
    ],
)
def test_dictionary_key_serialization(mock_flow, key, value, expected_key_type):
    flow = mock_flow({key: value})
    result = export_state(flow)

    assert len(result) == 1
    result_key = next(iter(result.keys()))
    assert isinstance(result_key, expected_key_type)
    assert result[result_key] == value


@pytest.mark.parametrize(
    "callable_obj,expected_in_result",
    [
        (lambda x: x * 2, "lambda"),
        (str.upper, "upper"),
    ],
)
def test_callable_serialization(mock_flow, callable_obj, expected_in_result):
    flow = mock_flow({"func": callable_obj})
    result = export_state(flow)
    assert isinstance(result["func"], str)
    assert expected_in_result in result["func"].lower()


def test_pydantic_model_serialization(mock_flow):
    address = Address(street="123 Main St", city="Tech City", country="Pythonia")
    person = Person(
        name="John Doe",
        age=30,
        address=address,
        birthday=date(1994, 1, 1),
        skills=["Python", "Testing"],
    )

    flow = mock_flow(
        {
            "single_model": address,
            "nested_model": person,
            "model_list": [address, address],
            "model_dict": {"home": address},
        }
    )
    result = export_state(flow)

    assert result["single_model"]["street"] == "123 Main St"
    assert result["nested_model"]["name"] == "John Doe"
    assert result["nested_model"]["address"]["city"] == "Tech City"
    assert result["nested_model"]["birthday"] == "1994-01-01"
    assert len(result["model_list"]) == 2
    assert all(m["street"] == "123 Main St" for m in result["model_list"])
    assert result["model_dict"]["home"]["city"] == "Tech City"


def test_depth_limit(mock_flow):
    """Test max depth handling with a deeply nested structure"""

    def create_nested(depth):
        if depth == 0:
            return "value"
        return {"next": create_nested(depth - 1)}

    deep_structure = create_nested(10)
    flow = mock_flow(deep_structure)
    result = export_state(flow)

    assert result == {
        "next": {
            "next": {
                "next": {
                    "next": {
                        "next": "{'next': {'next': {'next': {'next': {'next': 'value'}}}}}"
                    }
                }
            }
        }
    }