mirror of https://github.com/crewAIInc/crewAI.git
synced 2026-02-02 20:18:13 +00:00

Compare commits
6 Commits: feat/add-t ... fix/memory
| Author | SHA1 | Date |
|---|---|---|
|  | 319f0301ef |  |
|  | d824db82a3 |  |
|  | de6b597eff |  |
|  | 6111d05219 |  |
|  | f83c91d612 |  |
|  | c8f360414e |  |
@@ -17,7 +17,7 @@ Beforre we start there are a couple of things to note:
Before getting started with CrewAI, make sure that you have installed it via pip:

```shell
$ pip install crewai crewi-tools
$ pip install crewai crewai-tools
```

### Virtual Environemnts
@@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Optional

from crewai.memory.entity.entity_memory_item import EntityMemoryItem
from crewai.memory.long_term.long_term_memory_item import LongTermMemoryItem
from crewai.memory.short_term.short_term_memory_item import ShortTermMemoryItem
from crewai.utilities.converter import ConverterError
from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
from crewai.utilities import I18N
@@ -39,18 +38,17 @@ class CrewAgentExecutorMixin:
            and "Action: Delegate work to coworker" not in output.log
        ):
            try:
                memory = ShortTermMemoryItem(
                    data=output.log,
                    agent=self.crew_agent.role,
                    metadata={
                        "observation": self.task.description,
                    },
                )
                if (
                    hasattr(self.crew, "_short_term_memory")
                    and self.crew._short_term_memory
                ):
                    self.crew._short_term_memory.save(memory)
                    self.crew._short_term_memory.save(
                        value=output.log,
                        metadata={
                            "observation": self.task.description,
                        },
                        agent=self.crew_agent.role,
                    )
            except Exception as e:
                print(f"Failed to add to short term memory: {e}")
                pass
@@ -6,9 +6,9 @@ from crewai.memory.storage.kickoff_task_outputs_storage import (
)

from .create_crew import create_crew
from .evaluate_crew import evaluate_crew
from .replay_from_task import replay_task_command
from .reset_memories_command import reset_memories_command
from .test_crew import test_crew
from .train_crew import train_crew

@@ -144,7 +144,7 @@ def reset_memories(long, short, entities, kickoff_outputs, all):
def test(n_iterations: int, model: str):
    """Test the crew and evaluate the results."""
    click.echo(f"Testing the crew for {n_iterations} iterations with model {model}")
    test_crew(n_iterations, model)
    evaluate_crew(n_iterations, model)


if __name__ == "__main__":
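The `test` command now delegates to `evaluate_crew` instead of `test_crew`. A minimal sketch of exercising it the way the CLI tests further down do (illustrative only; outside a real crew project the underlying subprocess step will fail):

```python
# Hedged sketch: invoke the `test` command in-process via click's test runner.
from click.testing import CliRunner

from crewai.cli.cli import test

runner = CliRunner()
result = runner.invoke(test, ["--n_iterations", "5", "--model", "gpt-4o"])
print(result.output)  # e.g. "Testing the crew for 5 iterations with model gpt-4o"
```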
@@ -1,13 +1,11 @@
import subprocess

import click
import pytest

pytest.skip(allow_module_level=True)


def test_crew(n_iterations: int, model: str) -> None:
def evaluate_crew(n_iterations: int, model: str) -> None:
    """
    Test the crew by running a command in the Poetry environment.
    Test and Evaluate the crew by running a command in the Poetry environment.

    Args:
        n_iterations (int): The number of iterations to test the crew.
@@ -1,3 +1,4 @@
from typing import Any, Dict, Optional
from crewai.memory.memory import Memory
from crewai.memory.short_term.short_term_memory_item import ShortTermMemoryItem
from crewai.memory.storage.rag_storage import RAGStorage
@@ -18,7 +19,14 @@ class ShortTermMemory(Memory):
        )
        super().__init__(storage)

    def save(self, item: ShortTermMemoryItem) -> None:
    def save(
        self,
        value: Any,
        metadata: Optional[Dict[str, Any]] = None,
        agent: Optional[str] = None,
    ) -> None:
        item = ShortTermMemoryItem(data=value, metadata=metadata, agent=agent)

        super().save(value=item.data, metadata=item.metadata, agent=item.agent)

    def search(self, query: str, score_threshold: float = 0.35):
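With the new signature, callers pass the raw value plus optional metadata and agent instead of pre-building a `ShortTermMemoryItem`. A minimal usage sketch (constructor arguments such as the crew or storage wiring are omitted here and assumed to have workable defaults):

```python
# Hedged sketch of the new keyword-based save()/search() flow.
from crewai.memory.short_term.short_term_memory import ShortTermMemory

stm = ShortTermMemory()  # crew/storage wiring omitted for brevity
stm.save(
    value="test value",
    metadata={"task": "test_task"},
    agent="test_agent",
)
results = stm.search("test value", score_threshold=0.35)
```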
@@ -3,7 +3,10 @@ from typing import Any, Dict, Optional

class ShortTermMemoryItem:
    def __init__(
        self, data: Any, agent: str, metadata: Optional[Dict[str, Any]] = None
        self,
        data: Any,
        agent: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        self.data = data
        self.agent = agent
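Because `agent` is now optional, a memory item can be built without one, matching the new `save` signature. A small illustrative check (the assert is only a sketch of the new default):

```python
# Sketch: building an item without an agent now works; agent defaults to None.
from crewai.memory.short_term.short_term_memory_item import ShortTermMemoryItem

item = ShortTermMemoryItem(data="test value", metadata={"task": "test_task"})
assert item.agent is None
```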
@@ -4,7 +4,7 @@ from typing import Any, Dict
class Storage:
    """Abstract base class defining the storage interface"""

    def save(self, key: str, value: Any, metadata: Dict[str, Any]) -> None:
    def save(self, value: Any, metadata: Dict[str, Any]) -> None:
        pass

    def search(self, key: str) -> Dict[str, Any]:  # type: ignore
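Since the interface drops the `key` argument from `save`, a custom backend only needs to persist the value and its metadata. A minimal in-memory sketch of a conforming backend (the class name and matching logic are illustrative, not part of crewAI):

```python
from typing import Any, Dict, List


class InMemoryStorage:
    """Toy storage backend matching the updated save/search interface."""

    def __init__(self) -> None:
        self._items: List[Dict[str, Any]] = []

    def save(self, value: Any, metadata: Dict[str, Any]) -> None:
        # No key anymore: just record the value alongside its metadata.
        self._items.append({"value": value, "metadata": metadata})

    def search(self, key: str) -> Dict[str, Any]:
        # Naive substring match, purely for illustration.
        for item in self._items:
            if key in str(item["value"]):
                return item
        return {}
```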
@@ -1,3 +1,4 @@
import datetime
import json
import os
import threading
@@ -107,6 +108,7 @@ class Task(BaseModel):
    _original_description: str | None = None
    _original_expected_output: str | None = None
    _thread: threading.Thread | None = None
    _execution_time: float | None = None

    def __init__(__pydantic_self__, **data):
        config = data.pop("config", {})
@@ -120,6 +122,12 @@ class Task(BaseModel):
                "may_not_set_field", "This field is not to be set by the user.", {}
            )

    def _set_start_execution_time(self) -> float:
        return datetime.datetime.now().timestamp()

    def _set_end_execution_time(self, start_time: float) -> None:
        self._execution_time = datetime.datetime.now().timestamp() - start_time

    @field_validator("output_file")
    @classmethod
    def output_file_validation(cls, value: str) -> str:
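The two helpers implement a simple wall-clock timing pattern around task execution; the elapsed seconds are later consumed by the `CrewEvaluator` changes further down. A standalone sketch of the same pattern:

```python
import datetime
import time

# Record a start timestamp, do the work, then store the elapsed seconds.
start_time = datetime.datetime.now().timestamp()
time.sleep(0.1)  # stand-in for task execution
execution_time = datetime.datetime.now().timestamp() - start_time
print(f"{execution_time:.2f}s")
```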
@@ -216,6 +224,7 @@ class Task(BaseModel):
                f"The task '{self.description}' has no agent assigned, therefore it can't be executed directly and should be executed in a Crew using a specific process that support that, like hierarchical."
            )

        start_time = self._set_start_execution_time()
        self._execution_span = self._telemetry.task_started(crew=agent.crew, task=self)

        self.prompt_context = context
@@ -239,6 +248,7 @@ class Task(BaseModel):
        )
        self.output = task_output

        self._set_end_execution_time(start_time)
        if self.callback:
            self.callback(self.output)
@@ -250,7 +260,9 @@ class Task(BaseModel):
            content = (
                json_output
                if json_output
                else pydantic_output.model_dump_json() if pydantic_output else result
                else pydantic_output.model_dump_json()
                if pydantic_output
                else result
            )
            self._save_file(content)

@@ -40,7 +40,7 @@ class Telemetry:
    - Roles of agents in a crew
    - Tools names available

    Users can opt-in to sharing more complete data suing the `share_crew`
    Users can opt-in to sharing more complete data using the `share_crew`
    attribute in the Crew class.
    """

@@ -28,6 +28,7 @@ class CrewEvaluator:
    """

    tasks_scores: defaultdict = defaultdict(list)
    run_execution_times: defaultdict = defaultdict(list)
    iteration: int = 0

    def __init__(self, crew, openai_model_name: str):
@@ -40,9 +41,6 @@ class CrewEvaluator:
        for task in self.crew.tasks:
            task.callback = self.evaluate

    def set_iteration(self, iteration: int) -> None:
        self.iteration = iteration

    def _evaluator_agent(self):
        return Agent(
            role="Task Execution Evaluator",
@@ -71,6 +69,9 @@ class CrewEvaluator:
            output_pydantic=TaskEvaluationPydanticOutput,
        )

    def set_iteration(self, iteration: int) -> None:
        self.iteration = iteration

    def print_crew_evaluation_result(self) -> None:
        """
        Prints the evaluation result of the crew in a table.
@@ -119,6 +120,16 @@ class CrewEvaluator:
        ]
        table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")

        run_exec_times = [
            int(sum(tasks_exec_times))
            for _, tasks_exec_times in self.run_execution_times.items()
        ]
        execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
        table.add_row(
            "Execution Time (s)",
            *map(str, run_exec_times),
            f"{execution_time_avg}",
        )
        # Display the table in the terminal
        console = Console()
        console.print(table)
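The new rows sum each iteration's per-task execution times and append an average column. A standalone sketch of that aggregation, using the same figures the updated evaluator test expects:

```python
from collections import defaultdict

# Illustrative per-iteration task execution times in seconds (taken from the test below).
run_execution_times = defaultdict(list, {1: [24, 45, 66], 2: [55, 33, 67]})

run_exec_times = [int(sum(times)) for times in run_execution_times.values()]
execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
print(run_exec_times, execution_time_avg)  # [135, 155] 145
```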
@@ -145,5 +156,8 @@ class CrewEvaluator:

        if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
            self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
            self.run_execution_times[self.iteration].append(
                current_task._execution_time
            )
        else:
            raise ValueError("Evaluation result is not in the expected format")
@@ -135,29 +135,29 @@ def test_version_command_with_tools(runner):
    )


@mock.patch("crewai.cli.cli.test_crew")
def test_test_default_iterations(test_crew, runner):
@mock.patch("crewai.cli.cli.evaluate_crew")
def test_test_default_iterations(evaluate_crew, runner):
    result = runner.invoke(test)

    test_crew.assert_called_once_with(3, "gpt-4o-mini")
    evaluate_crew.assert_called_once_with(3, "gpt-4o-mini")
    assert result.exit_code == 0
    assert "Testing the crew for 3 iterations with model gpt-4o-mini" in result.output


@mock.patch("crewai.cli.cli.test_crew")
def test_test_custom_iterations(test_crew, runner):
@mock.patch("crewai.cli.cli.evaluate_crew")
def test_test_custom_iterations(evaluate_crew, runner):
    result = runner.invoke(test, ["--n_iterations", "5", "--model", "gpt-4o"])

    test_crew.assert_called_once_with(5, "gpt-4o")
    evaluate_crew.assert_called_once_with(5, "gpt-4o")
    assert result.exit_code == 0
    assert "Testing the crew for 5 iterations with model gpt-4o" in result.output


@mock.patch("crewai.cli.cli.test_crew")
def test_test_invalid_string_iterations(test_crew, runner):
@mock.patch("crewai.cli.cli.evaluate_crew")
def test_test_invalid_string_iterations(evaluate_crew, runner):
    result = runner.invoke(test, ["--n_iterations", "invalid"])

    test_crew.assert_not_called()
    evaluate_crew.assert_not_called()
    assert result.exit_code == 2
    assert (
        "Usage: test [OPTIONS]\nTry 'test --help' for help.\n\nError: Invalid value for '-n' / '--n_iterations': 'invalid' is not a valid integer.\n"
@@ -3,7 +3,7 @@ from unittest import mock

import pytest

from crewai.cli import test_crew
from crewai.cli import evaluate_crew


@pytest.mark.parametrize(
@@ -14,13 +14,13 @@ from crewai.cli import test_crew
        (10, "gpt-4"),
    ],
)
@mock.patch("crewai.cli.test_crew.subprocess.run")
@mock.patch("crewai.cli.evaluate_crew.subprocess.run")
def test_crew_success(mock_subprocess_run, n_iterations, model):
    """Test the crew function for successful execution."""
    mock_subprocess_run.return_value = subprocess.CompletedProcess(
        args=f"poetry run test {n_iterations} {model}", returncode=0
    )
    result = test_crew.test_crew(n_iterations, model)
    result = evaluate_crew.evaluate_crew(n_iterations, model)

    mock_subprocess_run.assert_called_once_with(
        ["poetry", "run", "test", str(n_iterations), model],
@@ -31,26 +31,26 @@ def test_crew_success(mock_subprocess_run, n_iterations, model):
    assert result is None


@mock.patch("crewai.cli.test_crew.click")
@mock.patch("crewai.cli.evaluate_crew.click")
def test_test_crew_zero_iterations(click):
    test_crew.test_crew(0, "gpt-4o")
    evaluate_crew.evaluate_crew(0, "gpt-4o")
    click.echo.assert_called_once_with(
        "An unexpected error occurred: The number of iterations must be a positive integer.",
        err=True,
    )


@mock.patch("crewai.cli.test_crew.click")
@mock.patch("crewai.cli.evaluate_crew.click")
def test_test_crew_negative_iterations(click):
    test_crew.test_crew(-2, "gpt-4o")
    evaluate_crew.evaluate_crew(-2, "gpt-4o")
    click.echo.assert_called_once_with(
        "An unexpected error occurred: The number of iterations must be a positive integer.",
        err=True,
    )


@mock.patch("crewai.cli.test_crew.click")
@mock.patch("crewai.cli.test_crew.subprocess.run")
@mock.patch("crewai.cli.evaluate_crew.click")
@mock.patch("crewai.cli.evaluate_crew.subprocess.run")
def test_test_crew_called_process_error(mock_subprocess_run, click):
    n_iterations = 5
    mock_subprocess_run.side_effect = subprocess.CalledProcessError(
@@ -59,7 +59,7 @@ def test_test_crew_called_process_error(mock_subprocess_run, click):
        output="Error",
        stderr="Some error occurred",
    )
    test_crew.test_crew(n_iterations, "gpt-4o")
    evaluate_crew.evaluate_crew(n_iterations, "gpt-4o")

    mock_subprocess_run.assert_called_once_with(
        ["poetry", "run", "test", "5", "gpt-4o"],
@@ -78,13 +78,13 @@ def test_test_crew_called_process_error(mock_subprocess_run, click):
    )


@mock.patch("crewai.cli.test_crew.click")
@mock.patch("crewai.cli.test_crew.subprocess.run")
@mock.patch("crewai.cli.evaluate_crew.click")
@mock.patch("crewai.cli.evaluate_crew.subprocess.run")
def test_test_crew_unexpected_exception(mock_subprocess_run, click):
    # Arrange
    n_iterations = 5
    mock_subprocess_run.side_effect = Exception("Unexpected error")
    test_crew.test_crew(n_iterations, "gpt-4o")
    evaluate_crew.evaluate_crew(n_iterations, "gpt-4o")

    mock_subprocess_run.assert_called_once_with(
        ["poetry", "run", "test", "5", "gpt-4o"],
@@ -23,10 +23,7 @@ def short_term_memory():
        expected_output="A list of relevant URLs based on the search query.",
        agent=agent,
    )
    return ShortTermMemory(crew=Crew(
        agents=[agent],
        tasks=[task]
    ))
    return ShortTermMemory(crew=Crew(agents=[agent], tasks=[task]))


@pytest.mark.vcr(filter_headers=["authorization"])
@@ -38,7 +35,11 @@ def test_save_and_search(short_term_memory):
        agent="test_agent",
        metadata={"task": "test_task"},
    )
    short_term_memory.save(memory)
    short_term_memory.save(
        value=memory.data,
        metadata=memory.metadata,
        agent=memory.agent,
    )

    find = short_term_memory.search("test value", score_threshold=0.01)[0]
    assert find["context"] == memory.data, "Data value mismatch."
@@ -84,6 +84,10 @@ class TestCrewEvaluator:
                1: [10, 9, 8],
                2: [9, 8, 7],
            }
            crew_planner.run_execution_times = {
                1: [24, 45, 66],
                2: [55, 33, 67],
            }

            crew_planner.print_crew_evaluation_result()
@@ -98,6 +102,7 @@ class TestCrewEvaluator:
                mock.call().add_row("Task 2", "9", "8", "8.5"),
                mock.call().add_row("Task 3", "8", "7", "7.5"),
                mock.call().add_row("Crew", "9.0", "8.0", "8.5"),
                mock.call().add_row("Execution Time (s)", "135", "155", "145"),
            ]
        )
        console.assert_has_calls([mock.call(), mock.call().print(table())])