Merge branch 'main' into fix-knowledgestorage-default-instantiation

refactor: Change storage field to optional and improve error handling when saving documents
2026-01-29 01:58:14 +00:00 · 2024-12-27 21:18:16 -03:00 · 2024-12-27 17:18:33 -03:00 · 2024-12-26 22:27:19 -04:00 · 2024-12-26 21:30:06 -04:00
7 changed files with 29 additions and 135 deletions
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -4,7 +4,6 @@ import uuid
 import warnings
 from concurrent.futures import Future
 from hashlib import md5
-from crewai.llm import LLM
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union

 from pydantic import (
@@ -1076,36 +1075,19 @@ class Crew(BaseModel):
    def test(
        self,
        n_iterations: int,
-        llm: Union[str, LLM],
+        openai_model_name: Optional[str] = None,
        inputs: Optional[Dict[str, Any]] = None,
    ) -> None:
-        """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures.
-        
-        Args:
-            n_iterations: Number of test iterations to run
-            llm: Language model to use for evaluation. Can be either a model name string (e.g. "gpt-4") 
-                 or an LLM instance for custom implementations
-            inputs: Optional dictionary of input values to use for task execution
-            
-        Example:
-            ```python
-            # Using model name string
-            crew.test(n_iterations=3, llm="gpt-4")
-            
-            # Using custom LLM implementation
-            custom_llm = LLM(model="custom-model")
-            crew.test(n_iterations=3, llm=custom_llm)
-            ```
-        """
+        """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures."""
        test_crew = self.copy()

        self._test_execution_span = test_crew._telemetry.test_execution_span(
            test_crew,
            n_iterations,
            inputs,
-            str(llm) if isinstance(llm, LLM) else llm,
-        )
-        evaluator = CrewEvaluator(test_crew, llm)
+            openai_model_name,  # type: ignore[arg-type]
+        )  # type: ignore[arg-type]
+        evaluator = CrewEvaluator(test_crew, openai_model_name)  # type: ignore[arg-type]

        for i in range(1, n_iterations + 1):
            evaluator.set_iteration(i)
--- a/src/crewai/knowledge/knowledge.py
+++ b/src/crewai/knowledge/knowledge.py
@@ -14,13 +14,13 @@ class Knowledge(BaseModel):
    Knowledge is a collection of sources and setup for the vector store to save and query relevant context.
    Args:
        sources: List[BaseKnowledgeSource] = Field(default_factory=list)
-        storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+        storage: Optional[KnowledgeStorage] = Field(default=None)
        embedder_config: Optional[Dict[str, Any]] = None
    """

    sources: List[BaseKnowledgeSource] = Field(default_factory=list)
    model_config = ConfigDict(arbitrary_types_allowed=True)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    embedder_config: Optional[Dict[str, Any]] = None
    collection_name: Optional[str] = None

--- a/src/crewai/knowledge/source/base_file_knowledge_source.py
+++ b/src/crewai/knowledge/source/base_file_knowledge_source.py
@@ -22,7 +22,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
        default_factory=list, description="The path to the file"
    )
    content: Dict[Path, str] = Field(init=False, default_factory=dict)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    safe_file_paths: List[Path] = Field(default_factory=list)

    @field_validator("file_path", "file_paths", mode="before")
@@ -62,7 +62,10 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):

    def _save_documents(self):
        """Save the documents to the storage."""
-        self.storage.save(self.chunks)
+        if self.storage:
+            self.storage.save(self.chunks)
+        else:
+            raise ValueError("No storage found to save documents.")

    def convert_to_path(self, path: Union[Path, str]) -> Path:
        """Convert a path to a Path object."""
--- a/src/crewai/knowledge/source/base_knowledge_source.py
+++ b/src/crewai/knowledge/source/base_knowledge_source.py
@@ -16,7 +16,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
    chunk_embeddings: List[np.ndarray] = Field(default_factory=list)

    model_config = ConfigDict(arbitrary_types_allowed=True)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    metadata: Dict[str, Any] = Field(default_factory=dict)  # Currently unused
    collection_name: Optional[str] = Field(default=None)

@@ -46,4 +46,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
        Save the documents to the storage.
        This method should be called after the chunks and embeddings are generated.
        """
-        self.storage.save(self.chunks)
+        if self.storage:
+            self.storage.save(self.chunks)
+        else:
+            raise ValueError("No storage found to save documents.")
--- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py
+++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -1,16 +1,10 @@
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, TypeVar, Union
-from typing import DefaultDict  # Separate import to avoid circular imports

 from pydantic import BaseModel, Field
 from rich.box import HEAVY_EDGE
 from rich.console import Console
 from rich.table import Table

-from crewai.llm import LLM
-
-T = TypeVar('T', bound=LLM)
-
 from crewai.agent import Agent
 from crewai.task import Task
 from crewai.tasks.task_output import TaskOutput
@@ -34,47 +28,14 @@ class CrewEvaluator:
        iteration (int): The current iteration of the evaluation.
    """

-    _tasks_scores: DefaultDict[int, List[float]] = Field(
-        default_factory=lambda: defaultdict(list))
-    _run_execution_times: DefaultDict[int, List[float]] = Field(
-        default_factory=lambda: defaultdict(list))
+    tasks_scores: defaultdict = defaultdict(list)
+    run_execution_times: defaultdict = defaultdict(list)
    iteration: int = 0

-    @property
-    def tasks_scores(self) -> DefaultDict[int, List[float]]:
-        return self._tasks_scores
-
-    @tasks_scores.setter
-    def tasks_scores(self, value: Dict[int, List[float]]) -> None:
-        self._tasks_scores = defaultdict(list, value)
-
-    @property
-    def run_execution_times(self) -> DefaultDict[int, List[float]]:
-        return self._run_execution_times
-
-    @run_execution_times.setter
-    def run_execution_times(self, value: Dict[int, List[float]]) -> None:
-        self._run_execution_times = defaultdict(list, value)
-
-    def __init__(self, crew, llm: Union[str, T]):
-        """Initialize the CrewEvaluator.
-        
-        Args:
-            crew: The Crew instance to evaluate
-            llm: Language model to use for evaluation. Can be either a model name string
-                or an LLM instance for custom implementations
-                
-        Raises:
-            ValueError: If llm is None or invalid
-        """
-        if not llm:
-            raise ValueError("Invalid LLM configuration")
-            
+    def __init__(self, crew, openai_model_name: str):
        self.crew = crew
-        self.llm = LLM(model=llm) if isinstance(llm, str) else llm
+        self.openai_model_name = openai_model_name
        self._telemetry = Telemetry()
-        self._tasks_scores = defaultdict(list)
-        self._run_execution_times = defaultdict(list)
        self._setup_for_evaluating()

    def _setup_for_evaluating(self) -> None:
@@ -90,7 +51,7 @@ class CrewEvaluator:
            ),
            backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed",
            verbose=False,
-            llm=self.llm,
+            llm=self.openai_model_name,
        )

    def _evaluation_task(
@@ -220,19 +181,11 @@ class CrewEvaluator:
                self.crew,
                evaluation_result.pydantic.quality,
                current_task._execution_time,
-                self._get_llm_identifier(),
+                self.openai_model_name,
            )
-            self._tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
-            self._run_execution_times[self.iteration].append(
+            self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
+            self.run_execution_times[self.iteration].append(
                current_task._execution_time
            )
        else:
            raise ValueError("Evaluation result is not in the expected format")
-
-    def _get_llm_identifier(self) -> str:
-        """Get a string identifier for the LLM instance.
-        
-        Returns:
-            String representation of the LLM for telemetry
-        """
-        return str(self.llm) if isinstance(self.llm, LLM) else self.llm
--- a/tests/crew_test.py
+++ b/tests/crew_test.py
@@ -10,7 +10,6 @@ import instructor
 import pydantic_core
 import pytest

-from crewai.llm import LLM
 from crewai.agent import Agent
 from crewai.agents.cache import CacheHandler
 from crewai.crew import Crew
@@ -1124,7 +1123,7 @@ def test_kickoff_for_each_empty_input():
    assert results == []


-@pytest.mark.vcr(filter_headeruvs=["authorization"])
+@pytest.mark.vcr(filter_headers=["authorization"])
 def test_kickoff_for_each_invalid_input():
    """Tests if kickoff_for_each raises TypeError for invalid input types."""

@@ -2829,7 +2828,7 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
    copy_mock.return_value = crew

    n_iterations = 2
-    crew.test(n_iterations, llm="gpt-4o-mini", inputs={"topic": "AI"})
+    crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})

    # Ensure kickoff is called on the copied crew
    kickoff_mock.assert_has_calls(
@@ -2845,32 +2844,6 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
        ]
    )

-@mock.patch("crewai.crew.CrewEvaluator")
-@mock.patch("crewai.crew.Crew.copy")
-@mock.patch("crewai.crew.Crew.kickoff")
-def test_crew_testing_with_custom_llm(kickoff_mock, copy_mock, crew_evaluator):
-    task = Task(
-        description="Test task",
-        expected_output="Test output",
-        agent=researcher,
-    )
-    crew = Crew(agents=[researcher], tasks=[task])
-    copy_mock.return_value = crew
-    custom_llm = LLM(model="gpt-4")
-    
-    crew.test(2, llm=custom_llm, inputs={"topic": "AI"})
-    
-    kickoff_mock.assert_has_calls([
-        mock.call(inputs={"topic": "AI"}),
-        mock.call(inputs={"topic": "AI"})
-    ])
-    crew_evaluator.assert_has_calls([
-        mock.call(crew, custom_llm),
-        mock.call().set_iteration(1),
-        mock.call().set_iteration(2),
-        mock.call().print_crew_evaluation_result(),
-    ])
-

 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_hierarchical_verbose_manager_agent():
@@ -3152,4 +3125,4 @@ def test_multimodal_agent_live_image_analysis():
    # Verify we got a meaningful response
    assert isinstance(result.raw, str)
    assert len(result.raw) > 100  # Expecting a detailed analysis
-    assert "error" not in result.raw.lower()  # No error messages in response
+    assert "error" not in result.raw.lower()  # No error messages in response
--- a/tests/utilities/evaluators/test_crew_evaluator_handler.py
+++ b/tests/utilities/evaluators/test_crew_evaluator_handler.py
@@ -2,7 +2,6 @@ from unittest import mock

 import pytest

-from crewai.llm import LLM
 from crewai.agent import Agent
 from crewai.crew import Crew
 from crewai.task import Task
@@ -24,7 +23,7 @@ class TestCrewEvaluator:
        )
        crew = Crew(agents=[agent], tasks=[task])

-        return CrewEvaluator(crew, llm="gpt-4o-mini")
+        return CrewEvaluator(crew, openai_model_name="gpt-4o-mini")

    def test_setup_for_evaluating(self, crew_planner):
        crew_planner._setup_for_evaluating()
@@ -48,25 +47,6 @@ class TestCrewEvaluator:
        assert agent.verbose is False
        assert agent.llm.model == "gpt-4o-mini"

-    @pytest.mark.parametrize("llm_input,expected_model", [
-        (LLM(model="gpt-4"), "gpt-4"),
-        ("gpt-4", "gpt-4"),
-    ])
-    def test_evaluator_with_llm_types(self, crew_planner, llm_input, expected_model):
-        evaluator = CrewEvaluator(crew_planner.crew, llm_input)
-        agent = evaluator._evaluator_agent()
-        assert agent.llm.model == expected_model
-        
-    def test_evaluator_with_invalid_llm(self, crew_planner):
-        with pytest.raises(ValueError, match="Invalid LLM configuration"):
-            CrewEvaluator(crew_planner.crew, None)
-
-    def test_evaluator_with_string_llm(self, crew_planner):
-        evaluator = CrewEvaluator(crew_planner.crew, "gpt-4")
-        agent = evaluator._evaluator_agent()
-        assert isinstance(agent.llm, LLM)
-        assert agent.llm.model == "gpt-4"
-
    def test_evaluation_task(self, crew_planner):
        evaluator_agent = Agent(
            role="Evaluator Agent",
Author	SHA1	Message	Date
João Moura	63028e1b20	Merge branch 'main' into fix-knowledgestorage-default-instantiation	2024-12-27 21:18:16 -03:00
João Moura	81759e8c72	Merge branch 'main' into fix-knowledgestorage-default-instantiation	2024-12-27 17:18:33 -03:00
ericklima-ca	27472ba69e	refactor: Change storage field to optional and improve error handling when saving documents	2024-12-26 22:27:19 -04:00
ericklima-ca	25aa774d8c	fix: Change storage initialization to None for KnowledgeStorage	2024-12-26 21:30:06 -04:00