Merge branch 'main' into fix-knowledgestorage-default-instantiation

refactor: Change storage field to optional and improve error handling when saving documents
2026-05-01 15:22:37 +00:00 · 2024-12-27 21:18:16 -03:00 · 2024-12-27 17:18:33 -03:00 · 2024-12-26 22:27:19 -04:00 · 2024-12-26 21:30:06 -04:00
5 changed files with 13 additions and 60 deletions
--- a/src/crewai/knowledge/knowledge.py
+++ b/src/crewai/knowledge/knowledge.py
@@ -14,13 +14,13 @@ class Knowledge(BaseModel):
    Knowledge is a collection of sources and setup for the vector store to save and query relevant context.
    Args:
        sources: List[BaseKnowledgeSource] = Field(default_factory=list)
-        storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+        storage: Optional[KnowledgeStorage] = Field(default=None)
        embedder_config: Optional[Dict[str, Any]] = None
    """

    sources: List[BaseKnowledgeSource] = Field(default_factory=list)
    model_config = ConfigDict(arbitrary_types_allowed=True)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    embedder_config: Optional[Dict[str, Any]] = None
    collection_name: Optional[str] = None

--- a/src/crewai/knowledge/source/base_file_knowledge_source.py
+++ b/src/crewai/knowledge/source/base_file_knowledge_source.py
@@ -22,7 +22,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
        default_factory=list, description="The path to the file"
    )
    content: Dict[Path, str] = Field(init=False, default_factory=dict)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    safe_file_paths: List[Path] = Field(default_factory=list)

    @field_validator("file_path", "file_paths", mode="before")
@@ -62,7 +62,10 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):

    def _save_documents(self):
        """Save the documents to the storage."""
-        self.storage.save(self.chunks)
+        if self.storage:
+            self.storage.save(self.chunks)
+        else:
+            raise ValueError("No storage found to save documents.")

    def convert_to_path(self, path: Union[Path, str]) -> Path:
        """Convert a path to a Path object."""
--- a/src/crewai/knowledge/source/base_knowledge_source.py
+++ b/src/crewai/knowledge/source/base_knowledge_source.py
@@ -16,7 +16,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
    chunk_embeddings: List[np.ndarray] = Field(default_factory=list)

    model_config = ConfigDict(arbitrary_types_allowed=True)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    metadata: Dict[str, Any] = Field(default_factory=dict)  # Currently unused
    collection_name: Optional[str] = Field(default=None)

@@ -46,4 +46,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
        Save the documents to the storage.
        This method should be called after the chunks and embeddings are generated.
        """
-        self.storage.save(self.chunks)
+        if self.storage:
+            self.storage.save(self.chunks)
+        else:
+            raise ValueError("No storage found to save documents.")
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -92,8 +92,6 @@ def suppress_warnings():


 class LLM:
-    MODELS_WITHOUT_STOP_SUPPORT = ["o3", "o3-mini", "o4-mini"]
-    
    def __init__(
        self,
        model: str,
@@ -157,7 +155,7 @@ class LLM:
                    "temperature": self.temperature,
                    "top_p": self.top_p,
                    "n": self.n,
-                    "stop": self.stop if self.supports_stop_words() else None,
+                    "stop": self.stop,
                    "max_tokens": self.max_tokens or self.max_completion_tokens,
                    "presence_penalty": self.presence_penalty,
                    "frequency_penalty": self.frequency_penalty,
@@ -195,19 +193,6 @@ class LLM:
            return False

    def supports_stop_words(self) -> bool:
-        """
-        Determines whether the current model supports the 'stop' parameter.
-        
-        This method checks if the model is in the list of models known not to support
-        stop words, and if not, it queries the litellm library to determine if the
-        model supports the 'stop' parameter.
-        
-        Returns:
-            bool: True if the model supports stop words, False otherwise.
-        """
-        if any(self.model.startswith(model) for model in self.MODELS_WITHOUT_STOP_SUPPORT):
-            return False
-                
        try:
            params = get_supported_openai_params(model=self.model)
            return "stop" in params
--- a/tests/llm_test.py
+++ b/tests/llm_test.py
@@ -28,41 +28,3 @@ def test_llm_callback_replacement():
    assert usage_metrics_1.successful_requests == 1
    assert usage_metrics_2.successful_requests == 1
    assert usage_metrics_1 == calc_handler_1.token_cost_process.get_summary()
-
-
-class TestLLMStopWords:
-    """Tests for LLM stop words functionality."""
-    
-    def test_supports_stop_words_for_o3_model(self):
-        """Test that supports_stop_words returns False for o3 model."""
-        llm = LLM(model="o3")
-        assert not llm.supports_stop_words()
-    
-    def test_supports_stop_words_for_o4_mini_model(self):
-        """Test that supports_stop_words returns False for o4-mini model."""
-        llm = LLM(model="o4-mini")
-        assert not llm.supports_stop_words()
-    
-    def test_supports_stop_words_for_supported_model(self):
-        """Test that supports_stop_words returns True for models that support stop words."""
-        llm = LLM(model="gpt-4")
-        assert llm.supports_stop_words()
-    
-    @pytest.mark.vcr(filter_headers=["authorization"])
-    def test_llm_call_excludes_stop_parameter_for_unsupported_models(self, monkeypatch):
-        """Test that the LLM.call method excludes the stop parameter for models that don't support it."""
-        def mock_completion(**kwargs):
-            assert 'stop' not in kwargs, "Stop parameter should be excluded for o3 model"
-            assert 'model' in kwargs, "Model parameter should be included"
-            assert 'messages' in kwargs, "Messages parameter should be included"
-            return {"choices": [{"message": {"content": "Hello, World!"}}]}
-        
-        monkeypatch.setattr("litellm.completion", mock_completion)
-        
-        llm = LLM(model="o3")
-        llm.stop = ["STOP"]
-        
-        messages = [{"role": "user", "content": "Say 'Hello, World!'"}]
-        response = llm.call(messages)
-        
-        assert response == "Hello, World!"
Author	SHA1	Message	Date
João Moura	63028e1b20	Merge branch 'main' into fix-knowledgestorage-default-instantiation	2024-12-27 21:18:16 -03:00
João Moura	81759e8c72	Merge branch 'main' into fix-knowledgestorage-default-instantiation	2024-12-27 17:18:33 -03:00
ericklima-ca	27472ba69e	refactor: Change storage field to optional and improve error handling when saving documents	2024-12-26 22:27:19 -04:00
ericklima-ca	25aa774d8c	fix: Change storage initialization to None for KnowledgeStorage	2024-12-26 21:30:06 -04:00