Merge branch 'main' into fix-knowledgestorage-default-instantiation

refactor: Change storage field to optional and improve error handling when saving documents
2026-01-28 17:48:13 +00:00 · 2024-12-27 21:18:16 -03:00 · 2024-12-27 17:18:33 -03:00 · 2024-12-26 22:27:19 -04:00 · 2024-12-26 21:30:06 -04:00
5 changed files with 12 additions and 96 deletions
--- a/src/crewai/agents/crew_agent_executor.py
+++ b/src/crewai/agents/crew_agent_executor.py
@@ -112,8 +112,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        try:
            while not isinstance(formatted_answer, AgentFinish):
                if not self.request_within_rpm_limit or self.request_within_rpm_limit():
-                    self._check_context_length_before_call()
-                    
                    answer = self.llm.call(
                        self.messages,
                        callbacks=self.callbacks,
@@ -329,19 +327,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            )
        ]

-    def _check_context_length_before_call(self) -> None:
-        total_chars = sum(len(msg.get("content", "")) for msg in self.messages)
-        estimated_tokens = total_chars // 4
-        
-        context_window_size = self.llm.get_context_window_size()
-        
-        if estimated_tokens > context_window_size:
-            self._printer.print(
-                content=f"Estimated token count ({estimated_tokens}) exceeds context window ({context_window_size}). Handling proactively.",
-                color="yellow",
-            )
-            self._handle_context_length()
-
    def _handle_context_length(self) -> None:
        if self.respect_context_window:
            self._printer.print(
--- a/src/crewai/knowledge/knowledge.py
+++ b/src/crewai/knowledge/knowledge.py
@@ -14,13 +14,13 @@ class Knowledge(BaseModel):
    Knowledge is a collection of sources and setup for the vector store to save and query relevant context.
    Args:
        sources: List[BaseKnowledgeSource] = Field(default_factory=list)
-        storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+        storage: Optional[KnowledgeStorage] = Field(default=None)
        embedder_config: Optional[Dict[str, Any]] = None
    """

    sources: List[BaseKnowledgeSource] = Field(default_factory=list)
    model_config = ConfigDict(arbitrary_types_allowed=True)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    embedder_config: Optional[Dict[str, Any]] = None
    collection_name: Optional[str] = None

--- a/src/crewai/knowledge/source/base_file_knowledge_source.py
+++ b/src/crewai/knowledge/source/base_file_knowledge_source.py
@@ -22,7 +22,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
        default_factory=list, description="The path to the file"
    )
    content: Dict[Path, str] = Field(init=False, default_factory=dict)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    safe_file_paths: List[Path] = Field(default_factory=list)

    @field_validator("file_path", "file_paths", mode="before")
@@ -62,7 +62,10 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):

    def _save_documents(self):
        """Save the documents to the storage."""
-        self.storage.save(self.chunks)
+        if self.storage:
+            self.storage.save(self.chunks)
+        else:
+            raise ValueError("No storage found to save documents.")

    def convert_to_path(self, path: Union[Path, str]) -> Path:
        """Convert a path to a Path object."""
--- a/src/crewai/knowledge/source/base_knowledge_source.py
+++ b/src/crewai/knowledge/source/base_knowledge_source.py
@@ -16,7 +16,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
    chunk_embeddings: List[np.ndarray] = Field(default_factory=list)

    model_config = ConfigDict(arbitrary_types_allowed=True)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    metadata: Dict[str, Any] = Field(default_factory=dict)  # Currently unused
    collection_name: Optional[str] = Field(default=None)

@@ -46,4 +46,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
        Save the documents to the storage.
        This method should be called after the chunks and embeddings are generated.
        """
-        self.storage.save(self.chunks)
+        if self.storage:
+            self.storage.save(self.chunks)
+        else:
+            raise ValueError("No storage found to save documents.")
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -1625,78 +1625,3 @@ def test_agent_with_knowledge_sources():

        # Assert that the agent provides the correct information
        assert "red" in result.raw.lower()
-
-
-def test_proactive_context_length_handling_prevents_empty_response():
-    """Test that proactive context length checking prevents empty LLM responses."""
-    agent = Agent(
-        role="test role",
-        goal="test goal", 
-        backstory="test backstory",
-        sliding_context_window=True,
-    )
-    
-    long_input = "This is a very long input that should exceed the context window. " * 1000
-    
-    with patch.object(agent.llm, 'get_context_window_size', return_value=100):
-        with patch.object(agent.agent_executor, '_handle_context_length') as mock_handle:
-            with patch.object(agent.llm, 'call', return_value="Proper response after summarization"):
-                
-                agent.agent_executor.messages = [
-                    {"role": "user", "content": long_input}
-                ]
-                
-                task = Task(
-                    description="Process this long input",
-                    expected_output="A response",
-                    agent=agent,
-                )
-                
-                result = agent.execute_task(task)
-                
-                mock_handle.assert_called()
-                assert result and result.strip() != ""
-
-
-def test_proactive_context_length_handling_with_no_summarization():
-    """Test proactive context length checking when summarization is disabled."""
-    agent = Agent(
-        role="test role",
-        goal="test goal",
-        backstory="test backstory", 
-        sliding_context_window=False,
-    )
-    
-    long_input = "This is a very long input. " * 1000
-    
-    with patch.object(agent.llm, 'get_context_window_size', return_value=100):
-        agent.agent_executor.messages = [
-            {"role": "user", "content": long_input}
-        ]
-        
-        with pytest.raises(SystemExit):
-            agent.agent_executor._check_context_length_before_call()
-
-
-def test_context_length_estimation():
-    """Test the token estimation logic."""
-    agent = Agent(
-        role="test role",
-        goal="test goal",
-        backstory="test backstory",
-    )
-    
-    agent.agent_executor.messages = [
-        {"role": "user", "content": "Short message"},
-        {"role": "assistant", "content": "Another short message"},
-    ]
-    
-    with patch.object(agent.llm, 'get_context_window_size', return_value=10):
-        with patch.object(agent.agent_executor, '_handle_context_length') as mock_handle:
-            agent.agent_executor._check_context_length_before_call()
-            mock_handle.assert_not_called()
-    
-    with patch.object(agent.llm, 'get_context_window_size', return_value=5):
-        with patch.object(agent.agent_executor, '_handle_context_length') as mock_handle:
-            agent.agent_executor._check_context_length_before_call()
-            mock_handle.assert_called()
Author	SHA1	Message	Date
João Moura	63028e1b20	Merge branch 'main' into fix-knowledgestorage-default-instantiation	2024-12-27 21:18:16 -03:00
João Moura	81759e8c72	Merge branch 'main' into fix-knowledgestorage-default-instantiation	2024-12-27 17:18:33 -03:00
ericklima-ca	27472ba69e	refactor: Change storage field to optional and improve error handling when saving documents	2024-12-26 22:27:19 -04:00
ericklima-ca	25aa774d8c	fix: Change storage initialization to None for KnowledgeStorage	2024-12-26 21:30:06 -04:00