Add additional tests and fix RAGStorage.search() comparison

Co-Authored-By: Joe Moura <joao@crewai.com>
Fix linting issues in test file
2026-02-23 14:28:14 +00:00 · 2025-03-10 17:16:49 +00:00 · 2025-03-10 17:12:27 +00:00 · 2025-03-10 17:10:26 +00:00
3 changed files with 121 additions and 2 deletions
--- a/src/crewai/knowledge/storage/knowledge_storage.py
+++ b/src/crewai/knowledge/storage/knowledge_storage.py
@@ -76,7 +76,7 @@ class KnowledgeStorage(BaseKnowledgeStorage):
                        "context": fetched["documents"][0][i],  # type: ignore
                        "score": fetched["distances"][0][i],  # type: ignore
                    }
-                    if result["score"] >= score_threshold:
+                    if result["score"] < score_threshold:  # Lower distance values indicate higher similarity in ChromaDB
                        results.append(result)
                return results
            else:
--- a/src/crewai/memory/storage/rag_storage.py
+++ b/src/crewai/memory/storage/rag_storage.py
@@ -130,7 +130,7 @@ class RAGStorage(BaseRAGStorage):
                    "context": response["documents"][0][i],
                    "score": response["distances"][0][i],
                }
-                if result["score"] >= score_threshold:
+                if result["score"] < score_threshold:  # Lower distance values indicate higher similarity in ChromaDB
                    results.append(result)

            return results
--- a/tests/knowledge/test_string_knowledge_source_fix.py
+++ b/tests/knowledge/test_string_knowledge_source_fix.py
@@ -0,0 +1,119 @@
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from crewai import Agent, Crew, Process, Task
+from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
+from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
+
+
+def test_knowledge_storage_search_filtering():
+    """Check that KnowledgeStorage.search() keeps only results whose distance is strictly below score_threshold."""
+    # Stub collection whose query() returns five documents with ascending distances
+    mock_collection = MagicMock()
+    mock_collection.query.return_value = {
+        "ids": [["1", "2", "3", "4", "5"]],
+        "metadatas": [[{}, {}, {}, {}, {}]],
+        "documents": [["Doc1", "Doc2", "Doc3", "Doc4", "Doc5"]],
+        "distances": [[0.1, 0.2, 0.3, 0.4, 0.5]]  # 0.1-0.3 are below the 0.35 threshold used below; 0.4 and 0.5 are not
+    }
+    
+    # Replace the storage's collection with the stub so search() reads the canned response
+    storage = KnowledgeStorage()
+    storage.collection = mock_collection
+    
+    # Query with an explicit threshold; the comparison under test is `score < score_threshold`
+    results = storage.search(["test query"], score_threshold=0.35)
+    
+    # Docs at 0.1, 0.2 and 0.3 must be kept, in the order the stub returned them
+    assert len(results) == 3
+    assert results[0]["context"] == "Doc1"
+    assert results[1]["context"] == "Doc2"
+    assert results[2]["context"] == "Doc3"
+    
+    # Docs at 0.4 and 0.5 (distance >= threshold) must have been filtered out
+    contexts = [result["context"] for result in results]
+    assert "Doc4" not in contexts
+    assert "Doc5" not in contexts
+
+def test_string_knowledge_source_integration():
+    """Check that StringKnowledgeSource.add() calls save() on its (mocked) storage."""
+    # Text the knowledge source is built from
+    content = "Users name is John. He is 30 years old and lives in San Francisco."
+    
+    # Patch KnowledgeStorage so the test works against a mock instead of a real storage backend
+    with patch('crewai.knowledge.storage.knowledge_storage.KnowledgeStorage') as MockStorage:
+        # Canned search() result that the mock instance will return
+        mock_storage = MockStorage.return_value
+        mock_storage.search.return_value = [
+            {"context": "Users name is John. He is 30 years old and lives in San Francisco."}
+        ]
+        
+        # Build the source, then assign the mock as its storage before calling add()
+        string_source = StringKnowledgeSource(content=content)
+        string_source.storage = mock_storage
+        string_source.add()
+        
+        # add() is expected to have called save() on the assigned storage
+        assert mock_storage.save.called
+        
+        # NOTE(review): this calls the mock's own search(), so the assertions below only re-check the canned value
+        results = mock_storage.search(["What city does John live in?"])
+        assert len(results) > 0
+        assert "San Francisco" in results[0]["context"]
+
+def test_knowledge_storage_search_empty_results():
+    """Check that KnowledgeStorage.search() returns no results when the collection query yields no matches."""
+    # Stub collection whose query() returns one empty inner list per field
+    mock_collection = MagicMock()
+    mock_collection.query.return_value = {
+        "ids": [[]],
+        "metadatas": [[]],
+        "documents": [[]],
+        "distances": [[]]
+    }
+    
+    # Replace the storage's collection with the stub so search() reads the canned response
+    storage = KnowledgeStorage()
+    storage.collection = mock_collection
+    
+    # Query with an explicit threshold; there is nothing for it to filter
+    results = storage.search(["test query"], score_threshold=0.35)
+    
+    # An empty query response must produce an empty result
+    assert len(results) == 0
+
+def test_knowledge_storage_search_threshold_boundary():
+    """Check that a result whose distance equals score_threshold is excluded (the comparison is strict)."""
+    # Stub collection whose query() returns a single document sitting exactly on the threshold
+    mock_collection = MagicMock()
+    mock_collection.query.return_value = {
+        "ids": [["1"]],
+        "metadatas": [[{}]],
+        "documents": [["Doc1"]],
+        "distances": [[0.35]]  # Equal to the score_threshold passed to search() below
+    }
+    
+    # Replace the storage's collection with the stub so search() reads the canned response
+    storage = KnowledgeStorage()
+    storage.collection = mock_collection
+    
+    # Query with a threshold equal to the stubbed distance
+    results = storage.search(["test query"], score_threshold=0.35)
+    
+    # 0.35 < 0.35 is false, so the boundary result must be dropped
+    assert len(results) == 0
+
+def test_knowledge_storage_search_error_handling():
+    """Check that an exception raised by collection.query() propagates out of KnowledgeStorage.search()."""
+    # Stub collection whose query() raises instead of returning a response
+    mock_collection = MagicMock()
+    mock_collection.query.side_effect = Exception("ChromaDB error")
+    
+    # Replace the storage's collection with the failing stub
+    storage = KnowledgeStorage()
+    storage.collection = mock_collection
+    
+    # search() is expected to let the error surface rather than swallow it
+    with pytest.raises(Exception):  # NOTE(review): very broad; match="ChromaDB error" would pin the expected failure
+        storage.search(["test query"], score_threshold=0.35)
Author	SHA1	Message	Date
Devin AI	ac42992e22	Add additional tests and fix RAGStorage.search() comparison Co-Authored-By: Joe Moura <joao@crewai.com>	2025-03-10 17:16:49 +00:00
Devin AI	464ca30e46	Fix linting issues in test file Co-Authored-By: Joe Moura <joao@crewai.com>	2025-03-10 17:12:27 +00:00
Devin AI	a8022f31d3	Fix issue #2315 : Correct ChromaDB distance comparison in KnowledgeStorage.search() Co-Authored-By: Joe Moura <joao@crewai.com>	2025-03-10 17:10:26 +00:00