diff --git a/src/crewai/knowledge/storage/knowledge_storage.py b/src/crewai/knowledge/storage/knowledge_storage.py index 72240e2b6..f185907d6 100644 --- a/src/crewai/knowledge/storage/knowledge_storage.py +++ b/src/crewai/knowledge/storage/knowledge_storage.py @@ -76,7 +76,7 @@ class KnowledgeStorage(BaseKnowledgeStorage): "context": fetched["documents"][0][i], # type: ignore "score": fetched["distances"][0][i], # type: ignore } - if result["score"] >= score_threshold: + if result["score"] < score_threshold: # Lower distance values indicate higher similarity in ChromaDB results.append(result) return results else: diff --git a/tests/knowledge/test_string_knowledge_source_fix.py b/tests/knowledge/test_string_knowledge_source_fix.py new file mode 100644 index 000000000..905892de6 --- /dev/null +++ b/tests/knowledge/test_string_knowledge_source_fix.py @@ -0,0 +1,60 @@ +import pytest +from unittest.mock import patch, MagicMock +from crewai import Agent, Task, Crew, Process +from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource +from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage + +def test_knowledge_storage_search_filtering(): + """Test that KnowledgeStorage.search() correctly filters results based on distance scores.""" + # Create a mock collection to simulate ChromaDB behavior + mock_collection = MagicMock() + mock_collection.query.return_value = { + "ids": [["1", "2", "3", "4", "5"]], + "metadatas": [[{}, {}, {}, {}, {}]], + "documents": [["Doc1", "Doc2", "Doc3", "Doc4", "Doc5"]], + "distances": [[0.1, 0.2, 0.3, 0.4, 0.5]] # Lower is better in ChromaDB + } + + # Create a KnowledgeStorage instance with the mock collection + storage = KnowledgeStorage() + storage.collection = mock_collection + + # Search with the fixed implementation + results = storage.search(["test query"], score_threshold=0.35) + + # Assert that only results with distance < threshold are included + assert len(results) == 3 + assert results[0]["context"] == "Doc1" + assert results[1]["context"] == "Doc2" + assert results[2]["context"] == "Doc3" + + # Verify that results with distance >= threshold are excluded + contexts = [result["context"] for result in results] + assert "Doc4" not in contexts + assert "Doc5" not in contexts + +def test_string_knowledge_source_integration(): + """Test that StringKnowledgeSource correctly adds content to storage.""" + # Create a knowledge source with specific content + content = "Users name is John. He is 30 years old and lives in San Francisco." + + # Mock the KnowledgeStorage to avoid actual embedding computation + with patch('crewai.knowledge.storage.knowledge_storage.KnowledgeStorage') as MockStorage: + # Configure the mock storage + mock_storage = MockStorage.return_value + mock_storage.search.return_value = [ + {"context": "Users name is John. He is 30 years old and lives in San Francisco."} + ] + + # Create the string source with the mock storage + string_source = StringKnowledgeSource(content=content) + string_source.storage = mock_storage + string_source.add() + + # Verify that the content was added to storage + assert mock_storage.save.called + + # Test querying the knowledge + results = mock_storage.search(["What city does John live in?"]) + assert len(results) > 0 + assert "San Francisco" in results[0]["context"]