from unittest.mock import MagicMock, patch

import pytest

from crewai import Agent, Crew, Process, Task
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage

# Score threshold passed to every KnowledgeStorage.search() call below.
_THRESHOLD = 0.35


def _chroma_response(documents, distances):
    """Build a query payload shaped like the ones these tests feed the mock.

    Ids are the 1-based positions rendered as strings and every document
    gets an empty metadata dict.
    """
    count = len(documents)
    return {
        "ids": [[str(position) for position in range(1, count + 1)]],
        "metadatas": [[{} for _ in range(count)]],
        "documents": [list(documents)],
        "distances": [list(distances)],
    }


def _storage_backed_by(collection):
    """Return a KnowledgeStorage whose ``collection`` is the given stand-in."""
    storage = KnowledgeStorage()
    storage.collection = collection
    return storage


def test_knowledge_storage_search_filtering():
    """search() keeps only the hits whose distance is below the threshold."""
    fake_collection = MagicMock()
    # Distances ascend from 0.1 to 0.5; lower is better in ChromaDB.
    fake_collection.query.return_value = _chroma_response(
        documents=["Doc1", "Doc2", "Doc3", "Doc4", "Doc5"],
        distances=[0.1, 0.2, 0.3, 0.4, 0.5],
    )
    storage = _storage_backed_by(fake_collection)

    results = storage.search(["test query"], score_threshold=_THRESHOLD)

    # Exactly the three documents under the threshold survive, in order.
    assert len(results) == 3
    for index, expected in enumerate(("Doc1", "Doc2", "Doc3")):
        assert results[index]["context"] == expected

    # The two documents at or above the threshold must be gone.
    surviving = [hit["context"] for hit in results]
    for rejected in ("Doc4", "Doc5"):
        assert rejected not in surviving


def test_string_knowledge_source_integration():
    """StringKnowledgeSource.add() saves its content through the storage."""
    content = "Users name is John. He is 30 years old and lives in San Francisco."

    # Swap in a mocked storage so no real embedding computation happens.
    with patch(
        'crewai.knowledge.storage.knowledge_storage.KnowledgeStorage'
    ) as MockStorage:
        mock_storage = MockStorage.return_value
        mock_storage.search.return_value = [
            {"context": "Users name is John. He is 30 years old and lives in San Francisco."}
        ]

        # Attach the mocked storage to a fresh source and push the content.
        string_source = StringKnowledgeSource(content=content)
        string_source.storage = mock_storage
        string_source.add()

        # add() must have called save() on the storage.
        assert mock_storage.save.called

        # Querying the mocked storage yields the canned context.
        results = mock_storage.search(["What city does John live in?"])
        assert len(results) > 0
        assert "San Francisco" in results[0]["context"]


def test_knowledge_storage_search_empty_results():
    """search() returns nothing when the collection has no matches."""
    fake_collection = MagicMock()
    fake_collection.query.return_value = _chroma_response(
        documents=[], distances=[]
    )
    storage = _storage_backed_by(fake_collection)

    results = storage.search(["test query"], score_threshold=_THRESHOLD)

    assert len(results) == 0


def test_knowledge_storage_search_threshold_boundary():
    """A hit whose distance equals the threshold exactly is excluded."""
    fake_collection = MagicMock()
    # Single document sitting exactly on the threshold value.
    fake_collection.query.return_value = _chroma_response(
        documents=["Doc1"], distances=[0.35]
    )
    storage = _storage_backed_by(fake_collection)

    results = storage.search(["test query"], score_threshold=_THRESHOLD)

    assert len(results) == 0


def test_knowledge_storage_search_error_handling():
    """An exception raised by the collection query escapes search()."""
    failing_collection = MagicMock()
    failing_collection.query.side_effect = Exception("ChromaDB error")
    storage = _storage_backed_by(failing_collection)

    with pytest.raises(Exception):
        storage.search(["test query"], score_threshold=_THRESHOLD)