mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 17:18:29 +00:00
Fix issue #2315: Correct ChromaDB distance comparison in KnowledgeStorage.search()
Co-Authored-By: Joe Moura <joao@crewai.com>
This commit is contained in:
60
tests/knowledge/test_string_knowledge_source_fix.py
Normal file
60
tests/knowledge/test_string_knowledge_source_fix.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from crewai import Agent, Task, Crew, Process
|
||||
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
|
||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||
|
||||
def test_knowledge_storage_search_filtering():
|
||||
"""Test that KnowledgeStorage.search() correctly filters results based on distance scores."""
|
||||
# Create a mock collection to simulate ChromaDB behavior
|
||||
mock_collection = MagicMock()
|
||||
mock_collection.query.return_value = {
|
||||
"ids": [["1", "2", "3", "4", "5"]],
|
||||
"metadatas": [[{}, {}, {}, {}, {}]],
|
||||
"documents": [["Doc1", "Doc2", "Doc3", "Doc4", "Doc5"]],
|
||||
"distances": [[0.1, 0.2, 0.3, 0.4, 0.5]] # Lower is better in ChromaDB
|
||||
}
|
||||
|
||||
# Create a KnowledgeStorage instance with the mock collection
|
||||
storage = KnowledgeStorage()
|
||||
storage.collection = mock_collection
|
||||
|
||||
# Search with the fixed implementation
|
||||
results = storage.search(["test query"], score_threshold=0.35)
|
||||
|
||||
# Assert that only results with distance < threshold are included
|
||||
assert len(results) == 3
|
||||
assert results[0]["context"] == "Doc1"
|
||||
assert results[1]["context"] == "Doc2"
|
||||
assert results[2]["context"] == "Doc3"
|
||||
|
||||
# Verify that results with distance >= threshold are excluded
|
||||
contexts = [result["context"] for result in results]
|
||||
assert "Doc4" not in contexts
|
||||
assert "Doc5" not in contexts
|
||||
|
||||
def test_string_knowledge_source_integration():
|
||||
"""Test that StringKnowledgeSource correctly adds content to storage."""
|
||||
# Create a knowledge source with specific content
|
||||
content = "Users name is John. He is 30 years old and lives in San Francisco."
|
||||
|
||||
# Mock the KnowledgeStorage to avoid actual embedding computation
|
||||
with patch('crewai.knowledge.storage.knowledge_storage.KnowledgeStorage') as MockStorage:
|
||||
# Configure the mock storage
|
||||
mock_storage = MockStorage.return_value
|
||||
mock_storage.search.return_value = [
|
||||
{"context": "Users name is John. He is 30 years old and lives in San Francisco."}
|
||||
]
|
||||
|
||||
# Create the string source with the mock storage
|
||||
string_source = StringKnowledgeSource(content=content)
|
||||
string_source.storage = mock_storage
|
||||
string_source.add()
|
||||
|
||||
# Verify that the content was added to storage
|
||||
assert mock_storage.save.called
|
||||
|
||||
# Test querying the knowledge
|
||||
results = mock_storage.search(["What city does John live in?"])
|
||||
assert len(results) > 0
|
||||
assert "San Francisco" in results[0]["context"]
|
||||
Reference in New Issue
Block a user