mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-26 00:28:13 +00:00
refactor: unify rag storage with instance-specific client support (#3455)
Some checks failed
Notify Downstream / notify-downstream (push) Has been cancelled
Update Test Durations / update-durations (3.10) (push) Has been cancelled
Update Test Durations / update-durations (3.11) (push) Has been cancelled
Update Test Durations / update-durations (3.12) (push) Has been cancelled
Update Test Durations / update-durations (3.13) (push) Has been cancelled
Build uv cache / build-cache (3.10) (push) Has been cancelled
Build uv cache / build-cache (3.11) (push) Has been cancelled
Build uv cache / build-cache (3.12) (push) Has been cancelled
Build uv cache / build-cache (3.13) (push) Has been cancelled
Some checks failed
Notify Downstream / notify-downstream (push) Has been cancelled
Update Test Durations / update-durations (3.10) (push) Has been cancelled
Update Test Durations / update-durations (3.11) (push) Has been cancelled
Update Test Durations / update-durations (3.12) (push) Has been cancelled
Update Test Durations / update-durations (3.13) (push) Has been cancelled
Build uv cache / build-cache (3.10) (push) Has been cancelled
Build uv cache / build-cache (3.11) (push) Has been cancelled
Build uv cache / build-cache (3.12) (push) Has been cancelled
Build uv cache / build-cache (3.13) (push) Has been cancelled
- ignore line length errors globally - migrate knowledge/memory and crew query_knowledge to `SearchResult` - remove legacy chromadb utils; fix empty metadata handling - restore openai as default embedding provider; support instance-specific clients - update and fix tests for `SearchResult` migration and rag changes
This commit is contained in:
@@ -1,123 +0,0 @@
|
||||
import multiprocessing
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from chromadb.config import Settings
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from crewai.utilities.chromadb import (
|
||||
MAX_COLLECTION_LENGTH,
|
||||
MIN_COLLECTION_LENGTH,
|
||||
is_ipv4_pattern,
|
||||
sanitize_collection_name,
|
||||
create_persistent_client,
|
||||
)
|
||||
|
||||
|
||||
def persistent_client_worker(path, queue):
|
||||
try:
|
||||
create_persistent_client(path=path)
|
||||
queue.put(None)
|
||||
except Exception as e:
|
||||
queue.put(e)
|
||||
|
||||
|
||||
class TestChromadbUtils(unittest.TestCase):
|
||||
def test_sanitize_collection_name_long_name(self):
|
||||
"""Test sanitizing a very long collection name."""
|
||||
long_name = "This is an extremely long role name that will definitely exceed the ChromaDB collection name limit of 63 characters and cause an error when used as a collection name"
|
||||
sanitized = sanitize_collection_name(long_name)
|
||||
self.assertLessEqual(len(sanitized), MAX_COLLECTION_LENGTH)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
self.assertTrue(all(c.isalnum() or c in ["_", "-"] for c in sanitized))
|
||||
|
||||
def test_sanitize_collection_name_special_chars(self):
|
||||
"""Test sanitizing a name with special characters."""
|
||||
special_chars = "Agent@123!#$%^&*()"
|
||||
sanitized = sanitize_collection_name(special_chars)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
self.assertTrue(all(c.isalnum() or c in ["_", "-"] for c in sanitized))
|
||||
|
||||
def test_sanitize_collection_name_short_name(self):
|
||||
"""Test sanitizing a very short name."""
|
||||
short_name = "A"
|
||||
sanitized = sanitize_collection_name(short_name)
|
||||
self.assertGreaterEqual(len(sanitized), MIN_COLLECTION_LENGTH)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
|
||||
def test_sanitize_collection_name_bad_ends(self):
|
||||
"""Test sanitizing a name with non-alphanumeric start/end."""
|
||||
bad_ends = "_Agent_"
|
||||
sanitized = sanitize_collection_name(bad_ends)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
|
||||
def test_sanitize_collection_name_none(self):
|
||||
"""Test sanitizing a None value."""
|
||||
sanitized = sanitize_collection_name(None)
|
||||
self.assertEqual(sanitized, "default_collection")
|
||||
|
||||
def test_sanitize_collection_name_ipv4_pattern(self):
|
||||
"""Test sanitizing an IPv4 address."""
|
||||
ipv4 = "192.168.1.1"
|
||||
sanitized = sanitize_collection_name(ipv4)
|
||||
self.assertTrue(sanitized.startswith("ip_"))
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
self.assertTrue(all(c.isalnum() or c in ["_", "-"] for c in sanitized))
|
||||
|
||||
def test_is_ipv4_pattern(self):
|
||||
"""Test IPv4 pattern detection."""
|
||||
self.assertTrue(is_ipv4_pattern("192.168.1.1"))
|
||||
self.assertFalse(is_ipv4_pattern("not.an.ip.address"))
|
||||
|
||||
def test_sanitize_collection_name_properties(self):
|
||||
"""Test that sanitized collection names always meet ChromaDB requirements."""
|
||||
test_cases = [
|
||||
"A" * 100, # Very long name
|
||||
"_start_with_underscore",
|
||||
"end_with_underscore_",
|
||||
"contains@special#characters",
|
||||
"192.168.1.1", # IPv4 address
|
||||
"a" * 2, # Too short
|
||||
]
|
||||
for test_case in test_cases:
|
||||
sanitized = sanitize_collection_name(test_case)
|
||||
self.assertGreaterEqual(len(sanitized), MIN_COLLECTION_LENGTH)
|
||||
self.assertLessEqual(len(sanitized), MAX_COLLECTION_LENGTH)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
|
||||
def test_create_persistent_client_passes_args(self):
|
||||
with patch(
|
||||
"crewai.utilities.chromadb.PersistentClient"
|
||||
) as mock_persistent_client, tempfile.TemporaryDirectory() as tmpdir:
|
||||
mock_instance = MagicMock()
|
||||
mock_persistent_client.return_value = mock_instance
|
||||
|
||||
settings = Settings(allow_reset=True)
|
||||
client = create_persistent_client(path=tmpdir, settings=settings)
|
||||
|
||||
mock_persistent_client.assert_called_once_with(
|
||||
path=tmpdir, settings=settings
|
||||
)
|
||||
self.assertIs(client, mock_instance)
|
||||
|
||||
def test_create_persistent_client_process_safe(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
queue = multiprocessing.Queue()
|
||||
processes = [
|
||||
multiprocessing.Process(
|
||||
target=persistent_client_worker, args=(tmpdir, queue)
|
||||
)
|
||||
for _ in range(5)
|
||||
]
|
||||
|
||||
[p.start() for p in processes]
|
||||
[p.join() for p in processes]
|
||||
|
||||
errors = [queue.get(timeout=5) for _ in processes]
|
||||
self.assertTrue(all(err is None for err in errors))
|
||||
@@ -29,13 +29,15 @@ def mock_knowledge_source():
|
||||
"""
|
||||
return StringKnowledgeSource(content=content)
|
||||
|
||||
@patch('crewai.knowledge.storage.knowledge_storage.chromadb')
|
||||
def test_knowledge_included_in_planning(mock_chroma):
|
||||
|
||||
@patch("crewai.rag.config.utils.get_rag_client")
|
||||
def test_knowledge_included_in_planning(mock_get_client):
|
||||
"""Test that verifies knowledge sources are properly included in planning."""
|
||||
# Mock ChromaDB collection
|
||||
mock_collection = mock_chroma.return_value.get_or_create_collection.return_value
|
||||
mock_collection.add.return_value = None
|
||||
|
||||
# Mock RAG client
|
||||
mock_client = mock_get_client.return_value
|
||||
mock_client.get_or_create_collection.return_value = None
|
||||
mock_client.add_documents.return_value = None
|
||||
|
||||
# Create an agent with knowledge
|
||||
agent = Agent(
|
||||
role="AI Researcher",
|
||||
@@ -45,14 +47,14 @@ def test_knowledge_included_in_planning(mock_chroma):
|
||||
StringKnowledgeSource(
|
||||
content="AI systems require careful training and validation."
|
||||
)
|
||||
]
|
||||
],
|
||||
)
|
||||
|
||||
# Create a task for the agent
|
||||
task = Task(
|
||||
description="Explain the basics of AI systems",
|
||||
expected_output="A clear explanation of AI fundamentals",
|
||||
agent=agent
|
||||
agent=agent,
|
||||
)
|
||||
|
||||
# Create a crew planner
|
||||
@@ -62,23 +64,29 @@ def test_knowledge_included_in_planning(mock_chroma):
|
||||
task_summary = planner._create_tasks_summary()
|
||||
|
||||
# Verify that knowledge is included in planning when present
|
||||
assert "AI systems require careful training" in task_summary, \
|
||||
assert "AI systems require careful training" in task_summary, (
|
||||
"Knowledge content should be present in task summary when knowledge exists"
|
||||
assert '"agent_knowledge"' in task_summary, \
|
||||
)
|
||||
assert '"agent_knowledge"' in task_summary, (
|
||||
"agent_knowledge field should be present in task summary when knowledge exists"
|
||||
)
|
||||
|
||||
# Verify that knowledge is properly formatted
|
||||
assert isinstance(task.agent.knowledge_sources, list), \
|
||||
assert isinstance(task.agent.knowledge_sources, list), (
|
||||
"Knowledge sources should be stored in a list"
|
||||
assert len(task.agent.knowledge_sources) > 0, \
|
||||
)
|
||||
assert len(task.agent.knowledge_sources) > 0, (
|
||||
"At least one knowledge source should be present"
|
||||
assert task.agent.knowledge_sources[0].content in task_summary, \
|
||||
)
|
||||
assert task.agent.knowledge_sources[0].content in task_summary, (
|
||||
"Knowledge source content should be included in task summary"
|
||||
)
|
||||
|
||||
# Verify that other expected components are still present
|
||||
assert task.description in task_summary, \
|
||||
assert task.description in task_summary, (
|
||||
"Task description should be present in task summary"
|
||||
assert task.expected_output in task_summary, \
|
||||
)
|
||||
assert task.expected_output in task_summary, (
|
||||
"Expected output should be present in task summary"
|
||||
assert agent.role in task_summary, \
|
||||
"Agent role should be present in task summary"
|
||||
)
|
||||
assert agent.role in task_summary, "Agent role should be present in task summary"
|
||||
|
||||
Reference in New Issue
Block a user