mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 08:38:30 +00:00
fix: delegate collection name sanitization to knowledge store
This commit is contained in:
committed by
Lucas Gomide
parent
df25703cc2
commit
6b14ffcffb
@@ -1621,6 +1621,38 @@ def test_agent_with_knowledge_sources():
|
||||
assert "red" in result.raw.lower()
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_agent_with_knowledge_sources_extensive_role():
|
||||
content = "Brandon's favorite color is red and he likes Mexican food."
|
||||
string_source = StringKnowledgeSource(content=content)
|
||||
|
||||
with patch(
|
||||
"crewai.knowledge.storage.knowledge_storage.KnowledgeStorage"
|
||||
) as MockKnowledge:
|
||||
mock_knowledge_instance = MockKnowledge.return_value
|
||||
mock_knowledge_instance.sources = [string_source]
|
||||
mock_knowledge_instance.query.return_value = [{"content": content}]
|
||||
|
||||
agent = Agent(
|
||||
role="Information Agent with extensive role description that is longer than 80 characters",
|
||||
goal="Provide information based on knowledge sources",
|
||||
backstory="You have access to specific knowledge sources.",
|
||||
llm=LLM(model="gpt-4o-mini"),
|
||||
knowledge_sources=[string_source],
|
||||
)
|
||||
|
||||
task = Task(
|
||||
description="What is Brandon's favorite color?",
|
||||
expected_output="Brandon's favorite color.",
|
||||
agent=agent,
|
||||
)
|
||||
|
||||
crew = Crew(agents=[agent], tasks=[task])
|
||||
result = crew.kickoff()
|
||||
|
||||
assert "red" in result.raw.lower()
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_agent_with_knowledge_sources_works_with_copy():
|
||||
content = "Brandon's favorite color is red and he likes Mexican food."
|
||||
|
||||
File diff suppressed because one or more lines are too long
81
tests/utilities/test_chromadb_utils.py
Normal file
81
tests/utilities/test_chromadb_utils.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import unittest
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.utilities.chromadb import (
|
||||
MAX_COLLECTION_LENGTH,
|
||||
MIN_COLLECTION_LENGTH,
|
||||
is_ipv4_pattern,
|
||||
sanitize_collection_name,
|
||||
)
|
||||
|
||||
|
||||
class TestChromadbUtils(unittest.TestCase):
|
||||
def test_sanitize_collection_name_long_name(self):
|
||||
"""Test sanitizing a very long collection name."""
|
||||
long_name = "This is an extremely long role name that will definitely exceed the ChromaDB collection name limit of 63 characters and cause an error when used as a collection name"
|
||||
sanitized = sanitize_collection_name(long_name)
|
||||
self.assertLessEqual(len(sanitized), MAX_COLLECTION_LENGTH)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
self.assertTrue(all(c.isalnum() or c in ["_", "-"] for c in sanitized))
|
||||
|
||||
def test_sanitize_collection_name_special_chars(self):
|
||||
"""Test sanitizing a name with special characters."""
|
||||
special_chars = "Agent@123!#$%^&*()"
|
||||
sanitized = sanitize_collection_name(special_chars)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
self.assertTrue(all(c.isalnum() or c in ["_", "-"] for c in sanitized))
|
||||
|
||||
def test_sanitize_collection_name_short_name(self):
|
||||
"""Test sanitizing a very short name."""
|
||||
short_name = "A"
|
||||
sanitized = sanitize_collection_name(short_name)
|
||||
self.assertGreaterEqual(len(sanitized), MIN_COLLECTION_LENGTH)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
|
||||
def test_sanitize_collection_name_bad_ends(self):
|
||||
"""Test sanitizing a name with non-alphanumeric start/end."""
|
||||
bad_ends = "_Agent_"
|
||||
sanitized = sanitize_collection_name(bad_ends)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
|
||||
def test_sanitize_collection_name_none(self):
|
||||
"""Test sanitizing a None value."""
|
||||
sanitized = sanitize_collection_name(None)
|
||||
self.assertEqual(sanitized, "default_collection")
|
||||
|
||||
def test_sanitize_collection_name_ipv4_pattern(self):
|
||||
"""Test sanitizing an IPv4 address."""
|
||||
ipv4 = "192.168.1.1"
|
||||
sanitized = sanitize_collection_name(ipv4)
|
||||
self.assertTrue(sanitized.startswith("ip_"))
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
self.assertTrue(all(c.isalnum() or c in ["_", "-"] for c in sanitized))
|
||||
|
||||
def test_is_ipv4_pattern(self):
|
||||
"""Test IPv4 pattern detection."""
|
||||
self.assertTrue(is_ipv4_pattern("192.168.1.1"))
|
||||
self.assertFalse(is_ipv4_pattern("not.an.ip.address"))
|
||||
|
||||
def test_sanitize_collection_name_properties(self):
|
||||
"""Test that sanitized collection names always meet ChromaDB requirements."""
|
||||
test_cases = [
|
||||
"A" * 100, # Very long name
|
||||
"_start_with_underscore",
|
||||
"end_with_underscore_",
|
||||
"contains@special#characters",
|
||||
"192.168.1.1", # IPv4 address
|
||||
"a" * 2, # Too short
|
||||
]
|
||||
for test_case in test_cases:
|
||||
sanitized = sanitize_collection_name(test_case)
|
||||
self.assertGreaterEqual(len(sanitized), MIN_COLLECTION_LENGTH)
|
||||
self.assertLessEqual(len(sanitized), MAX_COLLECTION_LENGTH)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
@@ -1,14 +1,8 @@
|
||||
import unittest
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.utilities import is_ipv4_pattern, sanitize_collection_name
|
||||
from crewai.utilities.string_utils import (
|
||||
MAX_LENGTH,
|
||||
MIN_LENGTH,
|
||||
interpolate_only,
|
||||
)
|
||||
from crewai.utilities.string_utils import interpolate_only
|
||||
|
||||
|
||||
class TestInterpolateOnly:
|
||||
@@ -191,77 +185,3 @@ class TestInterpolateOnly:
|
||||
interpolate_only(template, inputs)
|
||||
|
||||
assert "inputs dictionary cannot be empty" in str(excinfo.value).lower()
|
||||
|
||||
|
||||
class TestStringUtils(unittest.TestCase):
|
||||
def test_sanitize_collection_name_long_name(self):
|
||||
"""Test sanitizing a very long collection name."""
|
||||
long_name = "This is an extremely long role name that will definitely exceed the ChromaDB collection name limit of 63 characters and cause an error when used as a collection name"
|
||||
sanitized = sanitize_collection_name(long_name)
|
||||
self.assertLessEqual(len(sanitized), MAX_LENGTH)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
self.assertTrue(all(c.isalnum() or c in ["_", "-"] for c in sanitized))
|
||||
|
||||
def test_sanitize_collection_name_special_chars(self):
|
||||
"""Test sanitizing a name with special characters."""
|
||||
special_chars = "Agent@123!#$%^&*()"
|
||||
sanitized = sanitize_collection_name(special_chars)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
self.assertTrue(all(c.isalnum() or c in ["_", "-"] for c in sanitized))
|
||||
|
||||
def test_sanitize_collection_name_short_name(self):
|
||||
"""Test sanitizing a very short name."""
|
||||
short_name = "A"
|
||||
sanitized = sanitize_collection_name(short_name)
|
||||
self.assertGreaterEqual(len(sanitized), MIN_LENGTH)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
|
||||
def test_sanitize_collection_name_bad_ends(self):
|
||||
"""Test sanitizing a name with non-alphanumeric start/end."""
|
||||
bad_ends = "_Agent_"
|
||||
sanitized = sanitize_collection_name(bad_ends)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
|
||||
def test_sanitize_collection_name_none(self):
|
||||
"""Test sanitizing a None value."""
|
||||
sanitized = sanitize_collection_name(None)
|
||||
self.assertEqual(sanitized, "default_collection")
|
||||
|
||||
def test_sanitize_collection_name_ipv4_pattern(self):
|
||||
"""Test sanitizing an IPv4 address."""
|
||||
ipv4 = "192.168.1.1"
|
||||
sanitized = sanitize_collection_name(ipv4)
|
||||
self.assertTrue(sanitized.startswith("ip_"))
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
self.assertTrue(all(c.isalnum() or c in ["_", "-"] for c in sanitized))
|
||||
|
||||
def test_is_ipv4_pattern(self):
|
||||
"""Test IPv4 pattern detection."""
|
||||
self.assertTrue(is_ipv4_pattern("192.168.1.1"))
|
||||
self.assertFalse(is_ipv4_pattern("not.an.ip.address"))
|
||||
|
||||
def test_sanitize_collection_name_properties(self):
|
||||
"""Test that sanitized collection names always meet ChromaDB requirements."""
|
||||
test_cases = [
|
||||
"A" * 100, # Very long name
|
||||
"_start_with_underscore",
|
||||
"end_with_underscore_",
|
||||
"contains@special#characters",
|
||||
"192.168.1.1", # IPv4 address
|
||||
"a" * 2, # Too short
|
||||
]
|
||||
for test_case in test_cases:
|
||||
sanitized = sanitize_collection_name(test_case)
|
||||
self.assertGreaterEqual(len(sanitized), MIN_LENGTH)
|
||||
self.assertLessEqual(len(sanitized), MAX_LENGTH)
|
||||
self.assertTrue(sanitized[0].isalnum())
|
||||
self.assertTrue(sanitized[-1].isalnum())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user