Address PR review: Add constants, IPv4 validation, error handling, and expanded tests

Co-Authored-By: Joe Moura <joao@crewai.com>
This commit is contained in:
Devin AI
2025-03-12 17:05:01 +00:00
committed by Lucas Gomide
parent 12a815e5db
commit df25703cc2
4 changed files with 81 additions and 13 deletions

View File

@@ -142,8 +142,13 @@ class Agent(BaseAgent):
self.embedder = crew_embedder
if self.knowledge_sources:
from crewai.utilities import sanitize_collection_name
knowledge_agent_name = sanitize_collection_name(self.role)
try:
from crewai.utilities import sanitize_collection_name
knowledge_agent_name = sanitize_collection_name(self.role)
except Exception as e:
self._logger.warning(f"Error sanitizing collection name: {e}")
knowledge_agent_name = "default_agent"
if isinstance(self.knowledge_sources, list) and all(
isinstance(k, BaseKnowledgeSource) for k in self.knowledge_sources
):

View File

@@ -7,7 +7,7 @@ from .parser import YamlParser
from .printer import Printer
from .prompts import Prompts
from .rpm_controller import RPMController
from .string_utils import sanitize_collection_name
from .string_utils import sanitize_collection_name, is_ipv4_pattern
from .exceptions.context_window_exceeding_exception import (
LLMContextLengthExceededException,
)
@@ -27,4 +27,5 @@ __all__ = [
"LLMContextLengthExceededException",
"EmbeddingConfigurator",
"sanitize_collection_name",
"is_ipv4_pattern",
]

View File

@@ -84,6 +84,28 @@ def interpolate_only(
from typing import Optional
# Constants for ChromaDB collection name requirements
MIN_LENGTH = 3
MAX_LENGTH = 63
DEFAULT_COLLECTION = "default_collection"

# Compiled regex patterns for better performance
INVALID_CHARS_PATTERN = re.compile(r"[^a-zA-Z0-9_-]")
IPV4_PATTERN = re.compile(r"^(\d{1,3}\.){3}\d{1,3}$")


def is_ipv4_pattern(name: str) -> bool:
    """
    Check if a string matches an IPv4 address pattern.

    The check is purely about shape: four dot-separated groups of one to
    three digits. Octet values are not range-checked, so
    "999.999.999.999" is still reported as a match.

    Args:
        name: The string to check

    Returns:
        True if the entire string matches an IPv4 pattern, False otherwise
    """
    # fullmatch() is used instead of match(): the "$" anchor in IPV4_PATTERN
    # also matches just before a trailing newline, so match() would wrongly
    # accept "1.2.3.4\n".
    return IPV4_PATTERN.fullmatch(name) is not None
def sanitize_collection_name(name: Optional[str]) -> str:
"""
@@ -101,10 +123,14 @@ def sanitize_collection_name(name: Optional[str]) -> str:
A sanitized collection name that meets ChromaDB requirements
"""
if not name:
return "default_collection"
return DEFAULT_COLLECTION
# Handle IPv4 pattern
if is_ipv4_pattern(name):
name = f"ip_{name}"
# Replace spaces and invalid characters with underscores
sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", name)
sanitized = INVALID_CHARS_PATTERN.sub("_", name)
# Ensure it starts with alphanumeric
if not sanitized[0].isalnum():
@@ -114,12 +140,12 @@ def sanitize_collection_name(name: Optional[str]) -> str:
if not sanitized[-1].isalnum():
sanitized = sanitized[:-1] + "z"
# Ensure length is between 3-63 characters
if len(sanitized) < 3:
# Ensure length is between MIN_LENGTH-MAX_LENGTH characters
if len(sanitized) < MIN_LENGTH:
# Add padding with alphanumeric character at the end
sanitized = sanitized + "x" * (3 - len(sanitized))
if len(sanitized) > 63:
sanitized = sanitized[:63]
sanitized = sanitized + "x" * (MIN_LENGTH - len(sanitized))
if len(sanitized) > MAX_LENGTH:
sanitized = sanitized[:MAX_LENGTH]
# Ensure it still ends with alphanumeric after truncation
if not sanitized[-1].isalnum():
sanitized = sanitized[:-1] + "z"