Compare commits

...

1 Commits

Author SHA1 Message Date
Devin AI
774dbe3474 fix: Handle Mem0 metadata size limit breach by truncating large messages
- Add MAX_METADATA_SIZE_MEM0 constant for 2000 character limit
- Implement _truncate_metadata_if_needed() to handle large metadata
- Prioritize essential fields and truncate messages when needed
- Add comprehensive tests for metadata truncation scenarios
- Fixes #3391

Co-Authored-By: João <joao@crewai.com>
2025-08-23 15:56:51 +00:00
2 changed files with 110 additions and 1 deletion

View File

@@ -7,6 +7,7 @@ from crewai.utilities.chromadb import sanitize_collection_name
from crewai.memory.storage.interface import Storage
MAX_AGENT_ID_LENGTH_MEM0 = 255
MAX_METADATA_SIZE_MEM0 = 2000
class Mem0Storage(Storage):
@@ -98,8 +99,11 @@ class Mem0Storage(Storage):
}
# Shared base params
raw_metadata = {"type": base_metadata[self.memory_type], **metadata}
truncated_metadata = self._truncate_metadata_if_needed(raw_metadata)
params: dict[str, Any] = {
"metadata": {"type": base_metadata[self.memory_type], **metadata},
"metadata": truncated_metadata,
"infer": self.infer
}
@@ -181,3 +185,30 @@ class Mem0Storage(Storage):
agents = [self._sanitize_role(agent.role) for agent in agents]
agents = "_".join(agents)
return sanitize_collection_name(name=agents, max_collection_length=MAX_AGENT_ID_LENGTH_MEM0)
def _truncate_metadata_if_needed(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Shrink metadata so its JSON form fits within MAX_METADATA_SIZE_MEM0.

    Size is measured as the character length of
    ``json.dumps(metadata, default=str)``. Metadata that already fits is
    returned unchanged (the very same object). Otherwise a shallow copy is
    returned with ``"_truncated": True`` set, after applying, in order:

    1. If ``"messages"`` is a list, its older half is dropped repeatedly
       (the most recent entries are kept) until the metadata fits or only
       one message is left.
    2. If the metadata is still too large, ``"messages"`` is removed.

    Args:
        metadata: Metadata dict about to be sent to Mem0. It is not mutated.

    Returns:
        The original dict when it fits, otherwise a reduced shallow copy.

    Note:
        Only ``"messages"`` is ever trimmed. If the remaining fields exceed
        the limit on their own, the returned dict can still be over it.
    """
    import json

    def serialized_size(candidate: Dict[str, Any]) -> int:
        # default=str keeps non-JSON-native values from raising.
        return len(json.dumps(candidate, default=str))

    if serialized_size(metadata) <= MAX_METADATA_SIZE_MEM0:
        return metadata

    # Add the marker before measuring: it costs characters too, and adding
    # it after the final size check could push a just-fitting result over
    # the limit.
    truncated_metadata = {**metadata, "_truncated": True}

    messages = truncated_metadata.get("messages")
    if isinstance(messages, list) and messages:
        while (
            len(messages) > 1
            and serialized_size(truncated_metadata) > MAX_METADATA_SIZE_MEM0
        ):
            # Keep the most recent ceil(n / 2) messages.
            keep = (len(messages) + 1) // 2
            messages = messages[-keep:]
            truncated_metadata["messages"] = messages

    # Last resort: a single message (or a non-list value) is still too big.
    if serialized_size(truncated_metadata) > MAX_METADATA_SIZE_MEM0:
        truncated_metadata.pop("messages", None)

    return truncated_metadata

View File

@@ -328,6 +328,84 @@ def test_search_method_with_agent_entity():
assert results[0]["context"] == "Result 1"
def test_metadata_truncation_with_large_messages():
    """Oversized message lists are trimmed, keeping the most recent entries."""
    import json

    mock_memory = MagicMock(spec=Memory)
    with patch.object(Memory, "__new__", return_value=mock_memory):
        mem0_storage = Mem0Storage(type="external", config={})

        # Four 500-character messages serialize to more than 2000 characters
        # in total, while a subset of them fits comfortably.
        large_messages = [
            {"role": "user", "content": "x" * 500},
            {"role": "assistant", "content": "y" * 500},
            {"role": "user", "content": "z" * 500},
            {"role": "assistant", "content": "w" * 500},
        ]
        large_metadata = {
            "description": "Test task",
            "messages": large_messages,
        }

        mem0_storage.save("test memory", large_metadata)

        call_args = mem0_storage.memory.add.call_args
        saved_metadata = call_args[1]["metadata"]

        metadata_str = json.dumps(saved_metadata, default=str)
        assert len(metadata_str) <= 2000
        assert saved_metadata["type"] == "external"
        assert "description" in saved_metadata

        # Truncation must be flagged, and it must keep a non-empty tail of
        # the conversation rather than discarding every message.
        assert saved_metadata.get("_truncated") is True
        kept_messages = saved_metadata["messages"]
        assert kept_messages
        assert kept_messages == large_messages[-len(kept_messages):]
def test_metadata_truncation_removes_messages_when_necessary():
    """A single message too large to fit is dropped from the metadata."""
    fake_client = MagicMock(spec=Memory)
    with patch.object(Memory, "__new__", return_value=fake_client):
        storage = Mem0Storage(type="external", config={})

        # One message whose content alone is 2000 characters long.
        oversized_payload = {
            "description": "Test task",
            "messages": [{"role": "user", "content": "x" * 2000}],
        }

        storage.save("test memory", oversized_payload)

        sent = storage.memory.add.call_args.kwargs["metadata"]

        assert "messages" not in sent
        assert sent.get("_truncated") is True
        assert sent["type"] == "external"
def test_small_metadata_not_truncated():
    """Metadata that is already small reaches the client untouched."""
    fake_client = MagicMock(spec=Memory)
    with patch.object(Memory, "__new__", return_value=fake_client):
        storage = Mem0Storage(type="external", config={})

        short_messages = [{"role": "user", "content": "short message"}]
        small_metadata = {
            "description": "Small task",
            "messages": short_messages,
        }

        storage.save("test memory", small_metadata)

        sent = storage.memory.add.call_args.kwargs["metadata"]

        # Messages survive intact and no truncation marker is added.
        assert "messages" in sent
        assert sent["messages"] == small_metadata["messages"]
        assert "_truncated" not in sent
def test_search_method_with_agent_id_and_user_id():
mock_memory = MagicMock(spec=Memory)
mock_results = {"results": [{"score": 0.9, "memory": "Result 1"}, {"score": 0.4, "memory": "Result 2"}]}