From 774dbe34747cce94f85787f116cc1f92f8d18417 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 23 Aug 2025 15:56:51 +0000 Subject: [PATCH] fix: Handle Mem0 metadata size limit breach by truncating large messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add MAX_METADATA_SIZE_MEM0 constant for 2000 character limit - Implement _truncate_metadata_if_needed() to handle large metadata - Prioritize essential fields and truncate messages when needed - Add comprehensive tests for metadata truncation scenarios - Fixes #3391 Co-Authored-By: João --- src/crewai/memory/storage/mem0_storage.py | 33 +++++++++- tests/storage/test_mem0_storage.py | 78 +++++++++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/src/crewai/memory/storage/mem0_storage.py b/src/crewai/memory/storage/mem0_storage.py index 275d025ca..13a2908b8 100644 --- a/src/crewai/memory/storage/mem0_storage.py +++ b/src/crewai/memory/storage/mem0_storage.py @@ -7,6 +7,7 @@ from crewai.utilities.chromadb import sanitize_collection_name from crewai.memory.storage.interface import Storage MAX_AGENT_ID_LENGTH_MEM0 = 255 +MAX_METADATA_SIZE_MEM0 = 2000 class Mem0Storage(Storage): @@ -98,8 +99,11 @@ class Mem0Storage(Storage): } # Shared base params + raw_metadata = {"type": base_metadata[self.memory_type], **metadata} + truncated_metadata = self._truncate_metadata_if_needed(raw_metadata) + params: dict[str, Any] = { - "metadata": {"type": base_metadata[self.memory_type], **metadata}, + "metadata": truncated_metadata, "infer": self.infer } @@ -181,3 +185,30 @@ class Mem0Storage(Storage): agents = [self._sanitize_role(agent.role) for agent in agents] agents = "_".join(agents) return sanitize_collection_name(name=agents, max_collection_length=MAX_AGENT_ID_LENGTH_MEM0) + + def _truncate_metadata_if_needed(self, metadata: Dict[str, Any]) -> Dict[str, Any]: + """ + Truncate metadata to stay within Mem0's size limits. + Prioritizes essential fields and truncates messages if needed. + """ + import json + + metadata_str = json.dumps(metadata, default=str) + + if len(metadata_str) <= MAX_METADATA_SIZE_MEM0: + return metadata + + truncated_metadata = metadata.copy() + + if "messages" in truncated_metadata: + messages = truncated_metadata["messages"] + if isinstance(messages, list) and len(messages) > 0: + while len(json.dumps(truncated_metadata, default=str)) > MAX_METADATA_SIZE_MEM0 and len(messages) > 1: + messages = messages[-len(messages)//2:] # Keep last half + truncated_metadata["messages"] = messages + + if len(json.dumps(truncated_metadata, default=str)) > MAX_METADATA_SIZE_MEM0: + truncated_metadata.pop("messages", None) + truncated_metadata["_truncated"] = True + + return truncated_metadata diff --git a/tests/storage/test_mem0_storage.py b/tests/storage/test_mem0_storage.py index 78786094b..9c2186842 100644 --- a/tests/storage/test_mem0_storage.py +++ b/tests/storage/test_mem0_storage.py @@ -328,6 +328,84 @@ def test_search_method_with_agent_entity(): assert results[0]["context"] == "Result 1" +def test_metadata_truncation_with_large_messages(): + """Test that large messages in metadata are truncated to stay under limit""" + mock_memory = MagicMock(spec=Memory) + + with patch.object(Memory, "__new__", return_value=mock_memory): + mem0_storage = Mem0Storage(type="external", config={}) + + large_messages = [ + {"role": "user", "content": "x" * 500}, + {"role": "assistant", "content": "y" * 500}, + {"role": "user", "content": "z" * 500}, + {"role": "assistant", "content": "w" * 500}, + ] + + large_metadata = { + "description": "Test task", + "messages": large_messages, + } + + mem0_storage.save("test memory", large_metadata) + + call_args = mem0_storage.memory.add.call_args + saved_metadata = call_args[1]["metadata"] + + import json + metadata_str = json.dumps(saved_metadata, default=str) + assert len(metadata_str) <= 2000 + + assert saved_metadata["type"] == "external" + assert "description" in saved_metadata + + +def test_metadata_truncation_removes_messages_when_necessary(): + """Test that messages are completely removed if truncation isn't enough""" + mock_memory = MagicMock(spec=Memory) + + with patch.object(Memory, "__new__", return_value=mock_memory): + mem0_storage = Mem0Storage(type="external", config={}) + + extremely_large_messages = [{"role": "user", "content": "x" * 2000}] + + large_metadata = { + "description": "Test task", + "messages": extremely_large_messages, + } + + mem0_storage.save("test memory", large_metadata) + + call_args = mem0_storage.memory.add.call_args + saved_metadata = call_args[1]["metadata"] + + assert "messages" not in saved_metadata + assert saved_metadata.get("_truncated") is True + assert saved_metadata["type"] == "external" + + +def test_small_metadata_not_truncated(): + """Test that small metadata is not modified""" + mock_memory = MagicMock(spec=Memory) + + with patch.object(Memory, "__new__", return_value=mock_memory): + mem0_storage = Mem0Storage(type="external", config={}) + + small_metadata = { + "description": "Small task", + "messages": [{"role": "user", "content": "short message"}], + } + + mem0_storage.save("test memory", small_metadata) + + call_args = mem0_storage.memory.add.call_args + saved_metadata = call_args[1]["metadata"] + + assert "messages" in saved_metadata + assert saved_metadata["messages"] == small_metadata["messages"] + assert "_truncated" not in saved_metadata + + def test_search_method_with_agent_id_and_user_id(): mock_memory = MagicMock(spec=Memory) mock_results = {"results": [{"score": 0.9, "memory": "Result 1"}, {"score": 0.4, "memory": "Result 2"}]}