From bf2f4dbce6ab8e8dfe50c8b17bae45b48dcc1d19 Mon Sep 17 00:00:00 2001 From: alex-clawd Date: Mon, 6 Apr 2026 10:48:58 -0700 Subject: [PATCH] fix: exclude embedding vectors from memory serialization (saves tokens) (#5298) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: exclude embedding vector from MemoryRecord serialization MemoryRecord.embedding (1536 floats for OpenAI embeddings) was included in model_dump()/JSON serialization and repr. When recall results flow to agents or get logged, these vectors burn tokens for zero value — agents never need the raw embedding. Added exclude=True and repr=False to the embedding field. The storage layer accesses record.embedding directly (not via model_dump), so persistence is unaffected. * test: validate embedding excluded from serialization Two tests: 1. MemoryRecord — model_dump, model_dump_json, and repr all exclude embedding. Direct attribute access still works for storage layer. 2. MemoryMatch — nested record serialization also excludes embedding. --- lib/crewai/src/crewai/memory/types.py | 4 ++- .../tests/memory/test_unified_memory.py | 36 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/lib/crewai/src/crewai/memory/types.py b/lib/crewai/src/crewai/memory/types.py index 929e10092..e787b569d 100644 --- a/lib/crewai/src/crewai/memory/types.py +++ b/lib/crewai/src/crewai/memory/types.py @@ -53,7 +53,9 @@ class MemoryRecord(BaseModel): ) embedding: list[float] | None = Field( default=None, - description="Vector embedding for semantic search. Computed on save if not provided.", + exclude=True, + repr=False, + description="Vector embedding for semantic search. Excluded from serialization to save tokens.", ) source: str | None = Field( default=None, diff --git a/lib/crewai/tests/memory/test_unified_memory.py b/lib/crewai/tests/memory/test_unified_memory.py index 98a041086..f36bf0c2b 100644 --- a/lib/crewai/tests/memory/test_unified_memory.py +++ b/lib/crewai/tests/memory/test_unified_memory.py @@ -40,6 +40,42 @@ def test_memory_match() -> None: assert m.match_reasons == ["semantic"] +def test_memory_record_embedding_excluded_from_serialization() -> None: + """Embedding vectors should not appear in serialized output to save tokens.""" + r = MemoryRecord(content="hello", embedding=[0.1, 0.2, 0.3]) + + # Direct access still works + assert r.embedding == [0.1, 0.2, 0.3] + + # model_dump excludes embedding by default + dumped = r.model_dump() + assert "embedding" not in dumped + assert dumped["content"] == "hello" + + # model_dump_json excludes embedding + json_str = r.model_dump_json() + assert "0.1" not in json_str + assert "embedding" not in json_str + + # repr excludes embedding + assert "0.1" not in repr(r) + + # Direct attribute access still works for storage layer + assert r.embedding is not None + assert len(r.embedding) == 3 + + +def test_memory_match_embedding_excluded_from_serialization() -> None: + """MemoryMatch serialization should not leak embedding vectors.""" + r = MemoryRecord(content="x", embedding=[0.5] * 1536) + m = MemoryMatch(record=r, score=0.9, match_reasons=["semantic"]) + + dumped = m.model_dump() + assert "embedding" not in dumped["record"] + assert dumped["record"]["content"] == "x" + assert dumped["score"] == 0.9 + + def test_scope_info() -> None: i = ScopeInfo(path="/", record_count=5, categories=["c1"], child_scopes=["/a"]) assert i.path == "/"