From bf2f4dbce6ab8e8dfe50c8b17bae45b48dcc1d19 Mon Sep 17 00:00:00 2001
From: alex-clawd <alex@crewai.com>
Date: Mon, 6 Apr 2026 10:48:58 -0700
Subject: [PATCH] fix: exclude embedding vectors from memory serialization
 (saves tokens) (#5298)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: exclude embedding vector from MemoryRecord serialization

MemoryRecord.embedding (1536 floats for OpenAI embeddings) was included
in model_dump()/JSON serialization and repr. When recall results flow
to agents or get logged, these vectors burn tokens for zero value —
agents never need the raw embedding.

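Added exclude=True and repr=False to the embedding field, so model_dump(),
model_dump_json(), and repr() all omit it. The storage layer accesses
record.embedding directly (not via model_dump), so persistence is
unaffected. Note that a model_dump()/model_validate() round trip no longer
carries the embedding.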

* test: validate embedding excluded from serialization

Two tests:
1. MemoryRecord — model_dump, model_dump_json, and repr all exclude
   embedding. Direct attribute access still works for storage layer.
2. MemoryMatch — nested record serialization also excludes embedding.
---
 lib/crewai/src/crewai/memory/types.py         |  4 ++-
 .../tests/memory/test_unified_memory.py       | 36 +++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/lib/crewai/src/crewai/memory/types.py b/lib/crewai/src/crewai/memory/types.py
index 929e10092..e787b569d 100644
--- a/lib/crewai/src/crewai/memory/types.py
+++ b/lib/crewai/src/crewai/memory/types.py
@@ -53,7 +53,9 @@ class MemoryRecord(BaseModel):
     )
     embedding: list[float] | None = Field(
         default=None,
-        description="Vector embedding for semantic search. Computed on save if not provided.",
+        exclude=True,
+        repr=False,
+        description="Vector embedding for semantic search. Excluded from serialization to save tokens.",
     )
     source: str | None = Field(
         default=None,
diff --git a/lib/crewai/tests/memory/test_unified_memory.py b/lib/crewai/tests/memory/test_unified_memory.py
index 98a041086..f36bf0c2b 100644
--- a/lib/crewai/tests/memory/test_unified_memory.py
+++ b/lib/crewai/tests/memory/test_unified_memory.py
@@ -40,6 +40,42 @@ def test_memory_match() -> None:
     assert m.match_reasons == ["semantic"]
 
 
+def test_memory_record_embedding_excluded_from_serialization() -> None:
+    """Embedding vectors should not appear in serialized output to save tokens."""
+    r = MemoryRecord(content="hello", embedding=[0.1, 0.2, 0.3])
+
+    # Direct access still works
+    assert r.embedding == [0.1, 0.2, 0.3]
+
+    # model_dump excludes embedding by default
+    dumped = r.model_dump()
+    assert "embedding" not in dumped
+    assert dumped["content"] == "hello"
+
+    # model_dump_json excludes embedding; a bare "0.1" could collide with other fields
+    json_str = r.model_dump_json()
+    assert "0.1,0.2,0.3" not in json_str.replace(" ", "")
+    assert "embedding" not in json_str
+
+    # repr excludes embedding; match the full vector rather than a bare "0.1"
+    assert "0.1, 0.2, 0.3" not in repr(r)
+
+    # Direct attribute access still works for storage layer
+    assert r.embedding is not None
+    assert len(r.embedding) == 3
+
+
+def test_memory_match_embedding_excluded_from_serialization() -> None:
+    """Dumping a MemoryMatch must not expose the nested record's embedding."""
+    record = MemoryRecord(content="x", embedding=[0.5] * 1536)
+    hit = MemoryMatch(record=record, score=0.9, match_reasons=["semantic"])
+    serialized = hit.model_dump()
+    nested = serialized["record"]
+    assert "embedding" not in nested
+    assert nested["content"] == "x"
+    assert serialized["score"] == 0.9
+
+
 def test_scope_info() -> None:
     i = ScopeInfo(path="/", record_count=5, categories=["c1"], child_scopes=["/a"])
     assert i.path == "/"