fix(valkey): post-filter metadata outside FT.SEARCH (#5794)

The memory_index FT schema only materializes embedding, scope,
categories, created_at, and importance. ValkeyStorage._vector_search
was emitting metadata_filter clauses as @{key}:{value} against that
index, but metadata keys are user-defined and not part of the schema,
so the FT.SEARCH server would either error out or silently return
results that the metadata predicate failed to narrow.

Move metadata_filter to a Python post-filter that runs after FT.SEARCH
parses results. Overfetch from KNN (limit * 10, capped at 1000) when
metadata_filter is supplied so the post-filter still returns the
caller-requested number of hits in the common case, then truncate to
limit. Scope/categories predicates remain pushed down into FT.SEARCH
because they are valid index fields.

Adds TestValkeyStorageMetadataPostFilter and
TestValkeyStorageMatchesMetadataFilter test classes; updates three
pre-existing tests that asserted the broken contract.

Closes #5794

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2026-05-13 17:39:45 +00:00
parent c5a9a8da50
commit 808654c2f2
2 changed files with 542 additions and 40 deletions

View File

@@ -1222,6 +1222,39 @@ class ValkeyStorage:
return records_data
# Overfetch factor applied when ``metadata_filter`` is supplied so the
# Python post-filter has enough candidates to still return ``limit`` hits
# after dropping records whose metadata does not match. Capped at
# :pyattr:`_METADATA_POSTFILTER_MAX_FETCH` to avoid pathological queries.
_METADATA_POSTFILTER_OVERFETCH = 10
_METADATA_POSTFILTER_MAX_FETCH = 1000
@staticmethod
def _matches_metadata_filter(
record_metadata: dict[str, Any],
metadata_filter: dict[str, Any],
) -> bool:
"""Return ``True`` iff every key in ``metadata_filter`` matches the record.
Comparisons are performed on string representations so that callers can
pass numeric, boolean, or string filter values interchangeably (mirroring
how the values were originally stringified for the legacy FT query).
Args:
record_metadata: The record's metadata dict (may be empty).
metadata_filter: Required key/value pairs (AND logic).
Returns:
``True`` when every filter key is present in ``record_metadata`` and
its string representation equals the filter's string representation.
"""
for key, expected in metadata_filter.items():
if key not in record_metadata:
return False
if str(record_metadata[key]) != str(expected):
return False
return True
async def _vector_search(
self,
query_embedding: list[float],
@@ -1234,13 +1267,24 @@ class ValkeyStorage:
"""Perform server-side vector search using Valkey Search.
Uses FT.SEARCH command with KNN query for vector similarity.
Applies filters for scope, categories, and metadata in the same query.
Applies scope and category filters in the FT.SEARCH query directly,
and applies ``metadata_filter`` as a Python post-filter over the
results. Metadata is stored as an opaque JSON blob in the hash and
is not part of the ``memory_index`` FT schema, so it cannot be
narrowed inside FT.SEARCH; doing so would either error out or
silently return wrong results.
When ``metadata_filter`` is supplied, the underlying KNN limit is
scaled by :pyattr:`_METADATA_POSTFILTER_OVERFETCH` (capped at
:pyattr:`_METADATA_POSTFILTER_MAX_FETCH`) so the post-filter still
has enough candidates to return ``limit`` hits in the common case.
Args:
query_embedding: Embedding vector for the query.
scope_prefix: Optional scope path prefix to filter results.
categories: Optional list of categories (OR logic).
metadata_filter: Optional metadata key-value pairs (AND logic).
metadata_filter: Optional metadata key-value pairs (AND logic),
applied as a Python post-filter.
limit: Maximum number of results to return.
min_score: Minimum similarity score threshold (0.0 to 1.0).
@@ -1255,7 +1299,14 @@ class ValkeyStorage:
# Ensure vector index exists
await self._ensure_vector_index()
# Build query components
# Treat an empty metadata_filter dict the same as None so callers can
# pass ``{}`` without triggering overfetch or post-filter work.
metadata_filter = metadata_filter or None
# Build query components for the FT.SEARCH query. Note that
# ``metadata_filter`` is intentionally NOT included here because
# arbitrary metadata keys are not present in the ``memory_index``
# FT schema (only scope, categories, created_at, importance).
query_parts: list[str] = []
# Scope prefix filter
@@ -1277,25 +1328,26 @@ class ValkeyStorage:
cat_query = "|".join(escaped_categories)
query_parts.append(f"@categories:{{{cat_query}}}")
# Metadata filters (AND logic)
# Format: @{key}:{value}
if metadata_filter:
for key, value in metadata_filter.items():
# Escape key and value
escaped_key = self._escape_search_query(key)
escaped_value = self._escape_search_query(str(value))
query_parts.append(f"@{escaped_key}:{{{escaped_value}}}")
# Combine filters
filter_query = " ".join(query_parts) if query_parts else "*"
# When metadata_filter is supplied, overfetch candidates from KNN so
# the Python post-filter can still satisfy ``limit`` results.
if metadata_filter:
fetch_limit = min(
max(limit * self._METADATA_POSTFILTER_OVERFETCH, limit),
self._METADATA_POSTFILTER_MAX_FETCH,
)
else:
fetch_limit = limit
# Build KNN query with filters
# Format: (filter)=>[KNN limit @field $BLOB AS score]
# Note: Don't wrap single "*" in parentheses
if filter_query == "*":
query = f"{filter_query}=>[KNN {limit} @embedding $BLOB AS score]"
query = f"{filter_query}=>[KNN {fetch_limit} @embedding $BLOB AS score]"
else:
query = f"({filter_query})=>[KNN {limit} @embedding $BLOB AS score]"
query = f"({filter_query})=>[KNN {fetch_limit} @embedding $BLOB AS score]"
# Prepare embedding blob for PARAMS
embedding_blob = self._embedding_to_bytes(query_embedding)
@@ -1320,7 +1372,7 @@ class ValkeyStorage:
search_options = FtSearchOptions(
return_fields=return_fields,
params={"BLOB": embedding_blob},
limit=FtSearchLimit(0, limit),
limit=FtSearchLimit(0, fetch_limit),
)
try:
@@ -1365,6 +1417,20 @@ class ValkeyStorage:
if rec.scope == normalized or rec.scope.startswith(f"{normalized}/")
]
# Apply metadata_filter as a Python post-filter. The FT index does
# not materialize arbitrary metadata keys, so this is the only
# correct place to enforce metadata predicates.
if metadata_filter:
records = [
(rec, score)
for rec, score in records
if self._matches_metadata_filter(rec.metadata, metadata_filter)
]
# Truncate to the caller-requested limit after post-filtering.
if len(records) > limit:
records = records[:limit]
return records
except Exception as e:
@@ -1463,13 +1529,17 @@ class ValkeyStorage:
"""Search for memories by vector similarity (async).
Uses Valkey Search module for server-side vector similarity computation.
Applies filters for scope, categories, and metadata in the same query.
Scope and category predicates are pushed down into FT.SEARCH; the
``metadata_filter`` predicate is applied as a Python post-filter
because arbitrary metadata keys are not indexed in the
``memory_index`` FT schema.
Args:
query_embedding: Embedding vector for the query.
scope_prefix: Optional scope path prefix to filter results.
categories: Optional list of categories (OR logic).
metadata_filter: Optional metadata key-value pairs (AND logic).
metadata_filter: Optional metadata key-value pairs (AND logic),
applied as a Python post-filter over the FT.SEARCH results.
limit: Maximum number of results to return.
min_score: Minimum similarity score threshold (0.0 to 1.0).
@@ -1500,13 +1570,17 @@ class ValkeyStorage:
"""Search for memories by vector similarity (sync wrapper).
Uses Valkey Search module for server-side vector similarity computation.
Applies filters for scope, categories, and metadata in the same query.
Scope and category predicates are pushed down into FT.SEARCH; the
``metadata_filter`` predicate is applied as a Python post-filter
because arbitrary metadata keys are not indexed in the
``memory_index`` FT schema.
Args:
query_embedding: Embedding vector for the query.
scope_prefix: Optional scope path prefix to filter results.
categories: Optional list of categories (OR logic).
metadata_filter: Optional metadata key-value pairs (AND logic).
metadata_filter: Optional metadata key-value pairs (AND logic),
applied as a Python post-filter over the FT.SEARCH results.
limit: Maximum number of results to return.
min_score: Minimum similarity score threshold (0.0 to 1.0).

View File

@@ -229,7 +229,12 @@ class TestValkeyStorageVectorSearch:
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""Test search with metadata filter only."""
"""Metadata filter is applied as a post-filter (issue #5794).
``metadata`` is not part of the ``memory_index`` FT schema, so the
FT.SEARCH query string must not reference metadata keys. The filter is
applied in Python after FT.SEARCH returns.
"""
record1 = MemoryRecord(
id="record-1",
content="Record with metadata",
@@ -248,14 +253,17 @@ class TestValkeyStorageVectorSearch:
limit=10
)
# Verify query contains metadata filters (AND logic)
# Metadata predicates must NOT be injected into the FT.SEARCH query
# (their schema fields do not exist on memory_index).
call_args = mock_ft_search.call_args
query = call_args[0][2]
assert "@agent_id:{agent\\-1}" in query or "@agent_id:{agent-1}" in query
assert "@priority:{high}" in query
assert "=>[KNN 10 @embedding $BLOB AS score]" in query
assert "@agent_id" not in query
assert "@priority" not in query
# KNN clause is still present, with the overfetch limit applied so the
# post-filter has enough candidates to return ``limit`` hits.
assert "[KNN 100 @embedding $BLOB AS score]" in query
# Verify results
# Post-filter retains the record whose metadata matches every filter key.
assert len(results) == 1
assert results[0][0].id == "record-1"
assert results[0][0].metadata["agent_id"] == "agent-1"
@@ -268,7 +276,7 @@ class TestValkeyStorageVectorSearch:
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""Test search with combined filters (scope + categories + metadata)."""
"""Combined filters: scope + categories pushed down, metadata post-filtered."""
record1 = MemoryRecord(
id="record-1",
content="Record matching all filters",
@@ -290,13 +298,15 @@ class TestValkeyStorageVectorSearch:
limit=10
)
# Verify query contains all filters
# Scope and categories are valid index fields and stay in the query.
call_args = mock_ft_search.call_args
query = call_args[0][2]
assert "@scope:{/agent*}" in query
assert "@categories:{planning}" in query
assert "@agent_id:{agent\\-1}" in query or "@agent_id:{agent-1}" in query
assert "=>[KNN 10 @embedding $BLOB AS score]" in query
# Metadata predicates must NOT be in the FT.SEARCH query.
assert "@agent_id" not in query
# Overfetch limit is applied when metadata_filter is supplied.
assert "[KNN 100 @embedding $BLOB AS score]" in query
# Verify results
assert len(results) == 1
@@ -520,7 +530,7 @@ class TestValkeyStorageVectorSearch:
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""Test search with numeric metadata values."""
"""Numeric metadata filters work via post-filter (string-coerced compare)."""
record1 = MemoryRecord(
id="record-1",
content="Record with numeric metadata",
@@ -539,11 +549,18 @@ class TestValkeyStorageVectorSearch:
limit=10
)
# Verify query contains string-converted metadata values
# Metadata predicates must NOT be in the FT.SEARCH query; the
# post-filter is responsible for matching numeric values.
call_args = mock_ft_search.call_args
query = call_args[0][2]
assert "@count:{42}" in query
assert "@score:{3" in query and "14}" in query
assert "@count" not in query
assert "@score" not in query
# Post-filter accepts numeric values (both int and float) and returns
# the matching record.
assert len(results) == 1
assert results[0][0].metadata["count"] == 42
assert results[0][0].metadata["score"] == 3.14
@pytest.mark.asyncio
@patch("crewai.memory.storage.valkey_storage.ft.search")
@@ -883,7 +900,7 @@ class TestValkeyStorageVectorSearch:
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""Test search with multiple metadata filters uses AND logic."""
"""Multiple metadata filters use AND logic in the post-filter."""
record1 = MemoryRecord(
id="record-1",
content="Record matching all metadata",
@@ -902,14 +919,15 @@ class TestValkeyStorageVectorSearch:
limit=10
)
# Verify query contains AND logic for metadata
# Metadata predicates must NOT be in the FT.SEARCH query — they are
# not part of the index schema.
call_args = mock_ft_search.call_args
query = call_args[0][2]
assert "@agent_id:" in query
assert "@priority:" in query
assert "@status:" in query
assert "@agent_id" not in query
assert "@priority" not in query
assert "@status" not in query
# Verify record matching all metadata is returned
# AND logic: every filter key must match for the record to be kept.
assert len(results) == 1
assert results[0][0].id == "record-1"
@@ -995,4 +1013,414 @@ class TestValkeyStorageVectorSearch:
# Verify all results are returned
assert len(results) == 2
assert results[0][0].id == "record-1"
assert results[1][0].id == "record-2"
assert results[1][0].id == "record-2"
class TestValkeyStorageMetadataPostFilter:
"""Regression tests for issue #5794.
The ``memory_index`` FT schema only contains fixed fields
(``embedding``, ``scope``, ``categories``, ``created_at``, ``importance``).
Arbitrary metadata keys are NOT indexed, so ``metadata_filter`` must be
applied as a Python post-filter over FT.SEARCH results — never injected
into the FT.SEARCH query string.
"""
@pytest.mark.asyncio
@patch("crewai.memory.storage.valkey_storage.ft.search")
@patch("crewai.memory.storage.valkey_storage.ft.list")
async def test_metadata_filter_not_pushed_into_ft_query(
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""No ``@<key>`` clause for any metadata key is emitted to FT.SEARCH."""
record = MemoryRecord(
id="record-1",
content="A record",
scope="/test",
metadata={"agent_id": "agent-1", "priority": "high"},
embedding=[0.1, 0.2, 0.3, 0.4],
)
mock_ft_list.return_value = [b"memory_index"]
mock_ft_search.return_value = create_mock_ft_search_response([(record, 0.9)])
await valkey_storage.asearch(
[0.1, 0.2, 0.3, 0.4],
metadata_filter={"agent_id": "agent-1", "priority": "high"},
limit=5,
)
query = mock_ft_search.call_args[0][2]
# The bug was: metadata keys were emitted as @<key>:{value} clauses
# that reference non-existent FT schema fields.
assert "@agent_id" not in query
assert "@priority" not in query
# The KNN clause uses the overfetched limit when metadata_filter is set.
assert "[KNN 50 @embedding $BLOB AS score]" in query
@pytest.mark.asyncio
@patch("crewai.memory.storage.valkey_storage.ft.search")
@patch("crewai.memory.storage.valkey_storage.ft.list")
async def test_metadata_filter_excludes_records_with_missing_key(
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""Records whose metadata is missing a required key are dropped."""
matching = MemoryRecord(
id="match-1",
content="Has agent_id",
scope="/test",
metadata={"agent_id": "agent-1"},
embedding=[0.1, 0.2, 0.3, 0.4],
)
missing_key = MemoryRecord(
id="missing-1",
content="Missing agent_id entirely",
scope="/test",
metadata={"unrelated": "value"},
embedding=[0.2, 0.3, 0.4, 0.5],
)
empty_metadata = MemoryRecord(
id="empty-1",
content="No metadata",
scope="/test",
metadata={},
embedding=[0.3, 0.4, 0.5, 0.6],
)
mock_ft_list.return_value = [b"memory_index"]
mock_ft_search.return_value = create_mock_ft_search_response([
(matching, 0.95),
(missing_key, 0.85),
(empty_metadata, 0.75),
])
results = await valkey_storage.asearch(
[0.1, 0.2, 0.3, 0.4],
metadata_filter={"agent_id": "agent-1"},
limit=10,
)
# Only the record whose metadata has agent_id=agent-1 survives.
assert [rec.id for rec, _ in results] == ["match-1"]
@pytest.mark.asyncio
@patch("crewai.memory.storage.valkey_storage.ft.search")
@patch("crewai.memory.storage.valkey_storage.ft.list")
async def test_metadata_filter_excludes_records_with_mismatched_value(
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""Records whose metadata value differs from the filter are dropped."""
agent_one = MemoryRecord(
id="agent-1-rec",
content="agent-1",
scope="/test",
metadata={"agent_id": "agent-1"},
embedding=[0.1, 0.2, 0.3, 0.4],
)
agent_two = MemoryRecord(
id="agent-2-rec",
content="agent-2",
scope="/test",
metadata={"agent_id": "agent-2"},
embedding=[0.2, 0.3, 0.4, 0.5],
)
mock_ft_list.return_value = [b"memory_index"]
mock_ft_search.return_value = create_mock_ft_search_response([
(agent_one, 0.9),
(agent_two, 0.8),
])
results = await valkey_storage.asearch(
[0.1, 0.2, 0.3, 0.4],
metadata_filter={"agent_id": "agent-1"},
limit=10,
)
assert [rec.id for rec, _ in results] == ["agent-1-rec"]
@pytest.mark.asyncio
@patch("crewai.memory.storage.valkey_storage.ft.search")
@patch("crewai.memory.storage.valkey_storage.ft.list")
async def test_metadata_filter_and_logic_requires_every_key(
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""Multi-key filters require every key/value pair to match (AND)."""
full_match = MemoryRecord(
id="full",
content="Matches both",
scope="/test",
metadata={"agent_id": "agent-1", "priority": "high"},
embedding=[0.1, 0.2, 0.3, 0.4],
)
partial = MemoryRecord(
id="partial",
content="Only agent_id matches",
scope="/test",
metadata={"agent_id": "agent-1", "priority": "low"},
embedding=[0.2, 0.3, 0.4, 0.5],
)
mock_ft_list.return_value = [b"memory_index"]
mock_ft_search.return_value = create_mock_ft_search_response([
(full_match, 0.95),
(partial, 0.93),
])
results = await valkey_storage.asearch(
[0.1, 0.2, 0.3, 0.4],
metadata_filter={"agent_id": "agent-1", "priority": "high"},
limit=10,
)
assert [rec.id for rec, _ in results] == ["full"]
@pytest.mark.asyncio
@patch("crewai.memory.storage.valkey_storage.ft.search")
@patch("crewai.memory.storage.valkey_storage.ft.list")
async def test_metadata_filter_matches_numeric_values_via_string_compare(
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""Numeric metadata values match when stringified equal to the filter."""
record = MemoryRecord(
id="numeric",
content="Numeric metadata",
scope="/test",
metadata={"count": 42, "ratio": 3.14, "active": True},
embedding=[0.1, 0.2, 0.3, 0.4],
)
other = MemoryRecord(
id="numeric-other",
content="Different count",
scope="/test",
metadata={"count": 41, "ratio": 3.14, "active": True},
embedding=[0.2, 0.3, 0.4, 0.5],
)
mock_ft_list.return_value = [b"memory_index"]
mock_ft_search.return_value = create_mock_ft_search_response([
(record, 0.9),
(other, 0.85),
])
results = await valkey_storage.asearch(
[0.1, 0.2, 0.3, 0.4],
metadata_filter={"count": 42, "ratio": 3.14, "active": True},
limit=10,
)
assert [rec.id for rec, _ in results] == ["numeric"]
@pytest.mark.asyncio
@patch("crewai.memory.storage.valkey_storage.ft.search")
@patch("crewai.memory.storage.valkey_storage.ft.list")
async def test_empty_metadata_filter_dict_skips_post_filter(
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""An empty ``{}`` filter behaves like ``None`` (no overfetch, no filtering)."""
record1 = MemoryRecord(
id="r1",
content="r1",
scope="/test",
metadata={"k": "v"},
embedding=[0.1, 0.2, 0.3, 0.4],
)
record2 = MemoryRecord(
id="r2",
content="r2",
scope="/test",
metadata={},
embedding=[0.2, 0.3, 0.4, 0.5],
)
mock_ft_list.return_value = [b"memory_index"]
mock_ft_search.return_value = create_mock_ft_search_response([
(record1, 0.9),
(record2, 0.8),
])
results = await valkey_storage.asearch(
[0.1, 0.2, 0.3, 0.4],
metadata_filter={},
limit=10,
)
query = mock_ft_search.call_args[0][2]
# No overfetch — KNN limit stays at the caller-requested limit.
assert "[KNN 10 @embedding $BLOB AS score]" in query
# All records returned (no filtering performed).
assert [rec.id for rec, _ in results] == ["r1", "r2"]
@pytest.mark.asyncio
@patch("crewai.memory.storage.valkey_storage.ft.search")
@patch("crewai.memory.storage.valkey_storage.ft.list")
async def test_metadata_filter_overfetches_to_preserve_caller_limit(
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""KNN is overfetched so post-filter can still return ``limit`` hits.
Without overfetch, KNN would return at most ``limit`` candidates and
the post-filter could drop every one, leaving the caller with fewer
than the requested ``limit`` results. The overfetch ensures the
post-filter has enough candidates to satisfy the caller in the common
case where ~10% of candidates match the metadata predicate.
"""
matching = [
MemoryRecord(
id=f"match-{i}",
content=f"match {i}",
scope="/test",
metadata={"tier": "gold"},
embedding=[0.1 * (i + 1), 0.2, 0.3, 0.4],
)
for i in range(5)
]
non_matching = [
MemoryRecord(
id=f"skip-{i}",
content=f"skip {i}",
scope="/test",
metadata={"tier": "silver"},
embedding=[0.1 * (i + 1), 0.3, 0.4, 0.5],
)
for i in range(15)
]
# FT.SEARCH returns 20 candidates (mix of gold/silver) sorted by score.
# If the KNN had been capped at limit=5, the silver records mixed in
# would have starved out the gold matches and ``results`` would have
# been shorter than 5.
combined = []
score = 0.99
for rec in matching + non_matching:
combined.append((rec, score))
score -= 0.01
mock_ft_list.return_value = [b"memory_index"]
mock_ft_search.return_value = create_mock_ft_search_response(combined)
results = await valkey_storage.asearch(
[0.1, 0.2, 0.3, 0.4],
metadata_filter={"tier": "gold"},
limit=5,
)
query = mock_ft_search.call_args[0][2]
# KNN limit is overfetched (limit * OVERFETCH = 50) to give the
# post-filter enough candidates.
assert "[KNN 50 @embedding $BLOB AS score]" in query
# All five matching records are returned, in score-descending order.
assert [rec.id for rec, _ in results] == [
"match-0", "match-1", "match-2", "match-3", "match-4"
]
@pytest.mark.asyncio
@patch("crewai.memory.storage.valkey_storage.ft.search")
@patch("crewai.memory.storage.valkey_storage.ft.list")
async def test_metadata_filter_truncates_to_caller_limit(
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""When post-filter retains more than ``limit`` records, truncate."""
records = [
MemoryRecord(
id=f"rec-{i}",
content=f"rec {i}",
scope="/test",
metadata={"tier": "gold"},
embedding=[0.1 * (i + 1), 0.2, 0.3, 0.4],
)
for i in range(8)
]
scored = [(rec, 0.9 - 0.05 * i) for i, rec in enumerate(records)]
mock_ft_list.return_value = [b"memory_index"]
mock_ft_search.return_value = create_mock_ft_search_response(scored)
results = await valkey_storage.asearch(
[0.1, 0.2, 0.3, 0.4],
metadata_filter={"tier": "gold"},
limit=3,
)
# Caller asked for 3, so only the top 3 (by score) are returned even
# though 8 candidates matched the metadata filter.
assert len(results) == 3
assert [rec.id for rec, _ in results] == ["rec-0", "rec-1", "rec-2"]
@pytest.mark.asyncio
@patch("crewai.memory.storage.valkey_storage.ft.search")
@patch("crewai.memory.storage.valkey_storage.ft.list")
async def test_metadata_filter_does_not_affect_scope_or_categories_pushdown(
self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
) -> None:
"""Scope/categories predicates are still pushed into FT.SEARCH.
They are valid index fields and benefit from server-side filtering.
Only ``metadata_filter`` is moved to the Python side.
"""
record = MemoryRecord(
id="rec",
content="rec",
scope="/agent/task",
categories=["planning"],
metadata={"agent_id": "agent-1"},
embedding=[0.1, 0.2, 0.3, 0.4],
)
mock_ft_list.return_value = [b"memory_index"]
mock_ft_search.return_value = create_mock_ft_search_response([(record, 0.9)])
await valkey_storage.asearch(
[0.1, 0.2, 0.3, 0.4],
scope_prefix="/agent",
categories=["planning"],
metadata_filter={"agent_id": "agent-1"},
limit=5,
)
query = mock_ft_search.call_args[0][2]
assert "@scope:{/agent*}" in query
assert "@categories:{planning}" in query
# Metadata predicates remain on the Python side.
assert "@agent_id" not in query
class TestValkeyStorageMatchesMetadataFilter:
"""Unit tests for the ``_matches_metadata_filter`` static helper."""
def test_returns_true_when_all_keys_match(self) -> None:
assert ValkeyStorage._matches_metadata_filter(
{"a": "1", "b": "2", "c": "3"},
{"a": "1", "b": "2"},
)
def test_returns_false_when_key_missing(self) -> None:
assert not ValkeyStorage._matches_metadata_filter(
{"a": "1"},
{"a": "1", "b": "2"},
)
def test_returns_false_when_value_mismatches(self) -> None:
assert not ValkeyStorage._matches_metadata_filter(
{"a": "1", "b": "wrong"},
{"a": "1", "b": "2"},
)
def test_coerces_to_string_for_comparison(self) -> None:
# Filter pre-stringified by caller still matches typed record value.
assert ValkeyStorage._matches_metadata_filter(
{"count": 42, "ratio": 3.14, "flag": True},
{"count": "42", "ratio": "3.14", "flag": "True"},
)
# And vice versa: typed filter matches typed record value.
assert ValkeyStorage._matches_metadata_filter(
{"count": 42},
{"count": 42},
)
def test_empty_filter_always_matches(self) -> None:
assert ValkeyStorage._matches_metadata_filter({}, {})
assert ValkeyStorage._matches_metadata_filter({"a": "1"}, {})