fix(valkey): post-filter metadata outside FT.SEARCH (#5794)

The memory_index FT schema only materializes embedding, scope, categories, created_at, and importance. ValkeyStorage._vector_search was emitting metadata_filter clauses as @{key}:{value} against that index, but metadata keys are user-defined and not part of the schema, so the FT.SEARCH server would either error out or silently return results that the metadata predicate failed to narrow. Move metadata_filter to a Python post-filter that runs after FT.SEARCH parses results. Overfetch from KNN (limit * 10, capped at 1000) when metadata_filter is supplied so the post-filter still returns the caller-requested number of hits in the common case, then truncate to limit. Scope/categories predicates remain pushed down into FT.SEARCH because they are valid index fields. Adds TestValkeyStorageMetadataPostFilter and TestValkeyStorageMatchesMetadataFilter test classes; updates three pre-existing tests that asserted the broken contract. Closes #5794 Co-Authored-By: João <joao@crewai.com>
2026-07-01 13:18:10 +00:00 · 2026-05-13 17:39:45 +00:00
parent c5a9a8da50
commit 808654c2f2
2 changed files with 542 additions and 40 deletions
--- a/lib/crewai/src/crewai/memory/storage/valkey_storage.py
+++ b/lib/crewai/src/crewai/memory/storage/valkey_storage.py
@@ -1222,6 +1222,39 @@ class ValkeyStorage:

        return records_data

+    # Overfetch factor applied when ``metadata_filter`` is supplied so the
+    # Python post-filter has enough candidates to still return ``limit`` hits
+    # after dropping records whose metadata does not match. Capped at
+    # :pyattr:`_METADATA_POSTFILTER_MAX_FETCH` to avoid pathological queries.
+    _METADATA_POSTFILTER_OVERFETCH = 10
+    _METADATA_POSTFILTER_MAX_FETCH = 1000
+
+    @staticmethod
+    def _matches_metadata_filter(
+        record_metadata: dict[str, Any],
+        metadata_filter: dict[str, Any],
+    ) -> bool:
+        """Return ``True`` iff every key in ``metadata_filter`` matches the record.
+
+        Comparisons are performed on string representations so that callers can
+        pass numeric, boolean, or string filter values interchangeably (mirroring
+        how the values were originally stringified for the legacy FT query).
+
+        Args:
+            record_metadata: The record's metadata dict (may be empty).
+            metadata_filter: Required key/value pairs (AND logic).
+
+        Returns:
+            ``True`` when every filter key is present in ``record_metadata`` and
+            its string representation equals the filter's string representation.
+        """
+        for key, expected in metadata_filter.items():
+            if key not in record_metadata:
+                return False
+            if str(record_metadata[key]) != str(expected):
+                return False
+        return True
+
    async def _vector_search(
        self,
        query_embedding: list[float],
@@ -1234,13 +1267,24 @@ class ValkeyStorage:
        """Perform server-side vector search using Valkey Search.

        Uses FT.SEARCH command with KNN query for vector similarity.
-        Applies filters for scope, categories, and metadata in the same query.
+        Applies scope and category filters in the FT.SEARCH query directly,
+        and applies ``metadata_filter`` as a Python post-filter over the
+        results. Metadata is stored as an opaque JSON blob in the hash and
+        is not part of the ``memory_index`` FT schema, so it cannot be
+        narrowed inside FT.SEARCH; doing so would either error out or
+        silently return wrong results.
+
+        When ``metadata_filter`` is supplied, the underlying KNN limit is
+        scaled by :pyattr:`_METADATA_POSTFILTER_OVERFETCH` (capped at
+        :pyattr:`_METADATA_POSTFILTER_MAX_FETCH`) so the post-filter still
+        has enough candidates to return ``limit`` hits in the common case.

        Args:
            query_embedding: Embedding vector for the query.
            scope_prefix: Optional scope path prefix to filter results.
            categories: Optional list of categories (OR logic).
-            metadata_filter: Optional metadata key-value pairs (AND logic).
+            metadata_filter: Optional metadata key-value pairs (AND logic),
+                applied as a Python post-filter.
            limit: Maximum number of results to return.
            min_score: Minimum similarity score threshold (0.0 to 1.0).

@@ -1255,7 +1299,14 @@ class ValkeyStorage:
        # Ensure vector index exists
        await self._ensure_vector_index()

-        # Build query components
+        # Treat an empty metadata_filter dict the same as None so callers can
+        # pass ``{}`` without triggering overfetch or post-filter work.
+        metadata_filter = metadata_filter or None
+
+        # Build query components for the FT.SEARCH query. Note that
+        # ``metadata_filter`` is intentionally NOT included here because
+        # arbitrary metadata keys are not present in the ``memory_index``
+        # FT schema (only scope, categories, created_at, importance).
        query_parts: list[str] = []

        # Scope prefix filter
@@ -1277,25 +1328,26 @@ class ValkeyStorage:
            cat_query = "|".join(escaped_categories)
            query_parts.append(f"@categories:{{{cat_query}}}")

-        # Metadata filters (AND logic)
-        # Format: @{key}:{value}
-        if metadata_filter:
-            for key, value in metadata_filter.items():
-                # Escape key and value
-                escaped_key = self._escape_search_query(key)
-                escaped_value = self._escape_search_query(str(value))
-                query_parts.append(f"@{escaped_key}:{{{escaped_value}}}")
-
        # Combine filters
        filter_query = " ".join(query_parts) if query_parts else "*"

+        # When metadata_filter is supplied, overfetch candidates from KNN so
+        # the Python post-filter can still satisfy ``limit`` results.
+        if metadata_filter:
+            fetch_limit = min(
+                max(limit * self._METADATA_POSTFILTER_OVERFETCH, limit),
+                self._METADATA_POSTFILTER_MAX_FETCH,
+            )
+        else:
+            fetch_limit = limit
+
        # Build KNN query with filters
        # Format: (filter)=>[KNN limit @field $BLOB AS score]
        # Note: Don't wrap single "*" in parentheses
        if filter_query == "*":
-            query = f"{filter_query}=>[KNN {limit} @embedding $BLOB AS score]"
+            query = f"{filter_query}=>[KNN {fetch_limit} @embedding $BLOB AS score]"
        else:
-            query = f"({filter_query})=>[KNN {limit} @embedding $BLOB AS score]"
+            query = f"({filter_query})=>[KNN {fetch_limit} @embedding $BLOB AS score]"

        # Prepare embedding blob for PARAMS
        embedding_blob = self._embedding_to_bytes(query_embedding)
@@ -1320,7 +1372,7 @@ class ValkeyStorage:
        search_options = FtSearchOptions(
            return_fields=return_fields,
            params={"BLOB": embedding_blob},
-            limit=FtSearchLimit(0, limit),
+            limit=FtSearchLimit(0, fetch_limit),
        )

        try:
@@ -1365,6 +1417,20 @@ class ValkeyStorage:
                    if rec.scope == normalized or rec.scope.startswith(f"{normalized}/")
                ]

+            # Apply metadata_filter as a Python post-filter. The FT index does
+            # not materialize arbitrary metadata keys, so this is the only
+            # correct place to enforce metadata predicates.
+            if metadata_filter:
+                records = [
+                    (rec, score)
+                    for rec, score in records
+                    if self._matches_metadata_filter(rec.metadata, metadata_filter)
+                ]
+
+            # Truncate to the caller-requested limit after post-filtering.
+            if len(records) > limit:
+                records = records[:limit]
+
            return records

        except Exception as e:
@@ -1463,13 +1529,17 @@ class ValkeyStorage:
        """Search for memories by vector similarity (async).

        Uses Valkey Search module for server-side vector similarity computation.
-        Applies filters for scope, categories, and metadata in the same query.
+        Scope and category predicates are pushed down into FT.SEARCH; the
+        ``metadata_filter`` predicate is applied as a Python post-filter
+        because arbitrary metadata keys are not indexed in the
+        ``memory_index`` FT schema.

        Args:
            query_embedding: Embedding vector for the query.
            scope_prefix: Optional scope path prefix to filter results.
            categories: Optional list of categories (OR logic).
-            metadata_filter: Optional metadata key-value pairs (AND logic).
+            metadata_filter: Optional metadata key-value pairs (AND logic),
+                applied as a Python post-filter over the FT.SEARCH results.
            limit: Maximum number of results to return.
            min_score: Minimum similarity score threshold (0.0 to 1.0).

@@ -1500,13 +1570,17 @@ class ValkeyStorage:
        """Search for memories by vector similarity (sync wrapper).

        Uses Valkey Search module for server-side vector similarity computation.
-        Applies filters for scope, categories, and metadata in the same query.
+        Scope and category predicates are pushed down into FT.SEARCH; the
+        ``metadata_filter`` predicate is applied as a Python post-filter
+        because arbitrary metadata keys are not indexed in the
+        ``memory_index`` FT schema.

        Args:
            query_embedding: Embedding vector for the query.
            scope_prefix: Optional scope path prefix to filter results.
            categories: Optional list of categories (OR logic).
-            metadata_filter: Optional metadata key-value pairs (AND logic).
+            metadata_filter: Optional metadata key-value pairs (AND logic),
+                applied as a Python post-filter over the FT.SEARCH results.
            limit: Maximum number of results to return.
            min_score: Minimum similarity score threshold (0.0 to 1.0).

--- a/lib/crewai/tests/memory/storage/test_valkey_storage_search.py
+++ b/lib/crewai/tests/memory/storage/test_valkey_storage_search.py
@@ -229,7 +229,12 @@ class TestValkeyStorageVectorSearch:
        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
    ) -> None:
-        """Test search with metadata filter only."""
+        """Metadata filter is applied as a post-filter (issue #5794).
+
+        ``metadata`` is not part of the ``memory_index`` FT schema, so the
+        FT.SEARCH query string must not reference metadata keys. The filter is
+        applied in Python after FT.SEARCH returns.
+        """
        record1 = MemoryRecord(
            id="record-1",
            content="Record with metadata",
@@ -248,14 +253,17 @@ class TestValkeyStorageVectorSearch:
            limit=10
        )

-        # Verify query contains metadata filters (AND logic)
+        # Metadata predicates must NOT be injected into the FT.SEARCH query
+        # (their schema fields do not exist on memory_index).
        call_args = mock_ft_search.call_args
        query = call_args[0][2]
-        assert "@agent_id:{agent\\-1}" in query or "@agent_id:{agent-1}" in query
-        assert "@priority:{high}" in query
-        assert "=>[KNN 10 @embedding $BLOB AS score]" in query
+        assert "@agent_id" not in query
+        assert "@priority" not in query
+        # KNN clause is still present, with the overfetch limit applied so the
+        # post-filter has enough candidates to return ``limit`` hits.
+        assert "[KNN 100 @embedding $BLOB AS score]" in query

-        # Verify results
+        # Post-filter retains the record whose metadata matches every filter key.
        assert len(results) == 1
        assert results[0][0].id == "record-1"
        assert results[0][0].metadata["agent_id"] == "agent-1"
@@ -268,7 +276,7 @@ class TestValkeyStorageVectorSearch:
        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
    ) -> None:
-        """Test search with combined filters (scope + categories + metadata)."""
+        """Combined filters: scope + categories pushed down, metadata post-filtered."""
        record1 = MemoryRecord(
            id="record-1",
            content="Record matching all filters",
@@ -290,13 +298,15 @@ class TestValkeyStorageVectorSearch:
            limit=10
        )

-        # Verify query contains all filters
+        # Scope and categories are valid index fields and stay in the query.
        call_args = mock_ft_search.call_args
        query = call_args[0][2]
        assert "@scope:{/agent*}" in query
        assert "@categories:{planning}" in query
-        assert "@agent_id:{agent\\-1}" in query or "@agent_id:{agent-1}" in query
-        assert "=>[KNN 10 @embedding $BLOB AS score]" in query
+        # Metadata predicates must NOT be in the FT.SEARCH query.
+        assert "@agent_id" not in query
+        # Overfetch limit is applied when metadata_filter is supplied.
+        assert "[KNN 100 @embedding $BLOB AS score]" in query

        # Verify results
        assert len(results) == 1
@@ -520,7 +530,7 @@ class TestValkeyStorageVectorSearch:
        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
    ) -> None:
-        """Test search with numeric metadata values."""
+        """Numeric metadata filters work via post-filter (string-coerced compare)."""
        record1 = MemoryRecord(
            id="record-1",
            content="Record with numeric metadata",
@@ -539,11 +549,18 @@ class TestValkeyStorageVectorSearch:
            limit=10
        )

-        # Verify query contains string-converted metadata values
+        # Metadata predicates must NOT be in the FT.SEARCH query; the
+        # post-filter is responsible for matching numeric values.
        call_args = mock_ft_search.call_args
        query = call_args[0][2]
-        assert "@count:{42}" in query
-        assert "@score:{3" in query and "14}" in query
+        assert "@count" not in query
+        assert "@score" not in query
+
+        # Post-filter accepts numeric values (both int and float) and returns
+        # the matching record.
+        assert len(results) == 1
+        assert results[0][0].metadata["count"] == 42
+        assert results[0][0].metadata["score"] == 3.14

    @pytest.mark.asyncio
    @patch("crewai.memory.storage.valkey_storage.ft.search")
@@ -883,7 +900,7 @@ class TestValkeyStorageVectorSearch:
        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
    ) -> None:
-        """Test search with multiple metadata filters uses AND logic."""
+        """Multiple metadata filters use AND logic in the post-filter."""
        record1 = MemoryRecord(
            id="record-1",
            content="Record matching all metadata",
@@ -902,14 +919,15 @@ class TestValkeyStorageVectorSearch:
            limit=10
        )

-        # Verify query contains AND logic for metadata
+        # Metadata predicates must NOT be in the FT.SEARCH query — they are
+        # not part of the index schema.
        call_args = mock_ft_search.call_args
        query = call_args[0][2]
-        assert "@agent_id:" in query
-        assert "@priority:" in query
-        assert "@status:" in query
+        assert "@agent_id" not in query
+        assert "@priority" not in query
+        assert "@status" not in query

-        # Verify record matching all metadata is returned
+        # AND logic: every filter key must match for the record to be kept.
        assert len(results) == 1
        assert results[0][0].id == "record-1"

@@ -995,4 +1013,414 @@ class TestValkeyStorageVectorSearch:
        # Verify all results are returned
        assert len(results) == 2
        assert results[0][0].id == "record-1"
-        assert results[1][0].id == "record-2"
+        assert results[1][0].id == "record-2"
+
+
+class TestValkeyStorageMetadataPostFilter:
+    """Regression tests for issue #5794.
+
+    The ``memory_index`` FT schema only contains fixed fields
+    (``embedding``, ``scope``, ``categories``, ``created_at``, ``importance``).
+    Arbitrary metadata keys are NOT indexed, so ``metadata_filter`` must be
+    applied as a Python post-filter over FT.SEARCH results — never injected
+    into the FT.SEARCH query string.
+    """
+
+    @pytest.mark.asyncio
+    @patch("crewai.memory.storage.valkey_storage.ft.search")
+    @patch("crewai.memory.storage.valkey_storage.ft.list")
+    async def test_metadata_filter_not_pushed_into_ft_query(
+        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
+        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
+    ) -> None:
+        """No ``@<key>`` clause for any metadata key is emitted to FT.SEARCH."""
+        record = MemoryRecord(
+            id="record-1",
+            content="A record",
+            scope="/test",
+            metadata={"agent_id": "agent-1", "priority": "high"},
+            embedding=[0.1, 0.2, 0.3, 0.4],
+        )
+        mock_ft_list.return_value = [b"memory_index"]
+        mock_ft_search.return_value = create_mock_ft_search_response([(record, 0.9)])
+
+        await valkey_storage.asearch(
+            [0.1, 0.2, 0.3, 0.4],
+            metadata_filter={"agent_id": "agent-1", "priority": "high"},
+            limit=5,
+        )
+
+        query = mock_ft_search.call_args[0][2]
+        # The bug was: metadata keys were emitted as @<key>:{value} clauses
+        # that reference non-existent FT schema fields.
+        assert "@agent_id" not in query
+        assert "@priority" not in query
+        # The KNN clause uses the overfetched limit when metadata_filter is set.
+        assert "[KNN 50 @embedding $BLOB AS score]" in query
+
+    @pytest.mark.asyncio
+    @patch("crewai.memory.storage.valkey_storage.ft.search")
+    @patch("crewai.memory.storage.valkey_storage.ft.list")
+    async def test_metadata_filter_excludes_records_with_missing_key(
+        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
+        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
+    ) -> None:
+        """Records whose metadata is missing a required key are dropped."""
+        matching = MemoryRecord(
+            id="match-1",
+            content="Has agent_id",
+            scope="/test",
+            metadata={"agent_id": "agent-1"},
+            embedding=[0.1, 0.2, 0.3, 0.4],
+        )
+        missing_key = MemoryRecord(
+            id="missing-1",
+            content="Missing agent_id entirely",
+            scope="/test",
+            metadata={"unrelated": "value"},
+            embedding=[0.2, 0.3, 0.4, 0.5],
+        )
+        empty_metadata = MemoryRecord(
+            id="empty-1",
+            content="No metadata",
+            scope="/test",
+            metadata={},
+            embedding=[0.3, 0.4, 0.5, 0.6],
+        )
+        mock_ft_list.return_value = [b"memory_index"]
+        mock_ft_search.return_value = create_mock_ft_search_response([
+            (matching, 0.95),
+            (missing_key, 0.85),
+            (empty_metadata, 0.75),
+        ])
+
+        results = await valkey_storage.asearch(
+            [0.1, 0.2, 0.3, 0.4],
+            metadata_filter={"agent_id": "agent-1"},
+            limit=10,
+        )
+
+        # Only the record whose metadata has agent_id=agent-1 survives.
+        assert [rec.id for rec, _ in results] == ["match-1"]
+
+    @pytest.mark.asyncio
+    @patch("crewai.memory.storage.valkey_storage.ft.search")
+    @patch("crewai.memory.storage.valkey_storage.ft.list")
+    async def test_metadata_filter_excludes_records_with_mismatched_value(
+        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
+        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
+    ) -> None:
+        """Records whose metadata value differs from the filter are dropped."""
+        agent_one = MemoryRecord(
+            id="agent-1-rec",
+            content="agent-1",
+            scope="/test",
+            metadata={"agent_id": "agent-1"},
+            embedding=[0.1, 0.2, 0.3, 0.4],
+        )
+        agent_two = MemoryRecord(
+            id="agent-2-rec",
+            content="agent-2",
+            scope="/test",
+            metadata={"agent_id": "agent-2"},
+            embedding=[0.2, 0.3, 0.4, 0.5],
+        )
+        mock_ft_list.return_value = [b"memory_index"]
+        mock_ft_search.return_value = create_mock_ft_search_response([
+            (agent_one, 0.9),
+            (agent_two, 0.8),
+        ])
+
+        results = await valkey_storage.asearch(
+            [0.1, 0.2, 0.3, 0.4],
+            metadata_filter={"agent_id": "agent-1"},
+            limit=10,
+        )
+
+        assert [rec.id for rec, _ in results] == ["agent-1-rec"]
+
+    @pytest.mark.asyncio
+    @patch("crewai.memory.storage.valkey_storage.ft.search")
+    @patch("crewai.memory.storage.valkey_storage.ft.list")
+    async def test_metadata_filter_and_logic_requires_every_key(
+        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
+        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
+    ) -> None:
+        """Multi-key filters require every key/value pair to match (AND)."""
+        full_match = MemoryRecord(
+            id="full",
+            content="Matches both",
+            scope="/test",
+            metadata={"agent_id": "agent-1", "priority": "high"},
+            embedding=[0.1, 0.2, 0.3, 0.4],
+        )
+        partial = MemoryRecord(
+            id="partial",
+            content="Only agent_id matches",
+            scope="/test",
+            metadata={"agent_id": "agent-1", "priority": "low"},
+            embedding=[0.2, 0.3, 0.4, 0.5],
+        )
+        mock_ft_list.return_value = [b"memory_index"]
+        mock_ft_search.return_value = create_mock_ft_search_response([
+            (full_match, 0.95),
+            (partial, 0.93),
+        ])
+
+        results = await valkey_storage.asearch(
+            [0.1, 0.2, 0.3, 0.4],
+            metadata_filter={"agent_id": "agent-1", "priority": "high"},
+            limit=10,
+        )
+
+        assert [rec.id for rec, _ in results] == ["full"]
+
+    @pytest.mark.asyncio
+    @patch("crewai.memory.storage.valkey_storage.ft.search")
+    @patch("crewai.memory.storage.valkey_storage.ft.list")
+    async def test_metadata_filter_matches_numeric_values_via_string_compare(
+        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
+        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
+    ) -> None:
+        """Numeric metadata values match when stringified equal to the filter."""
+        record = MemoryRecord(
+            id="numeric",
+            content="Numeric metadata",
+            scope="/test",
+            metadata={"count": 42, "ratio": 3.14, "active": True},
+            embedding=[0.1, 0.2, 0.3, 0.4],
+        )
+        other = MemoryRecord(
+            id="numeric-other",
+            content="Different count",
+            scope="/test",
+            metadata={"count": 41, "ratio": 3.14, "active": True},
+            embedding=[0.2, 0.3, 0.4, 0.5],
+        )
+        mock_ft_list.return_value = [b"memory_index"]
+        mock_ft_search.return_value = create_mock_ft_search_response([
+            (record, 0.9),
+            (other, 0.85),
+        ])
+
+        results = await valkey_storage.asearch(
+            [0.1, 0.2, 0.3, 0.4],
+            metadata_filter={"count": 42, "ratio": 3.14, "active": True},
+            limit=10,
+        )
+
+        assert [rec.id for rec, _ in results] == ["numeric"]
+
+    @pytest.mark.asyncio
+    @patch("crewai.memory.storage.valkey_storage.ft.search")
+    @patch("crewai.memory.storage.valkey_storage.ft.list")
+    async def test_empty_metadata_filter_dict_skips_post_filter(
+        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
+        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
+    ) -> None:
+        """An empty ``{}`` filter behaves like ``None`` (no overfetch, no filtering)."""
+        record1 = MemoryRecord(
+            id="r1",
+            content="r1",
+            scope="/test",
+            metadata={"k": "v"},
+            embedding=[0.1, 0.2, 0.3, 0.4],
+        )
+        record2 = MemoryRecord(
+            id="r2",
+            content="r2",
+            scope="/test",
+            metadata={},
+            embedding=[0.2, 0.3, 0.4, 0.5],
+        )
+        mock_ft_list.return_value = [b"memory_index"]
+        mock_ft_search.return_value = create_mock_ft_search_response([
+            (record1, 0.9),
+            (record2, 0.8),
+        ])
+
+        results = await valkey_storage.asearch(
+            [0.1, 0.2, 0.3, 0.4],
+            metadata_filter={},
+            limit=10,
+        )
+
+        query = mock_ft_search.call_args[0][2]
+        # No overfetch — KNN limit stays at the caller-requested limit.
+        assert "[KNN 10 @embedding $BLOB AS score]" in query
+        # All records returned (no filtering performed).
+        assert [rec.id for rec, _ in results] == ["r1", "r2"]
+
+    @pytest.mark.asyncio
+    @patch("crewai.memory.storage.valkey_storage.ft.search")
+    @patch("crewai.memory.storage.valkey_storage.ft.list")
+    async def test_metadata_filter_overfetches_to_preserve_caller_limit(
+        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
+        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
+    ) -> None:
+        """KNN is overfetched so post-filter can still return ``limit`` hits.
+
+        Without overfetch, KNN would return at most ``limit`` candidates and
+        the post-filter could drop every one, leaving the caller with fewer
+        than the requested ``limit`` results. The overfetch ensures the
+        post-filter has enough candidates to satisfy the caller in the common
+        case where ~10% of candidates match the metadata predicate.
+        """
+        matching = [
+            MemoryRecord(
+                id=f"match-{i}",
+                content=f"match {i}",
+                scope="/test",
+                metadata={"tier": "gold"},
+                embedding=[0.1 * (i + 1), 0.2, 0.3, 0.4],
+            )
+            for i in range(5)
+        ]
+        non_matching = [
+            MemoryRecord(
+                id=f"skip-{i}",
+                content=f"skip {i}",
+                scope="/test",
+                metadata={"tier": "silver"},
+                embedding=[0.1 * (i + 1), 0.3, 0.4, 0.5],
+            )
+            for i in range(15)
+        ]
+
+        # FT.SEARCH returns 20 candidates (mix of gold/silver) sorted by score.
+        # If the KNN had been capped at limit=5, the silver records mixed in
+        # would have starved out the gold matches and ``results`` would have
+        # been shorter than 5.
+        combined = []
+        score = 0.99
+        for rec in matching + non_matching:
+            combined.append((rec, score))
+            score -= 0.01
+
+        mock_ft_list.return_value = [b"memory_index"]
+        mock_ft_search.return_value = create_mock_ft_search_response(combined)
+
+        results = await valkey_storage.asearch(
+            [0.1, 0.2, 0.3, 0.4],
+            metadata_filter={"tier": "gold"},
+            limit=5,
+        )
+
+        query = mock_ft_search.call_args[0][2]
+        # KNN limit is overfetched (limit * OVERFETCH = 50) to give the
+        # post-filter enough candidates.
+        assert "[KNN 50 @embedding $BLOB AS score]" in query
+
+        # All five matching records are returned, in score-descending order.
+        assert [rec.id for rec, _ in results] == [
+            "match-0", "match-1", "match-2", "match-3", "match-4"
+        ]
+
+    @pytest.mark.asyncio
+    @patch("crewai.memory.storage.valkey_storage.ft.search")
+    @patch("crewai.memory.storage.valkey_storage.ft.list")
+    async def test_metadata_filter_truncates_to_caller_limit(
+        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
+        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
+    ) -> None:
+        """When post-filter retains more than ``limit`` records, truncate."""
+        records = [
+            MemoryRecord(
+                id=f"rec-{i}",
+                content=f"rec {i}",
+                scope="/test",
+                metadata={"tier": "gold"},
+                embedding=[0.1 * (i + 1), 0.2, 0.3, 0.4],
+            )
+            for i in range(8)
+        ]
+        scored = [(rec, 0.9 - 0.05 * i) for i, rec in enumerate(records)]
+
+        mock_ft_list.return_value = [b"memory_index"]
+        mock_ft_search.return_value = create_mock_ft_search_response(scored)
+
+        results = await valkey_storage.asearch(
+            [0.1, 0.2, 0.3, 0.4],
+            metadata_filter={"tier": "gold"},
+            limit=3,
+        )
+
+        # Caller asked for 3, so only the top 3 (by score) are returned even
+        # though 8 candidates matched the metadata filter.
+        assert len(results) == 3
+        assert [rec.id for rec, _ in results] == ["rec-0", "rec-1", "rec-2"]
+
+    @pytest.mark.asyncio
+    @patch("crewai.memory.storage.valkey_storage.ft.search")
+    @patch("crewai.memory.storage.valkey_storage.ft.list")
+    async def test_metadata_filter_does_not_affect_scope_or_categories_pushdown(
+        self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock,
+        valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock
+    ) -> None:
+        """Scope/categories predicates are still pushed into FT.SEARCH.
+
+        They are valid index fields and benefit from server-side filtering.
+        Only ``metadata_filter`` is moved to the Python side.
+        """
+        record = MemoryRecord(
+            id="rec",
+            content="rec",
+            scope="/agent/task",
+            categories=["planning"],
+            metadata={"agent_id": "agent-1"},
+            embedding=[0.1, 0.2, 0.3, 0.4],
+        )
+        mock_ft_list.return_value = [b"memory_index"]
+        mock_ft_search.return_value = create_mock_ft_search_response([(record, 0.9)])
+
+        await valkey_storage.asearch(
+            [0.1, 0.2, 0.3, 0.4],
+            scope_prefix="/agent",
+            categories=["planning"],
+            metadata_filter={"agent_id": "agent-1"},
+            limit=5,
+        )
+
+        query = mock_ft_search.call_args[0][2]
+        assert "@scope:{/agent*}" in query
+        assert "@categories:{planning}" in query
+        # Metadata predicates remain on the Python side.
+        assert "@agent_id" not in query
+
+
+class TestValkeyStorageMatchesMetadataFilter:
+    """Unit tests for the ``_matches_metadata_filter`` static helper."""
+
+    def test_returns_true_when_all_keys_match(self) -> None:
+        assert ValkeyStorage._matches_metadata_filter(
+            {"a": "1", "b": "2", "c": "3"},
+            {"a": "1", "b": "2"},
+        )
+
+    def test_returns_false_when_key_missing(self) -> None:
+        assert not ValkeyStorage._matches_metadata_filter(
+            {"a": "1"},
+            {"a": "1", "b": "2"},
+        )
+
+    def test_returns_false_when_value_mismatches(self) -> None:
+        assert not ValkeyStorage._matches_metadata_filter(
+            {"a": "1", "b": "wrong"},
+            {"a": "1", "b": "2"},
+        )
+
+    def test_coerces_to_string_for_comparison(self) -> None:
+        # Filter pre-stringified by caller still matches typed record value.
+        assert ValkeyStorage._matches_metadata_filter(
+            {"count": 42, "ratio": 3.14, "flag": True},
+            {"count": "42", "ratio": "3.14", "flag": "True"},
+        )
+        # And vice versa: typed filter matches typed record value.
+        assert ValkeyStorage._matches_metadata_filter(
+            {"count": 42},
+            {"count": 42},
+        )
+
+    def test_empty_filter_always_matches(self) -> None:
+        assert ValkeyStorage._matches_metadata_filter({}, {})
+        assert ValkeyStorage._matches_metadata_filter({"a": "1"}, {})