diff --git a/lib/crewai/src/crewai/memory/storage/valkey_storage.py b/lib/crewai/src/crewai/memory/storage/valkey_storage.py index ceae25c3b..c33c2bd2b 100644 --- a/lib/crewai/src/crewai/memory/storage/valkey_storage.py +++ b/lib/crewai/src/crewai/memory/storage/valkey_storage.py @@ -1222,6 +1222,39 @@ class ValkeyStorage: return records_data + # Overfetch factor applied when ``metadata_filter`` is supplied so the + # Python post-filter has enough candidates to still return ``limit`` hits + # after dropping records whose metadata does not match. Capped at + # :pyattr:`_METADATA_POSTFILTER_MAX_FETCH` to avoid pathological queries. + _METADATA_POSTFILTER_OVERFETCH = 10 + _METADATA_POSTFILTER_MAX_FETCH = 1000 + + @staticmethod + def _matches_metadata_filter( + record_metadata: dict[str, Any], + metadata_filter: dict[str, Any], + ) -> bool: + """Return ``True`` iff every key in ``metadata_filter`` matches the record. + + Comparisons are performed on string representations so that callers can + pass numeric, boolean, or string filter values interchangeably (mirroring + how the values were originally stringified for the legacy FT query). + + Args: + record_metadata: The record's metadata dict (may be empty). + metadata_filter: Required key/value pairs (AND logic). + + Returns: + ``True`` when every filter key is present in ``record_metadata`` and + its string representation equals the filter's string representation. + """ + for key, expected in metadata_filter.items(): + if key not in record_metadata: + return False + if str(record_metadata[key]) != str(expected): + return False + return True + async def _vector_search( self, query_embedding: list[float], @@ -1234,13 +1267,24 @@ class ValkeyStorage: """Perform server-side vector search using Valkey Search. Uses FT.SEARCH command with KNN query for vector similarity. - Applies filters for scope, categories, and metadata in the same query. + Applies scope and category filters in the FT.SEARCH query directly, + and applies ``metadata_filter`` as a Python post-filter over the + results. Metadata is stored as an opaque JSON blob in the hash and + is not part of the ``memory_index`` FT schema, so it cannot be + narrowed inside FT.SEARCH; doing so would either error out or + silently return wrong results. + + When ``metadata_filter`` is supplied, the underlying KNN limit is + scaled by :pyattr:`_METADATA_POSTFILTER_OVERFETCH` (capped at + :pyattr:`_METADATA_POSTFILTER_MAX_FETCH`) so the post-filter still + has enough candidates to return ``limit`` hits in the common case. Args: query_embedding: Embedding vector for the query. scope_prefix: Optional scope path prefix to filter results. categories: Optional list of categories (OR logic). - metadata_filter: Optional metadata key-value pairs (AND logic). + metadata_filter: Optional metadata key-value pairs (AND logic), + applied as a Python post-filter. limit: Maximum number of results to return. min_score: Minimum similarity score threshold (0.0 to 1.0). @@ -1255,7 +1299,14 @@ class ValkeyStorage: # Ensure vector index exists await self._ensure_vector_index() - # Build query components + # Treat an empty metadata_filter dict the same as None so callers can + # pass ``{}`` without triggering overfetch or post-filter work. + metadata_filter = metadata_filter or None + + # Build query components for the FT.SEARCH query. Note that + # ``metadata_filter`` is intentionally NOT included here because + # arbitrary metadata keys are not present in the ``memory_index`` + # FT schema (only scope, categories, created_at, importance). query_parts: list[str] = [] # Scope prefix filter @@ -1277,25 +1328,26 @@ class ValkeyStorage: cat_query = "|".join(escaped_categories) query_parts.append(f"@categories:{{{cat_query}}}") - # Metadata filters (AND logic) - # Format: @{key}:{value} - if metadata_filter: - for key, value in metadata_filter.items(): - # Escape key and value - escaped_key = self._escape_search_query(key) - escaped_value = self._escape_search_query(str(value)) - query_parts.append(f"@{escaped_key}:{{{escaped_value}}}") - # Combine filters filter_query = " ".join(query_parts) if query_parts else "*" + # When metadata_filter is supplied, overfetch candidates from KNN so + # the Python post-filter can still satisfy ``limit`` results. + if metadata_filter: + fetch_limit = min( + max(limit * self._METADATA_POSTFILTER_OVERFETCH, limit), + self._METADATA_POSTFILTER_MAX_FETCH, + ) + else: + fetch_limit = limit + # Build KNN query with filters # Format: (filter)=>[KNN limit @field $BLOB AS score] # Note: Don't wrap single "*" in parentheses if filter_query == "*": - query = f"{filter_query}=>[KNN {limit} @embedding $BLOB AS score]" + query = f"{filter_query}=>[KNN {fetch_limit} @embedding $BLOB AS score]" else: - query = f"({filter_query})=>[KNN {limit} @embedding $BLOB AS score]" + query = f"({filter_query})=>[KNN {fetch_limit} @embedding $BLOB AS score]" # Prepare embedding blob for PARAMS embedding_blob = self._embedding_to_bytes(query_embedding) @@ -1320,7 +1372,7 @@ class ValkeyStorage: search_options = FtSearchOptions( return_fields=return_fields, params={"BLOB": embedding_blob}, - limit=FtSearchLimit(0, limit), + limit=FtSearchLimit(0, fetch_limit), ) try: @@ -1365,6 +1417,20 @@ class ValkeyStorage: if rec.scope == normalized or rec.scope.startswith(f"{normalized}/") ] + # Apply metadata_filter as a Python post-filter. The FT index does + # not materialize arbitrary metadata keys, so this is the only + # correct place to enforce metadata predicates. + if metadata_filter: + records = [ + (rec, score) + for rec, score in records + if self._matches_metadata_filter(rec.metadata, metadata_filter) + ] + + # Truncate to the caller-requested limit after post-filtering. + if len(records) > limit: + records = records[:limit] + return records except Exception as e: @@ -1463,13 +1529,17 @@ class ValkeyStorage: """Search for memories by vector similarity (async). Uses Valkey Search module for server-side vector similarity computation. - Applies filters for scope, categories, and metadata in the same query. + Scope and category predicates are pushed down into FT.SEARCH; the + ``metadata_filter`` predicate is applied as a Python post-filter + because arbitrary metadata keys are not indexed in the + ``memory_index`` FT schema. Args: query_embedding: Embedding vector for the query. scope_prefix: Optional scope path prefix to filter results. categories: Optional list of categories (OR logic). - metadata_filter: Optional metadata key-value pairs (AND logic). + metadata_filter: Optional metadata key-value pairs (AND logic), + applied as a Python post-filter over the FT.SEARCH results. limit: Maximum number of results to return. min_score: Minimum similarity score threshold (0.0 to 1.0). @@ -1500,13 +1570,17 @@ class ValkeyStorage: """Search for memories by vector similarity (sync wrapper). Uses Valkey Search module for server-side vector similarity computation. - Applies filters for scope, categories, and metadata in the same query. + Scope and category predicates are pushed down into FT.SEARCH; the + ``metadata_filter`` predicate is applied as a Python post-filter + because arbitrary metadata keys are not indexed in the + ``memory_index`` FT schema. Args: query_embedding: Embedding vector for the query. scope_prefix: Optional scope path prefix to filter results. categories: Optional list of categories (OR logic). - metadata_filter: Optional metadata key-value pairs (AND logic). + metadata_filter: Optional metadata key-value pairs (AND logic), + applied as a Python post-filter over the FT.SEARCH results. limit: Maximum number of results to return. min_score: Minimum similarity score threshold (0.0 to 1.0). diff --git a/lib/crewai/tests/memory/storage/test_valkey_storage_search.py b/lib/crewai/tests/memory/storage/test_valkey_storage_search.py index 73ee98920..c8e2eeeac 100644 --- a/lib/crewai/tests/memory/storage/test_valkey_storage_search.py +++ b/lib/crewai/tests/memory/storage/test_valkey_storage_search.py @@ -229,7 +229,12 @@ class TestValkeyStorageVectorSearch: self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock ) -> None: - """Test search with metadata filter only.""" + """Metadata filter is applied as a post-filter (issue #5794). + + ``metadata`` is not part of the ``memory_index`` FT schema, so the + FT.SEARCH query string must not reference metadata keys. The filter is + applied in Python after FT.SEARCH returns. + """ record1 = MemoryRecord( id="record-1", content="Record with metadata", @@ -248,14 +253,17 @@ class TestValkeyStorageVectorSearch: limit=10 ) - # Verify query contains metadata filters (AND logic) + # Metadata predicates must NOT be injected into the FT.SEARCH query + # (their schema fields do not exist on memory_index). call_args = mock_ft_search.call_args query = call_args[0][2] - assert "@agent_id:{agent\\-1}" in query or "@agent_id:{agent-1}" in query - assert "@priority:{high}" in query - assert "=>[KNN 10 @embedding $BLOB AS score]" in query + assert "@agent_id" not in query + assert "@priority" not in query + # KNN clause is still present, with the overfetch limit applied so the + # post-filter has enough candidates to return ``limit`` hits. + assert "[KNN 100 @embedding $BLOB AS score]" in query - # Verify results + # Post-filter retains the record whose metadata matches every filter key. assert len(results) == 1 assert results[0][0].id == "record-1" assert results[0][0].metadata["agent_id"] == "agent-1" @@ -268,7 +276,7 @@ class TestValkeyStorageVectorSearch: self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock ) -> None: - """Test search with combined filters (scope + categories + metadata).""" + """Combined filters: scope + categories pushed down, metadata post-filtered.""" record1 = MemoryRecord( id="record-1", content="Record matching all filters", @@ -290,13 +298,15 @@ class TestValkeyStorageVectorSearch: limit=10 ) - # Verify query contains all filters + # Scope and categories are valid index fields and stay in the query. call_args = mock_ft_search.call_args query = call_args[0][2] assert "@scope:{/agent*}" in query assert "@categories:{planning}" in query - assert "@agent_id:{agent\\-1}" in query or "@agent_id:{agent-1}" in query - assert "=>[KNN 10 @embedding $BLOB AS score]" in query + # Metadata predicates must NOT be in the FT.SEARCH query. + assert "@agent_id" not in query + # Overfetch limit is applied when metadata_filter is supplied. + assert "[KNN 100 @embedding $BLOB AS score]" in query # Verify results assert len(results) == 1 @@ -520,7 +530,7 @@ class TestValkeyStorageVectorSearch: self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock ) -> None: - """Test search with numeric metadata values.""" + """Numeric metadata filters work via post-filter (string-coerced compare).""" record1 = MemoryRecord( id="record-1", content="Record with numeric metadata", @@ -539,11 +549,18 @@ class TestValkeyStorageVectorSearch: limit=10 ) - # Verify query contains string-converted metadata values + # Metadata predicates must NOT be in the FT.SEARCH query; the + # post-filter is responsible for matching numeric values. call_args = mock_ft_search.call_args query = call_args[0][2] - assert "@count:{42}" in query - assert "@score:{3" in query and "14}" in query + assert "@count" not in query + assert "@score" not in query + + # Post-filter accepts numeric values (both int and float) and returns + # the matching record. + assert len(results) == 1 + assert results[0][0].metadata["count"] == 42 + assert results[0][0].metadata["score"] == 3.14 @pytest.mark.asyncio @patch("crewai.memory.storage.valkey_storage.ft.search") @@ -883,7 +900,7 @@ class TestValkeyStorageVectorSearch: self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock ) -> None: - """Test search with multiple metadata filters uses AND logic.""" + """Multiple metadata filters use AND logic in the post-filter.""" record1 = MemoryRecord( id="record-1", content="Record matching all metadata", @@ -902,14 +919,15 @@ class TestValkeyStorageVectorSearch: limit=10 ) - # Verify query contains AND logic for metadata + # Metadata predicates must NOT be in the FT.SEARCH query — they are + # not part of the index schema. call_args = mock_ft_search.call_args query = call_args[0][2] - assert "@agent_id:" in query - assert "@priority:" in query - assert "@status:" in query + assert "@agent_id" not in query + assert "@priority" not in query + assert "@status" not in query - # Verify record matching all metadata is returned + # AND logic: every filter key must match for the record to be kept. assert len(results) == 1 assert results[0][0].id == "record-1" @@ -995,4 +1013,414 @@ class TestValkeyStorageVectorSearch: # Verify all results are returned assert len(results) == 2 assert results[0][0].id == "record-1" - assert results[1][0].id == "record-2" \ No newline at end of file + assert results[1][0].id == "record-2" + + +class TestValkeyStorageMetadataPostFilter: + """Regression tests for issue #5794. + + The ``memory_index`` FT schema only contains fixed fields + (``embedding``, ``scope``, ``categories``, ``created_at``, ``importance``). + Arbitrary metadata keys are NOT indexed, so ``metadata_filter`` must be + applied as a Python post-filter over FT.SEARCH results — never injected + into the FT.SEARCH query string. + """ + + @pytest.mark.asyncio + @patch("crewai.memory.storage.valkey_storage.ft.search") + @patch("crewai.memory.storage.valkey_storage.ft.list") + async def test_metadata_filter_not_pushed_into_ft_query( + self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, + valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock + ) -> None: + """No ``@`` clause for any metadata key is emitted to FT.SEARCH.""" + record = MemoryRecord( + id="record-1", + content="A record", + scope="/test", + metadata={"agent_id": "agent-1", "priority": "high"}, + embedding=[0.1, 0.2, 0.3, 0.4], + ) + mock_ft_list.return_value = [b"memory_index"] + mock_ft_search.return_value = create_mock_ft_search_response([(record, 0.9)]) + + await valkey_storage.asearch( + [0.1, 0.2, 0.3, 0.4], + metadata_filter={"agent_id": "agent-1", "priority": "high"}, + limit=5, + ) + + query = mock_ft_search.call_args[0][2] + # The bug was: metadata keys were emitted as @:{value} clauses + # that reference non-existent FT schema fields. + assert "@agent_id" not in query + assert "@priority" not in query + # The KNN clause uses the overfetched limit when metadata_filter is set. + assert "[KNN 50 @embedding $BLOB AS score]" in query + + @pytest.mark.asyncio + @patch("crewai.memory.storage.valkey_storage.ft.search") + @patch("crewai.memory.storage.valkey_storage.ft.list") + async def test_metadata_filter_excludes_records_with_missing_key( + self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, + valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock + ) -> None: + """Records whose metadata is missing a required key are dropped.""" + matching = MemoryRecord( + id="match-1", + content="Has agent_id", + scope="/test", + metadata={"agent_id": "agent-1"}, + embedding=[0.1, 0.2, 0.3, 0.4], + ) + missing_key = MemoryRecord( + id="missing-1", + content="Missing agent_id entirely", + scope="/test", + metadata={"unrelated": "value"}, + embedding=[0.2, 0.3, 0.4, 0.5], + ) + empty_metadata = MemoryRecord( + id="empty-1", + content="No metadata", + scope="/test", + metadata={}, + embedding=[0.3, 0.4, 0.5, 0.6], + ) + mock_ft_list.return_value = [b"memory_index"] + mock_ft_search.return_value = create_mock_ft_search_response([ + (matching, 0.95), + (missing_key, 0.85), + (empty_metadata, 0.75), + ]) + + results = await valkey_storage.asearch( + [0.1, 0.2, 0.3, 0.4], + metadata_filter={"agent_id": "agent-1"}, + limit=10, + ) + + # Only the record whose metadata has agent_id=agent-1 survives. + assert [rec.id for rec, _ in results] == ["match-1"] + + @pytest.mark.asyncio + @patch("crewai.memory.storage.valkey_storage.ft.search") + @patch("crewai.memory.storage.valkey_storage.ft.list") + async def test_metadata_filter_excludes_records_with_mismatched_value( + self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, + valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock + ) -> None: + """Records whose metadata value differs from the filter are dropped.""" + agent_one = MemoryRecord( + id="agent-1-rec", + content="agent-1", + scope="/test", + metadata={"agent_id": "agent-1"}, + embedding=[0.1, 0.2, 0.3, 0.4], + ) + agent_two = MemoryRecord( + id="agent-2-rec", + content="agent-2", + scope="/test", + metadata={"agent_id": "agent-2"}, + embedding=[0.2, 0.3, 0.4, 0.5], + ) + mock_ft_list.return_value = [b"memory_index"] + mock_ft_search.return_value = create_mock_ft_search_response([ + (agent_one, 0.9), + (agent_two, 0.8), + ]) + + results = await valkey_storage.asearch( + [0.1, 0.2, 0.3, 0.4], + metadata_filter={"agent_id": "agent-1"}, + limit=10, + ) + + assert [rec.id for rec, _ in results] == ["agent-1-rec"] + + @pytest.mark.asyncio + @patch("crewai.memory.storage.valkey_storage.ft.search") + @patch("crewai.memory.storage.valkey_storage.ft.list") + async def test_metadata_filter_and_logic_requires_every_key( + self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, + valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock + ) -> None: + """Multi-key filters require every key/value pair to match (AND).""" + full_match = MemoryRecord( + id="full", + content="Matches both", + scope="/test", + metadata={"agent_id": "agent-1", "priority": "high"}, + embedding=[0.1, 0.2, 0.3, 0.4], + ) + partial = MemoryRecord( + id="partial", + content="Only agent_id matches", + scope="/test", + metadata={"agent_id": "agent-1", "priority": "low"}, + embedding=[0.2, 0.3, 0.4, 0.5], + ) + mock_ft_list.return_value = [b"memory_index"] + mock_ft_search.return_value = create_mock_ft_search_response([ + (full_match, 0.95), + (partial, 0.93), + ]) + + results = await valkey_storage.asearch( + [0.1, 0.2, 0.3, 0.4], + metadata_filter={"agent_id": "agent-1", "priority": "high"}, + limit=10, + ) + + assert [rec.id for rec, _ in results] == ["full"] + + @pytest.mark.asyncio + @patch("crewai.memory.storage.valkey_storage.ft.search") + @patch("crewai.memory.storage.valkey_storage.ft.list") + async def test_metadata_filter_matches_numeric_values_via_string_compare( + self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, + valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock + ) -> None: + """Numeric metadata values match when stringified equal to the filter.""" + record = MemoryRecord( + id="numeric", + content="Numeric metadata", + scope="/test", + metadata={"count": 42, "ratio": 3.14, "active": True}, + embedding=[0.1, 0.2, 0.3, 0.4], + ) + other = MemoryRecord( + id="numeric-other", + content="Different count", + scope="/test", + metadata={"count": 41, "ratio": 3.14, "active": True}, + embedding=[0.2, 0.3, 0.4, 0.5], + ) + mock_ft_list.return_value = [b"memory_index"] + mock_ft_search.return_value = create_mock_ft_search_response([ + (record, 0.9), + (other, 0.85), + ]) + + results = await valkey_storage.asearch( + [0.1, 0.2, 0.3, 0.4], + metadata_filter={"count": 42, "ratio": 3.14, "active": True}, + limit=10, + ) + + assert [rec.id for rec, _ in results] == ["numeric"] + + @pytest.mark.asyncio + @patch("crewai.memory.storage.valkey_storage.ft.search") + @patch("crewai.memory.storage.valkey_storage.ft.list") + async def test_empty_metadata_filter_dict_skips_post_filter( + self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, + valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock + ) -> None: + """An empty ``{}`` filter behaves like ``None`` (no overfetch, no filtering).""" + record1 = MemoryRecord( + id="r1", + content="r1", + scope="/test", + metadata={"k": "v"}, + embedding=[0.1, 0.2, 0.3, 0.4], + ) + record2 = MemoryRecord( + id="r2", + content="r2", + scope="/test", + metadata={}, + embedding=[0.2, 0.3, 0.4, 0.5], + ) + mock_ft_list.return_value = [b"memory_index"] + mock_ft_search.return_value = create_mock_ft_search_response([ + (record1, 0.9), + (record2, 0.8), + ]) + + results = await valkey_storage.asearch( + [0.1, 0.2, 0.3, 0.4], + metadata_filter={}, + limit=10, + ) + + query = mock_ft_search.call_args[0][2] + # No overfetch — KNN limit stays at the caller-requested limit. + assert "[KNN 10 @embedding $BLOB AS score]" in query + # All records returned (no filtering performed). + assert [rec.id for rec, _ in results] == ["r1", "r2"] + + @pytest.mark.asyncio + @patch("crewai.memory.storage.valkey_storage.ft.search") + @patch("crewai.memory.storage.valkey_storage.ft.list") + async def test_metadata_filter_overfetches_to_preserve_caller_limit( + self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, + valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock + ) -> None: + """KNN is overfetched so post-filter can still return ``limit`` hits. + + Without overfetch, KNN would return at most ``limit`` candidates and + the post-filter could drop every one, leaving the caller with fewer + than the requested ``limit`` results. The overfetch ensures the + post-filter has enough candidates to satisfy the caller in the common + case where ~10% of candidates match the metadata predicate. + """ + matching = [ + MemoryRecord( + id=f"match-{i}", + content=f"match {i}", + scope="/test", + metadata={"tier": "gold"}, + embedding=[0.1 * (i + 1), 0.2, 0.3, 0.4], + ) + for i in range(5) + ] + non_matching = [ + MemoryRecord( + id=f"skip-{i}", + content=f"skip {i}", + scope="/test", + metadata={"tier": "silver"}, + embedding=[0.1 * (i + 1), 0.3, 0.4, 0.5], + ) + for i in range(15) + ] + + # FT.SEARCH returns 20 candidates (mix of gold/silver) sorted by score. + # If the KNN had been capped at limit=5, the silver records mixed in + # would have starved out the gold matches and ``results`` would have + # been shorter than 5. + combined = [] + score = 0.99 + for rec in matching + non_matching: + combined.append((rec, score)) + score -= 0.01 + + mock_ft_list.return_value = [b"memory_index"] + mock_ft_search.return_value = create_mock_ft_search_response(combined) + + results = await valkey_storage.asearch( + [0.1, 0.2, 0.3, 0.4], + metadata_filter={"tier": "gold"}, + limit=5, + ) + + query = mock_ft_search.call_args[0][2] + # KNN limit is overfetched (limit * OVERFETCH = 50) to give the + # post-filter enough candidates. + assert "[KNN 50 @embedding $BLOB AS score]" in query + + # All five matching records are returned, in score-descending order. + assert [rec.id for rec, _ in results] == [ + "match-0", "match-1", "match-2", "match-3", "match-4" + ] + + @pytest.mark.asyncio + @patch("crewai.memory.storage.valkey_storage.ft.search") + @patch("crewai.memory.storage.valkey_storage.ft.list") + async def test_metadata_filter_truncates_to_caller_limit( + self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, + valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock + ) -> None: + """When post-filter retains more than ``limit`` records, truncate.""" + records = [ + MemoryRecord( + id=f"rec-{i}", + content=f"rec {i}", + scope="/test", + metadata={"tier": "gold"}, + embedding=[0.1 * (i + 1), 0.2, 0.3, 0.4], + ) + for i in range(8) + ] + scored = [(rec, 0.9 - 0.05 * i) for i, rec in enumerate(records)] + + mock_ft_list.return_value = [b"memory_index"] + mock_ft_search.return_value = create_mock_ft_search_response(scored) + + results = await valkey_storage.asearch( + [0.1, 0.2, 0.3, 0.4], + metadata_filter={"tier": "gold"}, + limit=3, + ) + + # Caller asked for 3, so only the top 3 (by score) are returned even + # though 8 candidates matched the metadata filter. + assert len(results) == 3 + assert [rec.id for rec, _ in results] == ["rec-0", "rec-1", "rec-2"] + + @pytest.mark.asyncio + @patch("crewai.memory.storage.valkey_storage.ft.search") + @patch("crewai.memory.storage.valkey_storage.ft.list") + async def test_metadata_filter_does_not_affect_scope_or_categories_pushdown( + self, mock_ft_list: AsyncMock, mock_ft_search: AsyncMock, + valkey_storage: ValkeyStorage, mock_glide_client: AsyncMock + ) -> None: + """Scope/categories predicates are still pushed into FT.SEARCH. + + They are valid index fields and benefit from server-side filtering. + Only ``metadata_filter`` is moved to the Python side. + """ + record = MemoryRecord( + id="rec", + content="rec", + scope="/agent/task", + categories=["planning"], + metadata={"agent_id": "agent-1"}, + embedding=[0.1, 0.2, 0.3, 0.4], + ) + mock_ft_list.return_value = [b"memory_index"] + mock_ft_search.return_value = create_mock_ft_search_response([(record, 0.9)]) + + await valkey_storage.asearch( + [0.1, 0.2, 0.3, 0.4], + scope_prefix="/agent", + categories=["planning"], + metadata_filter={"agent_id": "agent-1"}, + limit=5, + ) + + query = mock_ft_search.call_args[0][2] + assert "@scope:{/agent*}" in query + assert "@categories:{planning}" in query + # Metadata predicates remain on the Python side. + assert "@agent_id" not in query + + +class TestValkeyStorageMatchesMetadataFilter: + """Unit tests for the ``_matches_metadata_filter`` static helper.""" + + def test_returns_true_when_all_keys_match(self) -> None: + assert ValkeyStorage._matches_metadata_filter( + {"a": "1", "b": "2", "c": "3"}, + {"a": "1", "b": "2"}, + ) + + def test_returns_false_when_key_missing(self) -> None: + assert not ValkeyStorage._matches_metadata_filter( + {"a": "1"}, + {"a": "1", "b": "2"}, + ) + + def test_returns_false_when_value_mismatches(self) -> None: + assert not ValkeyStorage._matches_metadata_filter( + {"a": "1", "b": "wrong"}, + {"a": "1", "b": "2"}, + ) + + def test_coerces_to_string_for_comparison(self) -> None: + # Filter pre-stringified by caller still matches typed record value. + assert ValkeyStorage._matches_metadata_filter( + {"count": 42, "ratio": 3.14, "flag": True}, + {"count": "42", "ratio": "3.14", "flag": "True"}, + ) + # And vice versa: typed filter matches typed record value. + assert ValkeyStorage._matches_metadata_filter( + {"count": 42}, + {"count": 42}, + ) + + def test_empty_filter_always_matches(self) -> None: + assert ValkeyStorage._matches_metadata_filter({}, {}) + assert ValkeyStorage._matches_metadata_filter({"a": "1"}, {}) \ No newline at end of file