mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-07 10:12:38 +00:00
fix(memory): escape user input in LanceDBStorage SQL filters (#5728)
LanceDBStorage interpolated caller-supplied scope paths and record IDs directly into the WHERE clauses passed to LanceDB's where(), which accepts a raw DataFusion SQL expression and does not support parameterized queries. A malicious or unprivileged caller could escape the configured scope sandbox -- for example, calling delete(scope_prefix="/alpha' OR scope LIKE '/%") would wipe every record in the table instead of just the /alpha subtree -- and ordinary strings containing apostrophes (e.g. 'O''Brien') could crash the SQL parser. Add _escape_sql_str() and _escape_like() helpers and route every user-controlled value through them in search(), delete(), reset(), and the shared _scan_rows() reader. The LIKE clauses now also use ESCAPE '\\' so % and _ in caller-supplied prefixes are treated as literals instead of wildcards. Adds tests/memory/test_lancedb_storage_security.py covering each sink (search, delete by scope, delete by id, reset, scan-based readers) with both injection payloads and legitimate apostrophe- containing scopes/IDs.
This commit is contained in:
@@ -112,6 +112,37 @@ class LanceDBStorage:
|
||||
with store_lock(self._lock_name):
|
||||
self._table = self._create_table(vector_dim)
|
||||
|
||||
@staticmethod
|
||||
def _escape_sql_str(value: Any) -> str:
|
||||
"""Escape a string for inclusion inside a single-quoted SQL literal.
|
||||
|
||||
LanceDB's ``where()`` accepts an Apache DataFusion SQL expression as a
|
||||
raw string -- it does not support parameterized queries. Any caller-
|
||||
supplied value (scope path, record id, etc.) that is interpolated into
|
||||
a string literal MUST first have its single quotes doubled, otherwise
|
||||
an attacker (or simply a record with an apostrophe in its id) can
|
||||
terminate the literal early and inject arbitrary SQL.
|
||||
"""
|
||||
return str(value).replace("'", "''")
|
||||
|
||||
@staticmethod
|
||||
def _escape_like(value: Any) -> str:
|
||||
"""Escape a string for use as a literal prefix inside a ``LIKE`` clause.
|
||||
|
||||
Doubles single quotes (so the pattern can't break out of its literal)
|
||||
and escapes the SQL ``LIKE`` metacharacters ``%`` and ``_`` so that
|
||||
callers passing those characters in a scope path don't accidentally
|
||||
(or maliciously) widen the match. The returned pattern fragment is
|
||||
intended to be paired with ``ESCAPE '\\'``.
|
||||
"""
|
||||
return (
|
||||
str(value)
|
||||
.replace("\\", "\\\\")
|
||||
.replace("%", "\\%")
|
||||
.replace("_", "\\_")
|
||||
.replace("'", "''")
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _infer_dim_from_table(table: Any) -> int:
|
||||
"""Read vector dimension from an existing table's schema."""
|
||||
@@ -317,7 +348,7 @@ class LanceDBStorage:
|
||||
"""Update a record by ID. Preserves created_at, updates last_accessed."""
|
||||
with store_lock(self._lock_name):
|
||||
self._ensure_table()
|
||||
safe_id = str(record.id).replace("'", "''")
|
||||
safe_id = self._escape_sql_str(record.id)
|
||||
self._do_write("delete", f"id = '{safe_id}'")
|
||||
row = self._record_to_row(record)
|
||||
if row["vector"] is None or len(row["vector"]) != self._vector_dim:
|
||||
@@ -338,7 +369,7 @@ class LanceDBStorage:
|
||||
return
|
||||
with store_lock(self._lock_name):
|
||||
now = datetime.utcnow().isoformat()
|
||||
safe_ids = [str(rid).replace("'", "''") for rid in record_ids]
|
||||
safe_ids = [self._escape_sql_str(rid) for rid in record_ids]
|
||||
ids_expr = ", ".join(f"'{rid}'" for rid in safe_ids)
|
||||
self._do_write(
|
||||
"update",
|
||||
@@ -350,7 +381,7 @@ class LanceDBStorage:
|
||||
"""Return a single record by ID, or None if not found."""
|
||||
if self._table is None:
|
||||
return None
|
||||
safe_id = str(record_id).replace("'", "''")
|
||||
safe_id = self._escape_sql_str(record_id)
|
||||
rows = self._table.search().where(f"id = '{safe_id}'").limit(1).to_list()
|
||||
if not rows:
|
||||
return None
|
||||
@@ -370,8 +401,8 @@ class LanceDBStorage:
|
||||
query = self._table.search(query_embedding)
|
||||
if scope_prefix is not None and scope_prefix.strip("/"):
|
||||
prefix = scope_prefix.rstrip("/")
|
||||
like_val = prefix + "%"
|
||||
query = query.where(f"scope LIKE '{like_val}'")
|
||||
like_val = self._escape_like(prefix) + "%"
|
||||
query = query.where(f"scope LIKE '{like_val}' ESCAPE '\\'")
|
||||
results = query.limit(
|
||||
limit * 3 if (categories or metadata_filter) else limit
|
||||
).to_list()
|
||||
@@ -405,7 +436,8 @@ class LanceDBStorage:
|
||||
with store_lock(self._lock_name):
|
||||
if record_ids and not (categories or metadata_filter):
|
||||
before = int(self._table.count_rows())
|
||||
ids_expr = ", ".join(f"'{rid}'" for rid in record_ids)
|
||||
safe_ids = [self._escape_sql_str(rid) for rid in record_ids]
|
||||
ids_expr = ", ".join(f"'{rid}'" for rid in safe_ids)
|
||||
self._do_write("delete", f"id IN ({ids_expr})")
|
||||
return before - int(self._table.count_rows())
|
||||
if categories or metadata_filter:
|
||||
@@ -427,7 +459,8 @@ class LanceDBStorage:
|
||||
if not to_delete:
|
||||
return 0
|
||||
before = int(self._table.count_rows())
|
||||
ids_expr = ", ".join(f"'{rid}'" for rid in to_delete)
|
||||
safe_ids = [self._escape_sql_str(rid) for rid in to_delete]
|
||||
ids_expr = ", ".join(f"'{rid}'" for rid in safe_ids)
|
||||
self._do_write("delete", f"id IN ({ids_expr})")
|
||||
return before - int(self._table.count_rows())
|
||||
conditions = []
|
||||
@@ -435,9 +468,13 @@ class LanceDBStorage:
|
||||
prefix = scope_prefix.rstrip("/")
|
||||
if not prefix.startswith("/"):
|
||||
prefix = "/" + prefix
|
||||
conditions.append(f"scope LIKE '{prefix}%' OR scope = '/'")
|
||||
like_val = self._escape_like(prefix) + "%"
|
||||
conditions.append(
|
||||
f"(scope LIKE '{like_val}' ESCAPE '\\' OR scope = '/')"
|
||||
)
|
||||
if older_than is not None:
|
||||
conditions.append(f"created_at < '{older_than.isoformat()}'")
|
||||
safe_ts = self._escape_sql_str(older_than.isoformat())
|
||||
conditions.append(f"created_at < '{safe_ts}'")
|
||||
if not conditions:
|
||||
before = int(self._table.count_rows())
|
||||
self._do_write("delete", "id != ''")
|
||||
@@ -469,7 +506,8 @@ class LanceDBStorage:
|
||||
return []
|
||||
q = self._table.search()
|
||||
if scope_prefix is not None and scope_prefix.strip("/"):
|
||||
q = q.where(f"scope LIKE '{scope_prefix.rstrip('/')}%'")
|
||||
like_val = self._escape_like(scope_prefix.rstrip("/")) + "%"
|
||||
q = q.where(f"scope LIKE '{like_val}' ESCAPE '\\'")
|
||||
if columns is not None:
|
||||
q = q.select(columns)
|
||||
result: list[dict[str, Any]] = q.limit(limit).to_list()
|
||||
@@ -595,8 +633,10 @@ class LanceDBStorage:
|
||||
return
|
||||
prefix = scope_prefix.rstrip("/")
|
||||
if prefix:
|
||||
safe_prefix = self._escape_sql_str(prefix)
|
||||
self._do_write(
|
||||
"delete", f"scope >= '{prefix}' AND scope < '{prefix}/\uffff'"
|
||||
"delete",
|
||||
f"scope >= '{safe_prefix}' AND scope < '{safe_prefix}/\uffff'",
|
||||
)
|
||||
|
||||
def optimize(self) -> None:
|
||||
|
||||
271
lib/crewai/tests/memory/test_lancedb_storage_security.py
Normal file
271
lib/crewai/tests/memory/test_lancedb_storage_security.py
Normal file
@@ -0,0 +1,271 @@
|
||||
"""Regression tests for SQL-injection hardening of ``LanceDBStorage``.
|
||||
|
||||
Issue: GH #5728
|
||||
|
||||
LanceDB's ``where()`` accepts a raw Apache DataFusion SQL expression and does
|
||||
not support parameterized queries. Earlier versions of ``LanceDBStorage``
|
||||
interpolated caller-supplied scope paths and record IDs directly into the
|
||||
WHERE clause via f-strings, which allowed:
|
||||
|
||||
* an unprivileged caller to escape the configured ``scope`` sandbox and
|
||||
read / delete records belonging to any other scope, and
|
||||
* legitimate strings containing single quotes (e.g. ``"O'Brien"``) to crash
|
||||
with a SQL parse error.
|
||||
|
||||
These tests pin the hardened behaviour so the bug can never silently
|
||||
regress.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.memory.storage.lancedb_storage import LanceDBStorage
|
||||
from crewai.memory.types import MemoryRecord
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def storage(tmp_path: Path) -> LanceDBStorage:
|
||||
return LanceDBStorage(path=str(tmp_path / "mem"), vector_dim=4)
|
||||
|
||||
|
||||
def _seed(storage: LanceDBStorage) -> None:
|
||||
storage.save(
|
||||
[
|
||||
MemoryRecord(
|
||||
id="alpha-1",
|
||||
content="alpha",
|
||||
scope="/alpha",
|
||||
embedding=[0.1, 0.2, 0.3, 0.4],
|
||||
),
|
||||
MemoryRecord(
|
||||
id="bravo-1",
|
||||
content="bravo",
|
||||
scope="/bravo",
|
||||
embedding=[0.5, 0.6, 0.7, 0.8],
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Helper unit tests
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_escape_sql_str_doubles_single_quotes() -> None:
|
||||
assert LanceDBStorage._escape_sql_str("O'Brien") == "O''Brien"
|
||||
assert LanceDBStorage._escape_sql_str("a'; DROP TABLE t;--") == "a''; DROP TABLE t;--"
|
||||
# Non-string inputs are coerced.
|
||||
assert LanceDBStorage._escape_sql_str(42) == "42"
|
||||
|
||||
|
||||
def test_escape_like_escapes_metacharacters() -> None:
|
||||
# Backslash is escaped first so subsequent escapes don't double-escape.
|
||||
assert LanceDBStorage._escape_like("a\\b") == "a\\\\b"
|
||||
assert LanceDBStorage._escape_like("a%b") == "a\\%b"
|
||||
assert LanceDBStorage._escape_like("a_b") == "a\\_b"
|
||||
assert LanceDBStorage._escape_like("O'Brien") == "O''Brien"
|
||||
# All metacharacters together.
|
||||
assert (
|
||||
LanceDBStorage._escape_like("100%_off'\\")
|
||||
== "100\\%\\_off''\\\\"
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Sink 1: search() must not leak across scopes via injected scope_prefix
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_search_scope_prefix_injection_returns_no_match(
|
||||
storage: LanceDBStorage,
|
||||
) -> None:
|
||||
_seed(storage)
|
||||
# Classic ``' OR '1'='1`` style payload aimed at widening the LIKE.
|
||||
payload = "/alpha' OR '1'='1"
|
||||
results = storage.search([0.1, 0.2, 0.3, 0.4], scope_prefix=payload, limit=10)
|
||||
assert results == []
|
||||
|
||||
|
||||
def test_search_scope_prefix_with_apostrophe_does_not_crash(
|
||||
storage: LanceDBStorage,
|
||||
) -> None:
|
||||
storage.save(
|
||||
[
|
||||
MemoryRecord(
|
||||
id="x-1",
|
||||
content="x",
|
||||
scope="/O'Brien",
|
||||
embedding=[0.1, 0.2, 0.3, 0.4],
|
||||
)
|
||||
]
|
||||
)
|
||||
# Must round-trip a legitimate scope containing an apostrophe.
|
||||
results = storage.search(
|
||||
[0.1, 0.2, 0.3, 0.4], scope_prefix="/O'Brien", limit=10
|
||||
)
|
||||
assert len(results) == 1
|
||||
assert results[0][0].scope == "/O'Brien"
|
||||
|
||||
|
||||
def test_search_scope_prefix_percent_is_literal_not_wildcard(
|
||||
storage: LanceDBStorage,
|
||||
) -> None:
|
||||
"""A ``%`` in the user-supplied prefix must be treated as a literal,
|
||||
not as a SQL ``LIKE`` wildcard that would match unrelated scopes."""
|
||||
_seed(storage)
|
||||
# ``/%`` would, without escaping, match every scope that starts with ``/``.
|
||||
results = storage.search([0.1, 0.2, 0.3, 0.4], scope_prefix="/%", limit=10)
|
||||
assert results == []
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Sink 2: delete(scope_prefix=...) must not let an attacker wipe other scopes
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_delete_scope_prefix_injection_does_not_bypass_isolation(
|
||||
storage: LanceDBStorage,
|
||||
) -> None:
|
||||
"""The most damaging payload from issue #5728: a malicious scope_prefix
|
||||
that, before the fix, deleted every record in the table by appending
|
||||
``OR scope LIKE '/%`` to the WHERE clause."""
|
||||
_seed(storage)
|
||||
assert storage.count() == 2
|
||||
|
||||
# Pre-fix, this WHERE evaluated to:
|
||||
# scope LIKE '/alpha' OR scope LIKE '/%' OR scope = '/'
|
||||
# which deletes /alpha AND /bravo. Post-fix the entire payload is
|
||||
# treated as a literal prefix and matches nothing.
|
||||
payload = "/alpha' OR scope LIKE '/%"
|
||||
n = storage.delete(scope_prefix=payload)
|
||||
|
||||
assert n == 0
|
||||
assert storage.count() == 2
|
||||
|
||||
# And the legitimate scoped delete must still work.
|
||||
n = storage.delete(scope_prefix="/alpha")
|
||||
assert n == 1
|
||||
assert storage.count() == 1
|
||||
remaining = storage.list_records()
|
||||
assert [r.scope for r in remaining] == ["/bravo"]
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Sink 3: delete(record_ids=[...]) must escape IDs
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_delete_record_ids_injection_does_not_match_real_rows(
|
||||
storage: LanceDBStorage,
|
||||
) -> None:
|
||||
_seed(storage)
|
||||
# An attacker-controlled "id" containing a quote used to either
|
||||
# crash the SQL tokenizer or, worse, evaluate a tautology.
|
||||
n = storage.delete(record_ids=["' OR '1'='1"])
|
||||
assert n == 0
|
||||
assert storage.count() == 2
|
||||
|
||||
|
||||
def test_delete_record_ids_with_apostrophe_round_trips(
|
||||
storage: LanceDBStorage,
|
||||
) -> None:
|
||||
storage.save(
|
||||
[
|
||||
MemoryRecord(
|
||||
id="O'Reilly-42",
|
||||
content="ok",
|
||||
scope="/team",
|
||||
embedding=[0.0] * 4,
|
||||
)
|
||||
]
|
||||
)
|
||||
assert storage.count() == 1
|
||||
n = storage.delete(record_ids=["O'Reilly-42"])
|
||||
assert n == 1
|
||||
assert storage.count() == 0
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Sink 4: reset(scope_prefix=...) must not crash on apostrophes
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_reset_scope_prefix_with_apostrophe_does_not_crash(
|
||||
storage: LanceDBStorage,
|
||||
) -> None:
|
||||
storage.save(
|
||||
[
|
||||
MemoryRecord(
|
||||
id="r-1",
|
||||
content="x",
|
||||
scope="/O'Brien/team",
|
||||
embedding=[0.0] * 4,
|
||||
),
|
||||
MemoryRecord(
|
||||
id="r-2",
|
||||
content="y",
|
||||
scope="/other",
|
||||
embedding=[0.0] * 4,
|
||||
),
|
||||
]
|
||||
)
|
||||
# Must not raise and must scope the reset correctly.
|
||||
storage.reset(scope_prefix="/O'Brien")
|
||||
remaining = storage.list_records()
|
||||
assert [r.scope for r in remaining] == ["/other"]
|
||||
|
||||
|
||||
def test_reset_scope_prefix_injection_does_not_drop_unrelated_scopes(
|
||||
storage: LanceDBStorage,
|
||||
) -> None:
|
||||
_seed(storage)
|
||||
# ``' OR scope >= ''`` would, without escaping, broaden the range
|
||||
# comparison to every row.
|
||||
storage.reset(scope_prefix="/alpha' OR scope >= '")
|
||||
assert storage.count() == 2 # nothing should have been deleted
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Scan-based readers: list_records / list_scopes / get_scope_info /
|
||||
# list_categories / count all flow through ``_scan_rows`` and used to
|
||||
# crash on apostrophes and leak across scopes via ``%``/``_`` wildcards.
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_scan_methods_with_apostrophe_in_scope(
|
||||
storage: LanceDBStorage,
|
||||
) -> None:
|
||||
storage.save(
|
||||
[
|
||||
MemoryRecord(
|
||||
id="s-1",
|
||||
content="x",
|
||||
scope="/O'Brien",
|
||||
categories=["c1"],
|
||||
embedding=[0.0] * 4,
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
assert [r.id for r in storage.list_records(scope_prefix="/O'Brien")] == ["s-1"]
|
||||
info = storage.get_scope_info("/O'Brien")
|
||||
assert info.record_count == 1
|
||||
assert info.path == "/O'Brien"
|
||||
assert storage.list_scopes("/O'Brien") == []
|
||||
assert storage.list_categories(scope_prefix="/O'Brien") == {"c1": 1}
|
||||
assert storage.count(scope_prefix="/O'Brien") == 1
|
||||
|
||||
|
||||
def test_scan_methods_treat_percent_as_literal(
|
||||
storage: LanceDBStorage,
|
||||
) -> None:
|
||||
_seed(storage)
|
||||
# ``/%`` should NOT match every scope rooted at ``/``; it should match
|
||||
# only literal ``/<percent>...`` prefixes (of which there are none).
|
||||
assert storage.list_records(scope_prefix="/%") == []
|
||||
assert storage.count(scope_prefix="/%") == 0
|
||||
assert storage.list_categories(scope_prefix="/%") == {}
|
||||
Reference in New Issue
Block a user