fix: key artifact dedup map by (object, scope) and clear it in tests

Keying the object-identity dedup map on id(artifact) alone could orphan a
mapping if the same instance were ever stored under two scopes (the second
store overwrote the first's entry, and per-handle cleanup then skipped it).
Key on (id(artifact), scope) so each scope keeps its own handle and cleanup is
exact and unconditional. Also clear _handle_by_obj in the test fixture so stale
id->handle mappings don't accumulate across the session.
This commit is contained in:
Matt Aitchison
2026-06-04 20:33:08 -05:00
parent 8d2ca5ef4c
commit ad798afeca
2 changed files with 29 additions and 12 deletions

View File

@@ -117,7 +117,11 @@ class _ArtifactStore:
def __init__(self) -> None:
self._lock = threading.Lock()
self._entries: dict[str, _Entry] = {}
self._handle_by_obj: dict[int, str] = {}
# (id(artifact), scope) -> handle, so re-storing the same instance under
# the same scope reuses its handle. Keying on the scope too means storing
# an object under a different scope gets its own handle and its own
# cleanup entry rather than overwriting the first.
self._handle_by_obj: dict[tuple[int, str | None], str] = {}
def store(
self,
@@ -126,18 +130,14 @@ class _ArtifactStore:
ttl: int = DEFAULT_ARTIFACT_TTL,
) -> str:
norm_scope = str(scope_id) if scope_id is not None else None
obj_id = id(artifact)
obj_key = (id(artifact), norm_scope)
expires_at = (time.monotonic() + ttl) if ttl > 0 else None
with self._lock:
self._prune_locked()
existing = self._handle_by_obj.get(obj_id)
existing = self._handle_by_obj.get(obj_key)
if existing is not None:
entry = self._entries.get(existing)
if (
entry is not None
and entry.artifact is artifact
and entry.scope_id == norm_scope
):
if entry is not None and entry.artifact is artifact:
entry.expires_at = expires_at
return f"{_HANDLE_SCHEME}://{existing}"
handle_id = str(uuid4())
@@ -145,9 +145,9 @@ class _ArtifactStore:
artifact=artifact,
scope_id=norm_scope,
expires_at=expires_at,
obj_id=obj_id,
obj_id=id(artifact),
)
self._handle_by_obj[obj_id] = handle_id
self._handle_by_obj[obj_key] = handle_id
return f"{_HANDLE_SCHEME}://{handle_id}"
def resolve(self, handle_id: str) -> FileArtifact | None:
@@ -181,8 +181,8 @@ class _ArtifactStore:
def _delete_locked(self, handle_id: str) -> None:
"""Remove an entry and its object-identity mapping. Caller holds lock."""
entry = self._entries.pop(handle_id, None)
if entry is not None and self._handle_by_obj.get(entry.obj_id) == handle_id:
del self._handle_by_obj[entry.obj_id]
if entry is not None:
self._handle_by_obj.pop((entry.obj_id, entry.scope_id), None)
_store: Final[_ArtifactStore] = _ArtifactStore()

View File

@@ -26,8 +26,10 @@ _HANDLE = re.compile(r"crewai\+file://[0-9a-fA-F-]{36}")
def _clear_store():
"""Keep the process-local store empty between tests."""
_store._entries.clear()
_store._handle_by_obj.clear()
yield
_store._entries.clear()
_store._handle_by_obj.clear()
def _handle_in(text: str) -> str:
@@ -76,6 +78,21 @@ class TestStoreArtifact:
assert h1 == h2
assert len(_store._entries) == 1
def test_same_instance_different_scope_gets_own_handle_and_cleans_up(self) -> None:
# Storing one instance under two scopes must not orphan a mapping:
# each scope keeps its own handle, and clearing one leaves the other.
artifact = FileArtifact(data=b"x" * 100)
h_a = _handle_in(store_artifact(artifact, scope_id="A"))
h_b = _handle_in(store_artifact(artifact, scope_id="B"))
assert h_a != h_b
clear_artifact_scope("A")
assert resolve_artifact_handles(h_a) == h_a # A cleared
assert base64.b64decode(resolve_artifact_handles(h_b)) == b"x" * 100
# No dangling object-identity mapping for the cleared scope.
assert (id(artifact), "A") not in _store._handle_by_obj
clear_artifact_scope("B")
assert _store._handle_by_obj == {}
def test_placeholder_escapes_quotes_in_metadata(self) -> None:
artifact = FileArtifact(data=b"x", filename='a".pptx', mime_type='m"/x')
placeholder = store_artifact(artifact)