From ad798afecadeee0ec3ed6d6d5cc400e90843ae5e Mon Sep 17 00:00:00 2001 From: Matt Aitchison Date: Thu, 4 Jun 2026 20:33:08 -0500 Subject: [PATCH] fix: key artifact dedup map by (object, scope) and clear it in tests Keying the object-identity dedup map on id(artifact) alone could orphan a mapping if the same instance were ever stored under two scopes (the second store overwrote the first's entry, and per-handle cleanup then skipped it). Key on (id(artifact), scope) so each scope keeps its own handle and cleanup is exact and unconditional. Also clear _handle_by_obj in the test fixture so stale id->handle mappings don't accumulate across the session. --- lib/crewai/src/crewai/tools/file_artifact.py | 24 ++++++++++---------- lib/crewai/tests/tools/test_file_artifact.py | 17 ++++++++++++++ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/lib/crewai/src/crewai/tools/file_artifact.py b/lib/crewai/src/crewai/tools/file_artifact.py index 21d641f2d..57de55a4b 100644 --- a/lib/crewai/src/crewai/tools/file_artifact.py +++ b/lib/crewai/src/crewai/tools/file_artifact.py @@ -117,7 +117,11 @@ class _ArtifactStore: def __init__(self) -> None: self._lock = threading.Lock() self._entries: dict[str, _Entry] = {} - self._handle_by_obj: dict[int, str] = {} + # (id(artifact), scope) -> handle, so re-storing the same instance under + # the same scope reuses its handle. Keying on the scope too means storing + # an object under a different scope gets its own handle and its own + # cleanup entry rather than overwriting the first. + self._handle_by_obj: dict[tuple[int, str | None], str] = {} def store( self, @@ -126,18 +130,14 @@ class _ArtifactStore: ttl: int = DEFAULT_ARTIFACT_TTL, ) -> str: norm_scope = str(scope_id) if scope_id is not None else None - obj_id = id(artifact) + obj_key = (id(artifact), norm_scope) expires_at = (time.monotonic() + ttl) if ttl > 0 else None with self._lock: self._prune_locked() - existing = self._handle_by_obj.get(obj_id) + existing = self._handle_by_obj.get(obj_key) if existing is not None: entry = self._entries.get(existing) - if ( - entry is not None - and entry.artifact is artifact - and entry.scope_id == norm_scope - ): + if entry is not None and entry.artifact is artifact: entry.expires_at = expires_at return f"{_HANDLE_SCHEME}://{existing}" handle_id = str(uuid4()) @@ -145,9 +145,9 @@ class _ArtifactStore: artifact=artifact, scope_id=norm_scope, expires_at=expires_at, - obj_id=obj_id, + obj_id=id(artifact), ) - self._handle_by_obj[obj_id] = handle_id + self._handle_by_obj[obj_key] = handle_id return f"{_HANDLE_SCHEME}://{handle_id}" def resolve(self, handle_id: str) -> FileArtifact | None: @@ -181,8 +181,8 @@ class _ArtifactStore: def _delete_locked(self, handle_id: str) -> None: """Remove an entry and its object-identity mapping. Caller holds lock.""" entry = self._entries.pop(handle_id, None) - if entry is not None and self._handle_by_obj.get(entry.obj_id) == handle_id: - del self._handle_by_obj[entry.obj_id] + if entry is not None: + self._handle_by_obj.pop((entry.obj_id, entry.scope_id), None) _store: Final[_ArtifactStore] = _ArtifactStore() diff --git a/lib/crewai/tests/tools/test_file_artifact.py b/lib/crewai/tests/tools/test_file_artifact.py index ff169e35c..0485e2d2b 100644 --- a/lib/crewai/tests/tools/test_file_artifact.py +++ b/lib/crewai/tests/tools/test_file_artifact.py @@ -26,8 +26,10 @@ _HANDLE = re.compile(r"crewai\+file://[0-9a-fA-F-]{36}") def _clear_store(): """Keep the process-local store empty between tests.""" _store._entries.clear() + _store._handle_by_obj.clear() yield _store._entries.clear() + _store._handle_by_obj.clear() def _handle_in(text: str) -> str: @@ -76,6 +78,21 @@ class TestStoreArtifact: assert h1 == h2 assert len(_store._entries) == 1 + def test_same_instance_different_scope_gets_own_handle_and_cleans_up(self) -> None: + # Storing one instance under two scopes must not orphan a mapping: + # each scope keeps its own handle, and clearing one leaves the other. + artifact = FileArtifact(data=b"x" * 100) + h_a = _handle_in(store_artifact(artifact, scope_id="A")) + h_b = _handle_in(store_artifact(artifact, scope_id="B")) + assert h_a != h_b + clear_artifact_scope("A") + assert resolve_artifact_handles(h_a) == h_a # A cleared + assert base64.b64decode(resolve_artifact_handles(h_b)) == b"x" * 100 + # No dangling object-identity mapping for the cleared scope. + assert (id(artifact), "A") not in _store._handle_by_obj + clear_artifact_scope("B") + assert _store._handle_by_obj == {} + def test_placeholder_escapes_quotes_in_metadata(self) -> None: artifact = FileArtifact(data=b"x", filename='a".pptx', mime_type='m"/x') placeholder = store_artifact(artifact)