Compare commits

..

1 Commit

Author: Devin AI
SHA1: a32a8955ef
Message: fix: replace @lru_cache with instance-level caching in Repository.is_git_repo()
This fixes a memory leak where using @lru_cache on the is_git_repo() instance
method prevented garbage collection of Repository instances. The cache
dictionary held references to self, keeping instances alive indefinitely.

The fix replaces the @lru_cache decorator with instance-level caching using
a _is_git_repo_cache attribute. This maintains O(1) performance for repeated
calls while allowing proper garbage collection when instances go out of scope.

Fixes #4210

Co-Authored-By: João <joao@crewai.com>
Date: 2026-01-10 16:02:14 +00:00
4 changed files with 111 additions and 292 deletions
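
The commit message describes the leak mechanism: functools.lru_cache attaches its cache to the decorated function object on the class, and because the cache keys include self, every Repository instance that ever called is_git_repo() stays strongly referenced. Below is a minimal standalone sketch (not taken from the crewai codebase; the Leaky and Fixed class names are illustrative) contrasting the leaking pattern with the instance-attribute cache used by the fix.

import gc
import weakref
from functools import lru_cache


class Leaky:
    @lru_cache(maxsize=None)  # cache lives on the class and holds a strong reference to self
    def answer(self) -> int:
        return 42


class Fixed:
    def __init__(self) -> None:
        self._answer_cache: int | None = None  # cache dies together with the instance

    def answer(self) -> int:
        if self._answer_cache is None:
            self._answer_cache = 42
        return self._answer_cache


for cls in (Leaky, Fixed):
    obj = cls()
    obj.answer()  # populate the cache
    ref = weakref.ref(obj)
    del obj
    gc.collect()
    print(f"{cls.__name__} collected: {ref() is None}")  # expect: Leaky False, Fixed True

On CPython this should print False for Leaky and True for Fixed, which is the same property the new test_repository_garbage_collection test asserts for Repository.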

View File

@@ -1,10 +1,10 @@
-from functools import lru_cache
 import subprocess
 
 
 class Repository:
     def __init__(self, path: str = ".") -> None:
         self.path = path
+        self._is_git_repo_cache: bool | None = None
         if not self.is_git_installed():
             raise ValueError("Git is not installed or not found in your PATH.")
@@ -40,22 +40,26 @@ class Repository:
             encoding="utf-8",
         ).strip()
 
-    @lru_cache(maxsize=None)  # noqa: B019
     def is_git_repo(self) -> bool:
         """Check if the current directory is a git repository.
 
         Notes:
-            - TODO: This method is cached to avoid redundant checks, but using lru_cache on methods can lead to memory leaks
+            The result is cached at the instance level to avoid redundant checks
+            while allowing proper garbage collection of Repository instances.
         """
+        if self._is_git_repo_cache is not None:
+            return self._is_git_repo_cache
+
         try:
             subprocess.check_output(
                 ["git", "rev-parse", "--is-inside-work-tree"],  # noqa: S607
                 cwd=self.path,
                 encoding="utf-8",
             )
-            return True
+            self._is_git_repo_cache = True
         except subprocess.CalledProcessError:
-            return False
+            self._is_git_repo_cache = False
+        return self._is_git_repo_cache
 
     def has_uncommitted_changes(self) -> bool:
         """Check if the repository has uncommitted changes."""

View File

@@ -2,11 +2,8 @@ from datetime import datetime
 import json
 import os
 import pickle
-import tempfile
-import threading
 from typing import Any, TypedDict
 
-import portalocker
 from typing_extensions import Unpack
@@ -126,15 +123,10 @@ class FileHandler:
 class PickleHandler:
-    """Thread-safe handler for saving and loading data using pickle.
-
-    This class provides thread-safe file operations using portalocker for
-    cross-process file locking and atomic write operations to prevent
-    data corruption during concurrent access.
+    """Handler for saving and loading data using pickle.
 
     Attributes:
         file_path: The path to the pickle file.
-        _lock: Threading lock for thread-safe operations within the same process.
     """
 
     def __init__(self, file_name: str) -> None:
@@ -149,62 +141,34 @@ class PickleHandler:
             file_name += ".pkl"
 
         self.file_path = os.path.join(os.getcwd(), file_name)
-        self._lock = threading.Lock()
 
     def initialize_file(self) -> None:
         """Initialize the file with an empty dictionary and overwrite any existing data."""
         self.save({})
 
     def save(self, data: Any) -> None:
-        """Save the data to the specified file using pickle with thread-safe atomic writes.
-
-        This method uses a two-phase approach for thread safety:
-        1. Threading lock for same-process thread safety
-        2. Atomic write (write to temp file, then rename) for cross-process safety
-           and data integrity
+        """
+        Save the data to the specified file using pickle.
 
         Args:
-            data: The data to be saved to the file.
+            data: The data to be saved to the file.
         """
-        with self._lock:
-            dir_name = os.path.dirname(self.file_path) or os.getcwd()
-            fd, temp_path = tempfile.mkstemp(
-                suffix=".pkl.tmp", prefix="pickle_", dir=dir_name
-            )
-            try:
-                with os.fdopen(fd, "wb") as f:
-                    pickle.dump(obj=data, file=f)
-                    f.flush()
-                    os.fsync(f.fileno())
-                os.replace(temp_path, self.file_path)
-            except Exception:
-                if os.path.exists(temp_path):
-                    os.unlink(temp_path)
-                raise
+        with open(self.file_path, "wb") as f:
+            pickle.dump(obj=data, file=f)
 
     def load(self) -> Any:
-        """Load the data from the specified file using pickle with thread-safe locking.
-
-        This method uses portalocker for cross-process read locking to ensure
-        data consistency when multiple processes may be accessing the file.
+        """Load the data from the specified file using pickle.
 
         Returns:
-            The data loaded from the file, or an empty dictionary if the file
-            does not exist or is empty.
+            The data loaded from the file.
         """
-        with self._lock:
-            if (
-                not os.path.exists(self.file_path)
-                or os.path.getsize(self.file_path) == 0
-            ):
-                return {}
+        if not os.path.exists(self.file_path) or os.path.getsize(self.file_path) == 0:
+            return {}  # Return an empty dictionary if the file does not exist or is empty
 
-            with portalocker.Lock(
-                self.file_path, "rb", flags=portalocker.LOCK_SH
-            ) as file:
-                try:
-                    return pickle.load(file)  # noqa: S301
-                except EOFError:
-                    return {}
-                except Exception:
-                    raise
+        with open(self.file_path, "rb") as file:
+            try:
+                return pickle.load(file)  # noqa: S301
+            except EOFError:
+                return {}  # Return an empty dictionary if the file is empty or corrupted
+            except Exception:
+                raise  # Raise any other exceptions that occur during loading
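
For context on what is being removed above: the thread-safe save() relied on writing the full payload to a temporary file in the same directory and then swapping it into place with os.replace(), so a concurrent reader can only ever observe the old file or the new one, never a partially written pickle (the rename is atomic on POSIX as long as both paths are on the same filesystem). A condensed standalone sketch of that pattern, with an illustrative helper name that is not part of crewai:

import os
import pickle
import tempfile
from typing import Any


def atomic_pickle_dump(data: Any, path: str) -> None:
    # Illustrative helper, not a crewai API.
    dir_name = os.path.dirname(path) or os.getcwd()
    # Create the temp file next to the target so os.replace() stays on one filesystem.
    fd, temp_path = tempfile.mkstemp(suffix=".tmp", dir=dir_name)
    try:
        with os.fdopen(fd, "wb") as f:
            pickle.dump(data, f)
            f.flush()
            os.fsync(f.fileno())  # ensure the bytes are on disk before the rename
        os.replace(temp_path, path)  # atomic swap: readers see old or new, never partial
    except Exception:
        if os.path.exists(temp_path):
            os.unlink(temp_path)
        raise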

View File

@@ -1,4 +1,8 @@
+import gc
+import weakref
+
 import pytest
 
 from crewai.cli.git import Repository
@@ -99,3 +103,82 @@ def test_origin_url(fp, repository):
stdout="https://github.com/user/repo.git\n",
)
assert repository.origin_url() == "https://github.com/user/repo.git"
def test_repository_garbage_collection(fp):
"""Test that Repository instances can be garbage collected.
This test verifies the fix for the memory leak issue where using
@lru_cache on the is_git_repo() method prevented garbage collection
of Repository instances.
"""
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
fp.register(["git", "fetch"], stdout="")
repo = Repository(path=".")
weak_ref = weakref.ref(repo)
assert weak_ref() is not None
del repo
gc.collect()
assert weak_ref() is None, (
"Repository instance was not garbage collected. "
"This indicates a memory leak, likely from @lru_cache on instance methods."
)
def test_is_git_repo_caching(fp):
"""Test that is_git_repo() result is cached at the instance level.
This verifies that the instance-level caching works correctly,
only calling the subprocess once per instance.
"""
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
fp.register(["git", "fetch"], stdout="")
repo = Repository(path=".")
assert repo._is_git_repo_cache is True
result1 = repo.is_git_repo()
result2 = repo.is_git_repo()
assert result1 is True
assert result2 is True
assert repo._is_git_repo_cache is True
def test_multiple_repository_instances_independent_caches(fp):
"""Test that multiple Repository instances have independent caches.
This verifies that the instance-level caching doesn't share state
between different Repository instances.
"""
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
fp.register(["git", "fetch"], stdout="")
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
fp.register(["git", "fetch"], stdout="")
repo1 = Repository(path=".")
repo2 = Repository(path=".")
assert repo1._is_git_repo_cache is True
assert repo2._is_git_repo_cache is True
assert repo1._is_git_repo_cache is not repo2._is_git_repo_cache or (
repo1._is_git_repo_cache == repo2._is_git_repo_cache
)
weak_ref1 = weakref.ref(repo1)
del repo1
gc.collect()
assert weak_ref1() is None
assert repo2._is_git_repo_cache is True

View File

@@ -1,8 +1,6 @@
 import os
-import threading
 import unittest
 import uuid
-from concurrent.futures import ThreadPoolExecutor, as_completed
 
 import pytest
 
 from crewai.utilities.file_handler import PickleHandler
@@ -10,6 +8,7 @@ from crewai.utilities.file_handler import PickleHandler
 class TestPickleHandler(unittest.TestCase):
     def setUp(self):
+        # Use a unique file name for each test to avoid race conditions in parallel test execution
         unique_id = str(uuid.uuid4())
         self.file_name = f"test_data_{unique_id}.pkl"
         self.file_path = os.path.join(os.getcwd(), self.file_name)
@@ -48,234 +47,3 @@ class TestPickleHandler(unittest.TestCase):
         assert str(exc.value) == "pickle data was truncated"
         assert "<class '_pickle.UnpicklingError'>" == str(exc.type)
-
-
-class TestPickleHandlerThreadSafety(unittest.TestCase):
-    """Tests for thread-safety of PickleHandler operations."""
-
-    def setUp(self):
-        unique_id = str(uuid.uuid4())
-        self.file_name = f"test_thread_safe_{unique_id}.pkl"
-        self.file_path = os.path.join(os.getcwd(), self.file_name)
-        self.handler = PickleHandler(self.file_name)
-
-    def tearDown(self):
-        if os.path.exists(self.file_path):
-            os.remove(self.file_path)
-
-    def test_concurrent_writes_same_handler(self):
-        """Test that concurrent writes from multiple threads using the same handler don't corrupt data."""
-        num_threads = 10
-        num_writes_per_thread = 20
-        errors: list[Exception] = []
-        write_count = 0
-        count_lock = threading.Lock()
-
-        def write_data(thread_id: int) -> None:
-            nonlocal write_count
-            for i in range(num_writes_per_thread):
-                try:
-                    data = {"thread": thread_id, "iteration": i, "data": f"value_{thread_id}_{i}"}
-                    self.handler.save(data)
-                    with count_lock:
-                        write_count += 1
-                except Exception as e:
-                    errors.append(e)
-
-        threads = []
-        for i in range(num_threads):
-            t = threading.Thread(target=write_data, args=(i,))
-            threads.append(t)
-            t.start()
-
-        for t in threads:
-            t.join()
-
-        assert len(errors) == 0, f"Errors occurred during concurrent writes: {errors}"
-        assert write_count == num_threads * num_writes_per_thread
-
-        loaded_data = self.handler.load()
-        assert isinstance(loaded_data, dict)
-        assert "thread" in loaded_data
-        assert "iteration" in loaded_data
-
-    def test_concurrent_reads_same_handler(self):
-        """Test that concurrent reads from multiple threads don't cause issues."""
-        test_data = {"key": "value", "nested": {"a": 1, "b": 2}}
-        self.handler.save(test_data)
-
-        num_threads = 20
-        results: list[dict] = []
-        errors: list[Exception] = []
-        results_lock = threading.Lock()
-
-        def read_data() -> None:
-            try:
-                data = self.handler.load()
-                with results_lock:
-                    results.append(data)
-            except Exception as e:
-                errors.append(e)
-
-        threads = []
-        for _ in range(num_threads):
-            t = threading.Thread(target=read_data)
-            threads.append(t)
-            t.start()
-
-        for t in threads:
-            t.join()
-
-        assert len(errors) == 0, f"Errors occurred during concurrent reads: {errors}"
-        assert len(results) == num_threads
-        for result in results:
-            assert result == test_data
-
-    def test_concurrent_read_write_same_handler(self):
-        """Test that concurrent reads and writes don't corrupt data or cause errors."""
-        initial_data = {"counter": 0}
-        self.handler.save(initial_data)
-
-        num_writers = 5
-        num_readers = 10
-        writes_per_thread = 10
-        reads_per_thread = 20
-        write_errors: list[Exception] = []
-        read_errors: list[Exception] = []
-        read_results: list[dict] = []
-        results_lock = threading.Lock()
-
-        def writer(thread_id: int) -> None:
-            for i in range(writes_per_thread):
-                try:
-                    data = {"writer": thread_id, "write_num": i}
-                    self.handler.save(data)
-                except Exception as e:
-                    write_errors.append(e)
-
-        def reader() -> None:
-            for _ in range(reads_per_thread):
-                try:
-                    data = self.handler.load()
-                    with results_lock:
-                        read_results.append(data)
-                except Exception as e:
-                    read_errors.append(e)
-
-        threads = []
-        for i in range(num_writers):
-            t = threading.Thread(target=writer, args=(i,))
-            threads.append(t)
-        for _ in range(num_readers):
-            t = threading.Thread(target=reader)
-            threads.append(t)
-
-        for t in threads:
-            t.start()
-        for t in threads:
-            t.join()
-
-        assert len(write_errors) == 0, f"Write errors: {write_errors}"
-        assert len(read_errors) == 0, f"Read errors: {read_errors}"
-        for result in read_results:
-            assert isinstance(result, dict)
-
-    def test_atomic_write_no_partial_data(self):
-        """Test that atomic writes prevent partial/corrupted data from being read."""
-        large_data = {"key": "x" * 100000, "numbers": list(range(10000))}
-        num_iterations = 50
-        errors: list[Exception] = []
-        corruption_detected = False
-        corruption_lock = threading.Lock()
-
-        def writer() -> None:
-            for _ in range(num_iterations):
-                try:
-                    self.handler.save(large_data)
-                except Exception as e:
-                    errors.append(e)
-
-        def reader() -> None:
-            nonlocal corruption_detected
-            for _ in range(num_iterations * 2):
-                try:
-                    data = self.handler.load()
-                    if data and data != {} and data != large_data:
-                        with corruption_lock:
-                            corruption_detected = True
-                except Exception as e:
-                    errors.append(e)
-
-        writer_thread = threading.Thread(target=writer)
-        reader_thread = threading.Thread(target=reader)
-        writer_thread.start()
-        reader_thread.start()
-        writer_thread.join()
-        reader_thread.join()
-
-        assert len(errors) == 0, f"Errors occurred: {errors}"
-        assert not corruption_detected, "Partial/corrupted data was read"
-
-    def test_thread_pool_concurrent_operations(self):
-        """Test thread safety using ThreadPoolExecutor for more realistic concurrent access."""
-        num_operations = 100
-        errors: list[Exception] = []
-
-        def operation(op_id: int) -> str:
-            try:
-                if op_id % 3 == 0:
-                    self.handler.save({"op_id": op_id, "type": "write"})
-                    return f"write_{op_id}"
-                else:
-                    data = self.handler.load()
-                    return f"read_{op_id}_{type(data).__name__}"
-            except Exception as e:
-                errors.append(e)
-                return f"error_{op_id}"
-
-        with ThreadPoolExecutor(max_workers=20) as executor:
-            futures = [executor.submit(operation, i) for i in range(num_operations)]
-            results = [f.result() for f in as_completed(futures)]
-
-        assert len(errors) == 0, f"Errors occurred: {errors}"
-        assert len(results) == num_operations
-
-    def test_multiple_handlers_same_file(self):
-        """Test that multiple PickleHandler instances for the same file work correctly."""
-        handler1 = PickleHandler(self.file_name)
-        handler2 = PickleHandler(self.file_name)
-
-        num_operations = 50
-        errors: list[Exception] = []
-
-        def use_handler1() -> None:
-            for i in range(num_operations):
-                try:
-                    handler1.save({"handler": 1, "iteration": i})
-                except Exception as e:
-                    errors.append(e)
-
-        def use_handler2() -> None:
-            for i in range(num_operations):
-                try:
-                    handler2.save({"handler": 2, "iteration": i})
-                except Exception as e:
-                    errors.append(e)
-
-        t1 = threading.Thread(target=use_handler1)
-        t2 = threading.Thread(target=use_handler2)
-        t1.start()
-        t2.start()
-        t1.join()
-        t2.join()
-
-        assert len(errors) == 0, f"Errors occurred: {errors}"
-
-        final_data = self.handler.load()
-        assert isinstance(final_data, dict)
-        assert "handler" in final_data
-        assert "iteration" in final_data