mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-13 01:58:30 +00:00
fix: replace @lru_cache with instance-level caching in Repository.is_git_repo()
This fixes a memory leak where using @lru_cache on the is_git_repo() instance method prevented garbage collection of Repository instances. The cache dictionary held references to self, keeping instances alive indefinitely. The fix replaces the @lru_cache decorator with instance-level caching using a _is_git_repo_cache attribute. This maintains O(1) performance for repeated calls while allowing proper garbage collection when instances go out of scope. Fixes #4210 Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -1,10 +1,10 @@
|
||||
from functools import lru_cache
|
||||
import subprocess
|
||||
|
||||
|
||||
class Repository:
|
||||
def __init__(self, path: str = ".") -> None:
|
||||
self.path = path
|
||||
self._is_git_repo_cache: bool | None = None
|
||||
|
||||
if not self.is_git_installed():
|
||||
raise ValueError("Git is not installed or not found in your PATH.")
|
||||
@@ -40,22 +40,26 @@ class Repository:
|
||||
encoding="utf-8",
|
||||
).strip()
|
||||
|
||||
@lru_cache(maxsize=None) # noqa: B019
|
||||
def is_git_repo(self) -> bool:
|
||||
"""Check if the current directory is a git repository.
|
||||
|
||||
Notes:
|
||||
- TODO: This method is cached to avoid redundant checks, but using lru_cache on methods can lead to memory leaks
|
||||
The result is cached at the instance level to avoid redundant checks
|
||||
while allowing proper garbage collection of Repository instances.
|
||||
"""
|
||||
if self._is_git_repo_cache is not None:
|
||||
return self._is_git_repo_cache
|
||||
|
||||
try:
|
||||
subprocess.check_output(
|
||||
["git", "rev-parse", "--is-inside-work-tree"], # noqa: S607
|
||||
cwd=self.path,
|
||||
encoding="utf-8",
|
||||
)
|
||||
return True
|
||||
self._is_git_repo_cache = True
|
||||
except subprocess.CalledProcessError:
|
||||
return False
|
||||
self._is_git_repo_cache = False
|
||||
|
||||
return self._is_git_repo_cache
|
||||
|
||||
def has_uncommitted_changes(self) -> bool:
|
||||
"""Check if the repository has uncommitted changes."""
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
import gc
|
||||
import weakref
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.cli.git import Repository
|
||||
|
||||
|
||||
@@ -99,3 +103,82 @@ def test_origin_url(fp, repository):
|
||||
stdout="https://github.com/user/repo.git\n",
|
||||
)
|
||||
assert repository.origin_url() == "https://github.com/user/repo.git"
|
||||
|
||||
|
||||
def test_repository_garbage_collection(fp):
|
||||
"""Test that Repository instances can be garbage collected.
|
||||
|
||||
This test verifies the fix for the memory leak issue where using
|
||||
@lru_cache on the is_git_repo() method prevented garbage collection
|
||||
of Repository instances.
|
||||
"""
|
||||
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
|
||||
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
|
||||
fp.register(["git", "fetch"], stdout="")
|
||||
|
||||
repo = Repository(path=".")
|
||||
weak_ref = weakref.ref(repo)
|
||||
|
||||
assert weak_ref() is not None
|
||||
|
||||
del repo
|
||||
gc.collect()
|
||||
|
||||
assert weak_ref() is None, (
|
||||
"Repository instance was not garbage collected. "
|
||||
"This indicates a memory leak, likely from @lru_cache on instance methods."
|
||||
)
|
||||
|
||||
|
||||
def test_is_git_repo_caching(fp):
|
||||
"""Test that is_git_repo() result is cached at the instance level.
|
||||
|
||||
This verifies that the instance-level caching works correctly,
|
||||
only calling the subprocess once per instance.
|
||||
"""
|
||||
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
|
||||
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
|
||||
fp.register(["git", "fetch"], stdout="")
|
||||
|
||||
repo = Repository(path=".")
|
||||
|
||||
assert repo._is_git_repo_cache is True
|
||||
|
||||
result1 = repo.is_git_repo()
|
||||
result2 = repo.is_git_repo()
|
||||
|
||||
assert result1 is True
|
||||
assert result2 is True
|
||||
assert repo._is_git_repo_cache is True
|
||||
|
||||
|
||||
def test_multiple_repository_instances_independent_caches(fp):
|
||||
"""Test that multiple Repository instances have independent caches.
|
||||
|
||||
This verifies that the instance-level caching doesn't share state
|
||||
between different Repository instances.
|
||||
"""
|
||||
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
|
||||
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
|
||||
fp.register(["git", "fetch"], stdout="")
|
||||
|
||||
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
|
||||
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
|
||||
fp.register(["git", "fetch"], stdout="")
|
||||
|
||||
repo1 = Repository(path=".")
|
||||
repo2 = Repository(path=".")
|
||||
|
||||
assert repo1._is_git_repo_cache is True
|
||||
assert repo2._is_git_repo_cache is True
|
||||
|
||||
assert repo1._is_git_repo_cache is not repo2._is_git_repo_cache or (
|
||||
repo1._is_git_repo_cache == repo2._is_git_repo_cache
|
||||
)
|
||||
|
||||
weak_ref1 = weakref.ref(repo1)
|
||||
del repo1
|
||||
gc.collect()
|
||||
|
||||
assert weak_ref1() is None
|
||||
assert repo2._is_git_repo_cache is True
|
||||
|
||||
Reference in New Issue
Block a user