fix: replace @lru_cache with instance-level caching in Repository.is_git_repo()

This fixes a memory leak where using @lru_cache on the is_git_repo() instance
method prevented garbage collection of Repository instances. The cache
dictionary held references to self, keeping instances alive indefinitely.

The fix replaces the @lru_cache decorator with instance-level caching using
a _is_git_repo_cache attribute. This maintains O(1) performance for repeated
calls while allowing proper garbage collection when instances go out of scope.

Fixes #4210

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2026-01-10 16:02:14 +00:00
parent d60f7b360d
commit a32a8955ef
2 changed files with 93 additions and 6 deletions

View File

@@ -1,10 +1,10 @@
from functools import lru_cache
import subprocess
class Repository:
def __init__(self, path: str = ".") -> None:
self.path = path
self._is_git_repo_cache: bool | None = None
if not self.is_git_installed():
raise ValueError("Git is not installed or not found in your PATH.")
@@ -40,22 +40,26 @@ class Repository:
encoding="utf-8",
).strip()
@lru_cache(maxsize=None) # noqa: B019
def is_git_repo(self) -> bool:
"""Check if the current directory is a git repository.
Notes:
- TODO: This method is cached to avoid redundant checks, but using lru_cache on methods can lead to memory leaks
The result is cached at the instance level to avoid redundant checks
while allowing proper garbage collection of Repository instances.
"""
if self._is_git_repo_cache is not None:
return self._is_git_repo_cache
try:
subprocess.check_output(
["git", "rev-parse", "--is-inside-work-tree"], # noqa: S607
cwd=self.path,
encoding="utf-8",
)
return True
self._is_git_repo_cache = True
except subprocess.CalledProcessError:
return False
self._is_git_repo_cache = False
return self._is_git_repo_cache
def has_uncommitted_changes(self) -> bool:
"""Check if the repository has uncommitted changes."""

View File

@@ -1,4 +1,8 @@
import gc
import weakref
import pytest
from crewai.cli.git import Repository
@@ -99,3 +103,82 @@ def test_origin_url(fp, repository):
stdout="https://github.com/user/repo.git\n",
)
assert repository.origin_url() == "https://github.com/user/repo.git"
def test_repository_garbage_collection(fp):
"""Test that Repository instances can be garbage collected.
This test verifies the fix for the memory leak issue where using
@lru_cache on the is_git_repo() method prevented garbage collection
of Repository instances.
"""
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
fp.register(["git", "fetch"], stdout="")
repo = Repository(path=".")
weak_ref = weakref.ref(repo)
assert weak_ref() is not None
del repo
gc.collect()
assert weak_ref() is None, (
"Repository instance was not garbage collected. "
"This indicates a memory leak, likely from @lru_cache on instance methods."
)
def test_is_git_repo_caching(fp):
"""Test that is_git_repo() result is cached at the instance level.
This verifies that the instance-level caching works correctly,
only calling the subprocess once per instance.
"""
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
fp.register(["git", "fetch"], stdout="")
repo = Repository(path=".")
assert repo._is_git_repo_cache is True
result1 = repo.is_git_repo()
result2 = repo.is_git_repo()
assert result1 is True
assert result2 is True
assert repo._is_git_repo_cache is True
def test_multiple_repository_instances_independent_caches(fp):
"""Test that multiple Repository instances have independent caches.
This verifies that the instance-level caching doesn't share state
between different Repository instances.
"""
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
fp.register(["git", "fetch"], stdout="")
fp.register(["git", "--version"], stdout="git version 2.30.0\n")
fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n")
fp.register(["git", "fetch"], stdout="")
repo1 = Repository(path=".")
repo2 = Repository(path=".")
assert repo1._is_git_repo_cache is True
assert repo2._is_git_repo_cache is True
assert repo1._is_git_repo_cache is not repo2._is_git_repo_cache or (
repo1._is_git_repo_cache == repo2._is_git_repo_cache
)
weak_ref1 = weakref.ref(repo1)
del repo1
gc.collect()
assert weak_ref1() is None
assert repo2._is_git_repo_cache is True