From a32a8955ef88ba3ce059cba36c15aa5af8c8b700 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 10 Jan 2026 16:02:14 +0000 Subject: [PATCH] fix: replace @lru_cache with instance-level caching in Repository.is_git_repo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a memory leak where using @lru_cache on the is_git_repo() instance method prevented garbage collection of Repository instances. The cache dictionary held references to self, keeping instances alive indefinitely. The fix replaces the @lru_cache decorator with instance-level caching using a _is_git_repo_cache attribute. This maintains O(1) performance for repeated calls while allowing proper garbage collection when instances go out of scope. Fixes #4210 Co-Authored-By: João --- lib/crewai/src/crewai/cli/git.py | 16 +++--- lib/crewai/tests/cli/test_git.py | 83 ++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 6 deletions(-) diff --git a/lib/crewai/src/crewai/cli/git.py b/lib/crewai/src/crewai/cli/git.py index fb08c391a..d0f3bd3d4 100644 --- a/lib/crewai/src/crewai/cli/git.py +++ b/lib/crewai/src/crewai/cli/git.py @@ -1,10 +1,10 @@ -from functools import lru_cache import subprocess class Repository: def __init__(self, path: str = ".") -> None: self.path = path + self._is_git_repo_cache: bool | None = None if not self.is_git_installed(): raise ValueError("Git is not installed or not found in your PATH.") @@ -40,22 +40,26 @@ class Repository: encoding="utf-8", ).strip() - @lru_cache(maxsize=None) # noqa: B019 def is_git_repo(self) -> bool: """Check if the current directory is a git repository. - Notes: - - TODO: This method is cached to avoid redundant checks, but using lru_cache on methods can lead to memory leaks + The result is cached at the instance level to avoid redundant checks + while allowing proper garbage collection of Repository instances. """ + if self._is_git_repo_cache is not None: + return self._is_git_repo_cache + try: subprocess.check_output( ["git", "rev-parse", "--is-inside-work-tree"], # noqa: S607 cwd=self.path, encoding="utf-8", ) - return True + self._is_git_repo_cache = True except subprocess.CalledProcessError: - return False + self._is_git_repo_cache = False + + return self._is_git_repo_cache def has_uncommitted_changes(self) -> bool: """Check if the repository has uncommitted changes.""" diff --git a/lib/crewai/tests/cli/test_git.py b/lib/crewai/tests/cli/test_git.py index b77106d3f..6bd0b32a5 100644 --- a/lib/crewai/tests/cli/test_git.py +++ b/lib/crewai/tests/cli/test_git.py @@ -1,4 +1,8 @@ +import gc +import weakref + import pytest + from crewai.cli.git import Repository @@ -99,3 +103,82 @@ def test_origin_url(fp, repository): stdout="https://github.com/user/repo.git\n", ) assert repository.origin_url() == "https://github.com/user/repo.git" + + +def test_repository_garbage_collection(fp): + """Test that Repository instances can be garbage collected. + + This test verifies the fix for the memory leak issue where using + @lru_cache on the is_git_repo() method prevented garbage collection + of Repository instances. + """ + fp.register(["git", "--version"], stdout="git version 2.30.0\n") + fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n") + fp.register(["git", "fetch"], stdout="") + + repo = Repository(path=".") + weak_ref = weakref.ref(repo) + + assert weak_ref() is not None + + del repo + gc.collect() + + assert weak_ref() is None, ( + "Repository instance was not garbage collected. " + "This indicates a memory leak, likely from @lru_cache on instance methods." + ) + + +def test_is_git_repo_caching(fp): + """Test that is_git_repo() result is cached at the instance level. + + This verifies that the instance-level caching works correctly, + only calling the subprocess once per instance. + """ + fp.register(["git", "--version"], stdout="git version 2.30.0\n") + fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n") + fp.register(["git", "fetch"], stdout="") + + repo = Repository(path=".") + + assert repo._is_git_repo_cache is True + + result1 = repo.is_git_repo() + result2 = repo.is_git_repo() + + assert result1 is True + assert result2 is True + assert repo._is_git_repo_cache is True + + +def test_multiple_repository_instances_independent_caches(fp): + """Test that multiple Repository instances have independent caches. + + This verifies that the instance-level caching doesn't share state + between different Repository instances. + """ + fp.register(["git", "--version"], stdout="git version 2.30.0\n") + fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n") + fp.register(["git", "fetch"], stdout="") + + fp.register(["git", "--version"], stdout="git version 2.30.0\n") + fp.register(["git", "rev-parse", "--is-inside-work-tree"], stdout="true\n") + fp.register(["git", "fetch"], stdout="") + + repo1 = Repository(path=".") + repo2 = Repository(path=".") + + assert repo1._is_git_repo_cache is True + assert repo2._is_git_repo_cache is True + + assert repo1._is_git_repo_cache is not repo2._is_git_repo_cache or ( + repo1._is_git_repo_cache == repo2._is_git_repo_cache + ) + + weak_ref1 = weakref.ref(repo1) + del repo1 + gc.collect() + + assert weak_ref1() is None + assert repo2._is_git_repo_cache is True