mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-01 07:13:00 +00:00
fix: JSON checkpoint discovery now finds branch subdirectories (#5491)
_list_json() and _info_json_latest() in checkpoint_cli.py used flat globs (location/*.json) which missed checkpoints stored under branch subdirectories (e.g. main/, fork/exp1/) introduced by the fork-aware checkpoint storage. Changed both functions to use recursive globs (location/**/*.json) so they discover checkpoints in branch subdirectories as well as the legacy flat layout. Added tests covering: - Discovery in single and multiple branch subdirectories - Legacy flat layout backward compatibility - Mixed flat + branch layouts - Empty directory edge case - Latest checkpoint selection across branches Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -173,9 +173,9 @@ def _entity_summary(entities: list[dict[str, Any]]) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _list_json(location: str) -> list[dict[str, Any]]:
|
def _list_json(location: str) -> list[dict[str, Any]]:
|
||||||
pattern = os.path.join(location, "*.json")
|
pattern = os.path.join(location, "**", "*.json")
|
||||||
results = []
|
results = []
|
||||||
for path in sorted(glob.glob(pattern), key=os.path.getmtime, reverse=True):
|
for path in sorted(glob.glob(pattern, recursive=True), key=os.path.getmtime, reverse=True):
|
||||||
name = os.path.basename(path)
|
name = os.path.basename(path)
|
||||||
try:
|
try:
|
||||||
with open(path) as f:
|
with open(path) as f:
|
||||||
@@ -192,8 +192,8 @@ def _list_json(location: str) -> list[dict[str, Any]]:
|
|||||||
|
|
||||||
|
|
||||||
def _info_json_latest(location: str) -> dict[str, Any] | None:
|
def _info_json_latest(location: str) -> dict[str, Any] | None:
|
||||||
pattern = os.path.join(location, "*.json")
|
pattern = os.path.join(location, "**", "*.json")
|
||||||
files = sorted(glob.glob(pattern), key=os.path.getmtime, reverse=True)
|
files = sorted(glob.glob(pattern, recursive=True), key=os.path.getmtime, reverse=True)
|
||||||
if not files:
|
if not files:
|
||||||
return None
|
return None
|
||||||
path = files[0]
|
path = files[0]
|
||||||
|
|||||||
@@ -537,3 +537,131 @@ class TestKickoffFromCheckpoint:
|
|||||||
)
|
)
|
||||||
assert mock_restored.checkpoint.restore_from is None
|
assert mock_restored.checkpoint.restore_from is None
|
||||||
assert result == "flow_result"
|
assert result == "flow_result"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- JSON checkpoint CLI discovery (issue #5491) ----------
|
||||||
|
|
||||||
|
|
||||||
|
_MINIMAL_CHECKPOINT = json.dumps({"entities": [], "event_record": {}})
|
||||||
|
|
||||||
|
|
||||||
|
class TestListJsonBranchDiscovery:
|
||||||
|
"""Verify _list_json discovers checkpoints inside branch subdirectories."""
|
||||||
|
|
||||||
|
def _write_checkpoint(self, base: str, branch: str, name: str) -> str:
|
||||||
|
branch_dir = os.path.join(base, branch)
|
||||||
|
os.makedirs(branch_dir, exist_ok=True)
|
||||||
|
path = os.path.join(branch_dir, name)
|
||||||
|
with open(path, "w") as f:
|
||||||
|
f.write(_MINIMAL_CHECKPOINT)
|
||||||
|
return path
|
||||||
|
|
||||||
|
def test_discovers_main_branch(self) -> None:
|
||||||
|
from crewai.cli.checkpoint_cli import _list_json
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
self._write_checkpoint(d, "main", "20260416T120000_abcd1234_p-none.json")
|
||||||
|
results = _list_json(d)
|
||||||
|
assert len(results) == 1
|
||||||
|
assert results[0]["name"] == "20260416T120000_abcd1234_p-none.json"
|
||||||
|
|
||||||
|
def test_discovers_multiple_branches(self) -> None:
|
||||||
|
from crewai.cli.checkpoint_cli import _list_json
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
self._write_checkpoint(d, "main", "20260416T120000_aaaa1111_p-none.json")
|
||||||
|
self._write_checkpoint(d, "fork/exp1", "20260416T120001_bbbb2222_p-none.json")
|
||||||
|
results = _list_json(d)
|
||||||
|
assert len(results) == 2
|
||||||
|
names = {r["name"] for r in results}
|
||||||
|
assert "20260416T120000_aaaa1111_p-none.json" in names
|
||||||
|
assert "20260416T120001_bbbb2222_p-none.json" in names
|
||||||
|
|
||||||
|
def test_discovers_flat_layout(self) -> None:
|
||||||
|
"""Legacy flat layout (files directly in location/) still works."""
|
||||||
|
from crewai.cli.checkpoint_cli import _list_json
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
path = os.path.join(d, "20260416T120000_cccc3333_p-none.json")
|
||||||
|
with open(path, "w") as f:
|
||||||
|
f.write(_MINIMAL_CHECKPOINT)
|
||||||
|
results = _list_json(d)
|
||||||
|
assert len(results) == 1
|
||||||
|
assert results[0]["name"] == "20260416T120000_cccc3333_p-none.json"
|
||||||
|
|
||||||
|
def test_empty_directory(self) -> None:
|
||||||
|
from crewai.cli.checkpoint_cli import _list_json
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
results = _list_json(d)
|
||||||
|
assert results == []
|
||||||
|
|
||||||
|
def test_mixed_flat_and_branch(self) -> None:
|
||||||
|
from crewai.cli.checkpoint_cli import _list_json
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
# Flat file
|
||||||
|
flat_path = os.path.join(d, "20260416T110000_flat0000_p-none.json")
|
||||||
|
with open(flat_path, "w") as f:
|
||||||
|
f.write(_MINIMAL_CHECKPOINT)
|
||||||
|
# Branch file
|
||||||
|
self._write_checkpoint(d, "main", "20260416T120000_brnc1111_p-none.json")
|
||||||
|
results = _list_json(d)
|
||||||
|
assert len(results) == 2
|
||||||
|
|
||||||
|
|
||||||
|
class TestInfoJsonLatestBranchDiscovery:
|
||||||
|
"""Verify _info_json_latest discovers the latest checkpoint across branches."""
|
||||||
|
|
||||||
|
def test_latest_from_branch_subdir(self) -> None:
|
||||||
|
from crewai.cli.checkpoint_cli import _info_json_latest
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
branch_dir = os.path.join(d, "main")
|
||||||
|
os.makedirs(branch_dir)
|
||||||
|
path = os.path.join(branch_dir, "20260416T120000_abcd1234_p-none.json")
|
||||||
|
with open(path, "w") as f:
|
||||||
|
f.write(_MINIMAL_CHECKPOINT)
|
||||||
|
result = _info_json_latest(d)
|
||||||
|
assert result is not None
|
||||||
|
assert result["name"] == "20260416T120000_abcd1234_p-none.json"
|
||||||
|
|
||||||
|
def test_latest_across_branches(self) -> None:
|
||||||
|
from crewai.cli.checkpoint_cli import _info_json_latest
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
# Older checkpoint on main
|
||||||
|
main_dir = os.path.join(d, "main")
|
||||||
|
os.makedirs(main_dir)
|
||||||
|
older = os.path.join(main_dir, "20260416T110000_aaaa1111_p-none.json")
|
||||||
|
with open(older, "w") as f:
|
||||||
|
f.write(_MINIMAL_CHECKPOINT)
|
||||||
|
time.sleep(0.05)
|
||||||
|
# Newer checkpoint on fork branch
|
||||||
|
fork_dir = os.path.join(d, "fork", "exp1")
|
||||||
|
os.makedirs(fork_dir, exist_ok=True)
|
||||||
|
newer = os.path.join(fork_dir, "20260416T120000_bbbb2222_p-none.json")
|
||||||
|
with open(newer, "w") as f:
|
||||||
|
f.write(_MINIMAL_CHECKPOINT)
|
||||||
|
result = _info_json_latest(d)
|
||||||
|
assert result is not None
|
||||||
|
assert result["name"] == "20260416T120000_bbbb2222_p-none.json"
|
||||||
|
|
||||||
|
def test_empty_returns_none(self) -> None:
|
||||||
|
from crewai.cli.checkpoint_cli import _info_json_latest
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
result = _info_json_latest(d)
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_latest_flat_layout(self) -> None:
|
||||||
|
"""Legacy flat layout still returns the latest file."""
|
||||||
|
from crewai.cli.checkpoint_cli import _info_json_latest
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as d:
|
||||||
|
path = os.path.join(d, "20260416T120000_flat0000_p-none.json")
|
||||||
|
with open(path, "w") as f:
|
||||||
|
f.write(_MINIMAL_CHECKPOINT)
|
||||||
|
result = _info_json_latest(d)
|
||||||
|
assert result is not None
|
||||||
|
assert result["name"] == "20260416T120000_flat0000_p-none.json"
|
||||||
|
|||||||
Reference in New Issue
Block a user