From 385507667026bb721b08d2749165c5df9b0e7073 Mon Sep 17 00:00:00 2001 From: lorenzejay Date: Tue, 5 May 2026 14:45:52 -0700 Subject: [PATCH] self improve with skills --- lib/crewai/src/crewai/agent/core.py | 20 +++-- lib/crewai/src/crewai/cli/skills_proposals.py | 32 ++++---- .../crewai/skills/self_improve/acceptance.py | 17 +++- .../crewai/skills/self_improve/collector.py | 30 ++++--- .../src/crewai/skills/self_improve/models.py | 42 +++++++++- .../crewai/skills/self_improve/reviewer.py | 14 +++- .../src/crewai/skills/self_improve/storage.py | 11 ++- .../skills/self_improve/test_acceptance.py | 78 +++++++++++++++++++ .../tests/skills/self_improve/test_models.py | 24 ++++++ .../skills/self_improve/test_reviewer.py | 16 ++++ 10 files changed, 238 insertions(+), 46 deletions(-) diff --git a/lib/crewai/src/crewai/agent/core.py b/lib/crewai/src/crewai/agent/core.py index d3555ff3a..5ffdaae09 100644 --- a/lib/crewai/src/crewai/agent/core.py +++ b/lib/crewai/src/crewai/agent/core.py @@ -378,14 +378,6 @@ class Agent(BaseAgent): collector.attach(crewai_event_bus) self._self_improve_collector = collector - def _self_improve_config(self) -> SelfImprovementConfig | None: - """Return the active SelfImprovementConfig, or None when disabled.""" - if not self.self_improve: - return None - if isinstance(self.self_improve, SelfImprovementConfig): - return self.self_improve - return SelfImprovementConfig() - if self.reasoning and self.planning_config is None: warnings.warn( "The 'reasoning' parameter is deprecated. Use 'planning_config=PlanningConfig()' instead.", @@ -398,6 +390,14 @@ class Agent(BaseAgent): return self + def _self_improve_config(self) -> SelfImprovementConfig | None: + """Return the active SelfImprovementConfig, or None when disabled.""" + if not self.self_improve: + return None + if isinstance(self.self_improve, SelfImprovementConfig): + return self.self_improve + return SelfImprovementConfig() + @property def planning_enabled(self) -> bool: """Check if planning is enabled for this agent.""" @@ -464,9 +464,7 @@ class Agent(BaseAgent): else: candidate = SkillStore().role_dir(self.role) if candidate.is_dir() and any( - (c / "SKILL.md").is_file() - for c in candidate.iterdir() - if c.is_dir() + (c / "SKILL.md").is_file() for c in candidate.iterdir() if c.is_dir() ): self_improve_dir = candidate diff --git a/lib/crewai/src/crewai/cli/skills_proposals.py b/lib/crewai/src/crewai/cli/skills_proposals.py index 7784f8d49..937128390 100644 --- a/lib/crewai/src/crewai/cli/skills_proposals.py +++ b/lib/crewai/src/crewai/cli/skills_proposals.py @@ -4,6 +4,7 @@ from __future__ import annotations from collections import defaultdict from pathlib import Path +from typing import TYPE_CHECKING import click @@ -16,18 +17,16 @@ from crewai.skills.self_improve import ( ) +if TYPE_CHECKING: + from crewai.skills.self_improve.models import RunTrace, SkillProposal + + _DEFAULT_REVIEW_MODEL = "anthropic/claude-haiku-4-5" -def _print_proposal_summary(p) -> None: - kind = ( - f"PATCH→{p.target_skill}" - if p.proposal_kind == "patch_existing" - else "NEW" - ) - click.echo( - f" {p.id} {kind:<28} conf={p.confidence:.2f} role={p.agent_role}" - ) +def _print_proposal_summary(p: SkillProposal) -> None: + kind = f"PATCH→{p.target_skill}" if p.proposal_kind == "patch_existing" else "NEW" + click.echo(f" {p.id} {kind:<28} conf={p.confidence:.2f} role={p.agent_role}") click.echo(f" {p.name}: {p.description}") @@ -76,7 +75,7 @@ def skills_review(role: str | None, model: str, min_traces: int, floor: float) - raise SystemExit(1) # Group traces by role from disk; the role-slug dirs come from the store. - by_role: dict[str, list] = defaultdict(list) + by_role: dict[str, list[RunTrace]] = defaultdict(list) role_dirs = ( [trace_store.role_dir(role)] if role else sorted(trace_store.root.iterdir()) ) @@ -88,7 +87,9 @@ def skills_review(role: str | None, model: str, min_traces: int, floor: float) - by_role[t.agent_role].append(t) if not by_role: - click.echo("No traces found." if role is None else f"No traces for role={role!r}.") + click.echo( + "No traces found." if role is None else f"No traces for role={role!r}." + ) return reviewer_llm = LLM(model=model) @@ -97,8 +98,7 @@ def skills_review(role: str | None, model: str, min_traces: int, floor: float) - for agent_role, traces in by_role.items(): if len(traces) < min_traces: click.echo( - f"Skipping {agent_role!r}: {len(traces)} trace(s), " - f"need {min_traces}." + f"Skipping {agent_role!r}: {len(traces)} trace(s), need {min_traces}." ) continue @@ -147,8 +147,6 @@ def proposals() -> None: def proposals_list(role: str | None) -> None: """List pending proposals across all roles.""" store = ProposalStore() - items = store.list_for_role(role) if role else None - if role: records = [store.load(p) for p in store.list_for_role(role)] else: @@ -190,7 +188,9 @@ def proposals_show(proposal_id: str) -> None: @proposals.command(name="accept") @click.argument("proposal_id") -@click.option("--force", is_flag=True, help="Overwrite an existing skill of the same name.") +@click.option( + "--force", is_flag=True, help="Overwrite an existing skill of the same name." +) @click.option( "--skills-dir", type=click.Path(file_okay=False, path_type=Path), diff --git a/lib/crewai/src/crewai/skills/self_improve/acceptance.py b/lib/crewai/src/crewai/skills/self_improve/acceptance.py index a3433488a..513d88995 100644 --- a/lib/crewai/src/crewai/skills/self_improve/acceptance.py +++ b/lib/crewai/src/crewai/skills/self_improve/acceptance.py @@ -47,10 +47,18 @@ def accept_proposal( ) -> Path: """Write the proposal as a SKILL.md and remove it from the queue. + When ``skill_store`` is not provided, the destination is selected in + this order: + + 1. ``proposal.skills_dir`` — set by the reviewer from the agent's + ``SelfImprovementConfig.skills_dir``. This is the common case; it + keeps accept aligned with where the agent reads from. + 2. Platform default — ``/self_improve/skills/``. + Args: proposal: The proposal to materialize. force: When True, overwrite an existing SKILL.md at the target path. - skill_store: Override for the live-skills store (test injection). + skill_store: Explicit override; bypasses the proposal hint. proposal_store: Override for the proposals store (test injection). Returns: @@ -60,7 +68,12 @@ def accept_proposal( FileExistsError: When a SKILL.md already exists at the target and ``force=False``. """ - skill_store = skill_store or SkillStore() + if skill_store is None: + skill_store = ( + SkillStore(skills_root=proposal.skills_dir) + if proposal.skills_dir is not None + else SkillStore() + ) proposal_store = proposal_store or ProposalStore() target_dir = skill_store.skill_dir(proposal.agent_role, proposal.name) diff --git a/lib/crewai/src/crewai/skills/self_improve/collector.py b/lib/crewai/src/crewai/skills/self_improve/collector.py index e3a41d407..6d62c0a24 100644 --- a/lib/crewai/src/crewai/skills/self_improve/collector.py +++ b/lib/crewai/src/crewai/skills/self_improve/collector.py @@ -10,6 +10,7 @@ from __future__ import annotations import atexit from datetime import UTC, datetime import logging +from pathlib import Path import threading from typing import TYPE_CHECKING, Any @@ -127,6 +128,7 @@ class TraceCollector: agent_id=str(getattr(self._agent, "id", "") or ""), agent_role=self._agent.role, agent_goal=getattr(self._agent, "goal", "") or "", + agent_skills_dir=self._agent_skills_dir(), task_id=str(getattr(task, "id", "") or "") or None, task_description=getattr(task, "description", None), loaded_skills=self._collect_loaded_skills(), @@ -187,9 +189,7 @@ class TraceCollector: with self._lock: if self._current is None: return - self._current.output_summary = _truncate( - event.output, _OUTPUT_TRUNCATE - ) + self._current.output_summary = _truncate(event.output, _OUTPUT_TRUNCATE) self._finalize_locked() @bus.on(AgentExecutionErrorEvent) @@ -224,6 +224,7 @@ class TraceCollector: agent_id=str(getattr(self._agent, "id", "") or ""), agent_role=self._agent.role, agent_goal=getattr(self._agent, "goal", "") or "", + agent_skills_dir=self._agent_skills_dir(), task_description=_truncate(task_desc, _OUTPUT_TRUNCATE), loaded_skills=self._collect_loaded_skills(), ) @@ -237,15 +238,11 @@ class TraceCollector: with self._lock: if self._current is None: return - self._current.output_summary = _truncate( - event.output, _OUTPUT_TRUNCATE - ) + self._current.output_summary = _truncate(event.output, _OUTPUT_TRUNCATE) self._finalize_locked() @bus.on(LiteAgentExecutionErrorEvent) - def _on_lite_error( - _source: Any, event: LiteAgentExecutionErrorEvent - ) -> None: + def _on_lite_error(_source: Any, event: LiteAgentExecutionErrorEvent) -> None: if not self._is_my_id(event.agent_info.get("id")): return with self._lock: @@ -260,6 +257,21 @@ class TraceCollector: return None return max(0, int((finished_at - started).total_seconds() * 1000)) + def _agent_skills_dir(self) -> Path | None: + """Read skills_dir from the agent's SelfImprovementConfig if set. + + We use ``getattr`` + duck typing instead of importing Agent so the + collector stays usable in tests with stub agents. + """ + config_getter = getattr(self._agent, "_self_improve_config", None) + if not callable(config_getter): + return None + try: + config = config_getter() + except Exception: + return None + return getattr(config, "skills_dir", None) if config is not None else None + def _collect_loaded_skills(self) -> list[str]: skills = getattr(self._agent, "skills", None) or [] names: list[str] = [] diff --git a/lib/crewai/src/crewai/skills/self_improve/models.py b/lib/crewai/src/crewai/skills/self_improve/models.py index e367ad7bb..642c6f4ac 100644 --- a/lib/crewai/src/crewai/skills/self_improve/models.py +++ b/lib/crewai/src/crewai/skills/self_improve/models.py @@ -4,10 +4,29 @@ from __future__ import annotations from datetime import UTC, datetime from pathlib import Path +import re from typing import Literal import uuid -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, field_validator + + +_SLUG_NON_ALNUM = re.compile(r"[^a-z0-9-]+") +_SLUG_DASHES = re.compile(r"-+") + + +def _slugify(value: str) -> str: + """Force a string into the kebab-case shape the skill loader requires. + + The reviewer LLM is told to emit kebab-case names, but doesn't always + comply (e.g. it emits Title Case with spaces). Without this normalization + the accepted SKILL.md fails the loader's name pattern and is silently + skipped, breaking the closed loop. + """ + s = value.strip().lower().replace("_", "-").replace(" ", "-") + s = _SLUG_NON_ALNUM.sub("", s) + s = _SLUG_DASHES.sub("-", s).strip("-") + return s[:64] or "skill" def _now() -> datetime: @@ -72,6 +91,14 @@ class RunTrace(BaseModel): agent_id: str | None = None agent_role: str agent_goal: str = "" + agent_skills_dir: Path | None = Field( + default=None, + description=( + "Resolved skills_dir from the agent's SelfImprovementConfig at " + "trace time. Carried into proposals so accept writes back to " + "the same place the agent reads from." + ), + ) task_id: str | None = None task_description: str | None = None started_at: datetime = Field(default_factory=_now) @@ -113,4 +140,17 @@ class SkillProposal(BaseModel): proposal_kind: ProposalKind = "new" target_skill: str | None = None derived_from_runs: list[str] = Field(default_factory=list) + skills_dir: Path | None = Field( + default=None, + description=( + "Where to write the SKILL.md when this proposal is accepted, " + "carried over from the agent's SelfImprovementConfig at trace " + "time. Falls back to platform default when None." + ), + ) created_at: datetime = Field(default_factory=_now) + + @field_validator("name", mode="before") + @classmethod + def _force_slug(cls, v: str | None) -> str | None: + return _slugify(v) if isinstance(v, str) else v diff --git a/lib/crewai/src/crewai/skills/self_improve/reviewer.py b/lib/crewai/src/crewai/skills/self_improve/reviewer.py index 2be92f64a..0558771d7 100644 --- a/lib/crewai/src/crewai/skills/self_improve/reviewer.py +++ b/lib/crewai/src/crewai/skills/self_improve/reviewer.py @@ -199,9 +199,21 @@ class SkillReviewer: return [] run_ids = [t.id for t in traces] + # Carry the agent's skills_dir from the latest trace forward so the + # accept step writes to the same path the agent reads from. We pick + # the *last* trace's value because configuration drift (a user + # changing skills_dir between runs) should land at the most recent. + skills_dir = next( + (t.agent_skills_dir for t in reversed(traces) if t.agent_skills_dir), + None, + ) return [ p.model_copy( - update={"agent_role": self.agent_role, "derived_from_runs": run_ids} + update={ + "agent_role": self.agent_role, + "derived_from_runs": run_ids, + "skills_dir": skills_dir, + } ) for p in result.proposals if p.confidence >= self.confidence_floor diff --git a/lib/crewai/src/crewai/skills/self_improve/storage.py b/lib/crewai/src/crewai/skills/self_improve/storage.py index 595388cbd..67430500d 100644 --- a/lib/crewai/src/crewai/skills/self_improve/storage.py +++ b/lib/crewai/src/crewai/skills/self_improve/storage.py @@ -132,8 +132,7 @@ class ProposalStore: for role_dir in sorted(self.root.iterdir()): if not role_dir.is_dir(): continue - for path in sorted(role_dir.glob("*.json")): - out.append(self.load(path)) + out.extend(self.load(path) for path in sorted(role_dir.glob("*.json"))) return out @@ -160,9 +159,7 @@ class SkillStore: skills_root: Path | None = None, ) -> None: self.root = ( - skills_root - if skills_root is not None - else _resolve_root(root) / "skills" + skills_root if skills_root is not None else _resolve_root(root) / "skills" ) def role_dir(self, role: str) -> Path: @@ -175,4 +172,6 @@ class SkillStore: d = self.role_dir(role) if not d.exists(): return False - return any((child / "SKILL.md").is_file() for child in d.iterdir() if child.is_dir()) + return any( + (child / "SKILL.md").is_file() for child in d.iterdir() if child.is_dir() + ) diff --git a/lib/crewai/tests/skills/self_improve/test_acceptance.py b/lib/crewai/tests/skills/self_improve/test_acceptance.py index b183f4689..a4a43e1a1 100644 --- a/lib/crewai/tests/skills/self_improve/test_acceptance.py +++ b/lib/crewai/tests/skills/self_improve/test_acceptance.py @@ -152,6 +152,84 @@ class TestRejectProposal: assert reject_proposal(proposal, proposal_store=proposal_store) is False +class TestSkillsDirPlumbing: + """The agent's ``SelfImprovementConfig.skills_dir`` should flow through + trace → proposal → accept so the SKILL.md lands at the same place the + agent reads from, regardless of which CLI/TUI path triggered the accept. + """ + + def test_proposal_skills_dir_overrides_default(self, tmp_path: Path) -> None: + # Simulate the reviewer setting skills_dir on a proposal (carried + # over from the trace, which captured it from the agent config). + project_skills = tmp_path / "project" / "skills" / "learned" + proposal = SkillProposal( + agent_role="researcher", + name="cite-sources", + description="Always cite sources", + body="# Cite Sources\n", + rationale="r", + confidence=0.8, + skills_dir=project_skills, + ) + + # No skill_store passed — accept should honor proposal.skills_dir. + proposal_store = ProposalStore(root=tmp_path / "queue") + proposal_store.save(proposal) + path = accept_proposal(proposal, proposal_store=proposal_store) + + # SKILL.md is at ///SKILL.md, NOT at + # the default platform path. + assert project_skills in path.parents + assert path.name == "SKILL.md" + assert "researcher" in str(path) + assert "cite-sources" in str(path) + + def test_explicit_skill_store_overrides_proposal_hint( + self, tmp_path: Path + ) -> None: + # If a caller passes skill_store explicitly (e.g. CLI --skills-dir + # flag), it wins over the proposal's stored hint. + proposal_skills = tmp_path / "from-proposal" + cli_skills = tmp_path / "from-cli" + proposal = SkillProposal( + agent_role="researcher", + name="cite-sources", + description="d", + body="b", + rationale="r", + confidence=0.8, + skills_dir=proposal_skills, + ) + + proposal_store = ProposalStore(root=tmp_path / "queue") + proposal_store.save(proposal) + skill_store = SkillStore(skills_root=cli_skills) + path = accept_proposal( + proposal, proposal_store=proposal_store, skill_store=skill_store + ) + + assert cli_skills in path.parents + assert proposal_skills not in path.parents + + def test_proposal_without_skills_dir_uses_platform_default( + self, tmp_path: Path, monkeypatch + ) -> None: + monkeypatch.setenv("CREWAI_SELF_IMPROVE_DIR", str(tmp_path / "platform")) + proposal = SkillProposal( + agent_role="researcher", + name="cite-sources", + description="d", + body="b", + rationale="r", + confidence=0.8, + ) + proposal_store = ProposalStore(root=tmp_path / "queue") + proposal_store.save(proposal) + path = accept_proposal(proposal, proposal_store=proposal_store) + + assert tmp_path / "platform" in path.parents + + class TestSkillStore: def test_has_any_detects_at_least_one_skill( self, stores, proposal: SkillProposal diff --git a/lib/crewai/tests/skills/self_improve/test_models.py b/lib/crewai/tests/skills/self_improve/test_models.py index feb5f8aec..063ee9b93 100644 --- a/lib/crewai/tests/skills/self_improve/test_models.py +++ b/lib/crewai/tests/skills/self_improve/test_models.py @@ -70,6 +70,30 @@ class TestSkillProposal: assert prop.proposal_kind == "new" assert prop.id.startswith("prop_") + def test_name_is_slugified(self) -> None: + # The reviewer LLM sometimes emits Title Case with spaces; we force + # kebab-case so the existing skill loader's name validator passes. + prop = SkillProposal( + agent_role="r", + name="Provide Direct Answer with Supporting Historical Context", + description="d", + body="b", + rationale="r", + confidence=0.7, + ) + assert prop.name == "provide-direct-answer-with-supporting-historical-context" + + def test_name_strips_punctuation_and_underscores(self) -> None: + prop = SkillProposal( + agent_role="r", + name="My Skill: V2 (the_good_one!)", + description="d", + body="b", + rationale="r", + confidence=0.7, + ) + assert prop.name == "my-skill-v2-the-good-one" + def test_confidence_must_be_in_range(self) -> None: with pytest.raises(ValidationError): SkillProposal( diff --git a/lib/crewai/tests/skills/self_improve/test_reviewer.py b/lib/crewai/tests/skills/self_improve/test_reviewer.py index 9f63c421a..ba8818dbd 100644 --- a/lib/crewai/tests/skills/self_improve/test_reviewer.py +++ b/lib/crewai/tests/skills/self_improve/test_reviewer.py @@ -132,6 +132,22 @@ class TestSkillReviewer: out = reviewer.review([_trace(), _trace(), _trace()]) assert out == [] + def test_skills_dir_propagates_from_trace_to_proposal( + self, reviewer_factory, tmp_path + ) -> None: + from pathlib import Path + + skills_dir = Path(tmp_path) / "project" / "skills" / "learned" + llm = _stub_llm(_ReviewerOutput(proposals=[_llm_proposal(name="cite")])) + reviewer = reviewer_factory(llm, min_traces=2) + traces = [ + _trace(agent_skills_dir=skills_dir), + _trace(agent_skills_dir=skills_dir), + _trace(agent_skills_dir=skills_dir), + ] + [prop] = reviewer.review(traces) + assert prop.skills_dir == skills_dir + def test_pending_proposals_appear_in_prompt(self, reviewer_factory) -> None: llm = _stub_llm(_ReviewerOutput(proposals=[])) reviewer = reviewer_factory(llm, min_traces=2)