self improve with skills

This commit is contained in:
lorenzejay
2026-05-05 14:45:52 -07:00
parent 4af40c64f2
commit 3855076670
10 changed files with 238 additions and 46 deletions

View File

@@ -378,14 +378,6 @@ class Agent(BaseAgent):
collector.attach(crewai_event_bus)
self._self_improve_collector = collector
def _self_improve_config(self) -> SelfImprovementConfig | None:
"""Return the active SelfImprovementConfig, or None when disabled."""
if not self.self_improve:
return None
if isinstance(self.self_improve, SelfImprovementConfig):
return self.self_improve
return SelfImprovementConfig()
if self.reasoning and self.planning_config is None:
warnings.warn(
"The 'reasoning' parameter is deprecated. Use 'planning_config=PlanningConfig()' instead.",
@@ -398,6 +390,14 @@ class Agent(BaseAgent):
return self
def _self_improve_config(self) -> SelfImprovementConfig | None:
"""Return the active SelfImprovementConfig, or None when disabled."""
if not self.self_improve:
return None
if isinstance(self.self_improve, SelfImprovementConfig):
return self.self_improve
return SelfImprovementConfig()
@property
def planning_enabled(self) -> bool:
"""Check if planning is enabled for this agent."""
@@ -464,9 +464,7 @@ class Agent(BaseAgent):
else:
candidate = SkillStore().role_dir(self.role)
if candidate.is_dir() and any(
(c / "SKILL.md").is_file()
for c in candidate.iterdir()
if c.is_dir()
(c / "SKILL.md").is_file() for c in candidate.iterdir() if c.is_dir()
):
self_improve_dir = candidate

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
import click
@@ -16,18 +17,16 @@ from crewai.skills.self_improve import (
)
if TYPE_CHECKING:
from crewai.skills.self_improve.models import RunTrace, SkillProposal
_DEFAULT_REVIEW_MODEL = "anthropic/claude-haiku-4-5"
def _print_proposal_summary(p) -> None:
kind = (
f"PATCH→{p.target_skill}"
if p.proposal_kind == "patch_existing"
else "NEW"
)
click.echo(
f" {p.id} {kind:<28} conf={p.confidence:.2f} role={p.agent_role}"
)
def _print_proposal_summary(p: SkillProposal) -> None:
kind = f"PATCH→{p.target_skill}" if p.proposal_kind == "patch_existing" else "NEW"
click.echo(f" {p.id} {kind:<28} conf={p.confidence:.2f} role={p.agent_role}")
click.echo(f" {p.name}: {p.description}")
@@ -76,7 +75,7 @@ def skills_review(role: str | None, model: str, min_traces: int, floor: float) -
raise SystemExit(1)
# Group traces by role from disk; the role-slug dirs come from the store.
by_role: dict[str, list] = defaultdict(list)
by_role: dict[str, list[RunTrace]] = defaultdict(list)
role_dirs = (
[trace_store.role_dir(role)] if role else sorted(trace_store.root.iterdir())
)
@@ -88,7 +87,9 @@ def skills_review(role: str | None, model: str, min_traces: int, floor: float) -
by_role[t.agent_role].append(t)
if not by_role:
click.echo("No traces found." if role is None else f"No traces for role={role!r}.")
click.echo(
"No traces found." if role is None else f"No traces for role={role!r}."
)
return
reviewer_llm = LLM(model=model)
@@ -97,8 +98,7 @@ def skills_review(role: str | None, model: str, min_traces: int, floor: float) -
for agent_role, traces in by_role.items():
if len(traces) < min_traces:
click.echo(
f"Skipping {agent_role!r}: {len(traces)} trace(s), "
f"need {min_traces}."
f"Skipping {agent_role!r}: {len(traces)} trace(s), need {min_traces}."
)
continue
@@ -147,8 +147,6 @@ def proposals() -> None:
def proposals_list(role: str | None) -> None:
"""List pending proposals across all roles."""
store = ProposalStore()
items = store.list_for_role(role) if role else None
if role:
records = [store.load(p) for p in store.list_for_role(role)]
else:
@@ -190,7 +188,9 @@ def proposals_show(proposal_id: str) -> None:
@proposals.command(name="accept")
@click.argument("proposal_id")
@click.option("--force", is_flag=True, help="Overwrite an existing skill of the same name.")
@click.option(
"--force", is_flag=True, help="Overwrite an existing skill of the same name."
)
@click.option(
"--skills-dir",
type=click.Path(file_okay=False, path_type=Path),

View File

@@ -47,10 +47,18 @@ def accept_proposal(
) -> Path:
"""Write the proposal as a SKILL.md and remove it from the queue.
When ``skill_store`` is not provided, the destination is selected in
this order:
1. ``proposal.skills_dir`` — set by the reviewer from the agent's
``SelfImprovementConfig.skills_dir``. This is the common case; it
keeps accept aligned with where the agent reads from.
2. Platform default — ``<db_storage_path>/self_improve/skills/``.
Args:
proposal: The proposal to materialize.
force: When True, overwrite an existing SKILL.md at the target path.
skill_store: Override for the live-skills store (test injection).
skill_store: Explicit override; bypasses the proposal hint.
proposal_store: Override for the proposals store (test injection).
Returns:
@@ -60,7 +68,12 @@ def accept_proposal(
FileExistsError: When a SKILL.md already exists at the target and
``force=False``.
"""
skill_store = skill_store or SkillStore()
if skill_store is None:
skill_store = (
SkillStore(skills_root=proposal.skills_dir)
if proposal.skills_dir is not None
else SkillStore()
)
proposal_store = proposal_store or ProposalStore()
target_dir = skill_store.skill_dir(proposal.agent_role, proposal.name)

View File

@@ -10,6 +10,7 @@ from __future__ import annotations
import atexit
from datetime import UTC, datetime
import logging
from pathlib import Path
import threading
from typing import TYPE_CHECKING, Any
@@ -127,6 +128,7 @@ class TraceCollector:
agent_id=str(getattr(self._agent, "id", "") or ""),
agent_role=self._agent.role,
agent_goal=getattr(self._agent, "goal", "") or "",
agent_skills_dir=self._agent_skills_dir(),
task_id=str(getattr(task, "id", "") or "") or None,
task_description=getattr(task, "description", None),
loaded_skills=self._collect_loaded_skills(),
@@ -187,9 +189,7 @@ class TraceCollector:
with self._lock:
if self._current is None:
return
self._current.output_summary = _truncate(
event.output, _OUTPUT_TRUNCATE
)
self._current.output_summary = _truncate(event.output, _OUTPUT_TRUNCATE)
self._finalize_locked()
@bus.on(AgentExecutionErrorEvent)
@@ -224,6 +224,7 @@ class TraceCollector:
agent_id=str(getattr(self._agent, "id", "") or ""),
agent_role=self._agent.role,
agent_goal=getattr(self._agent, "goal", "") or "",
agent_skills_dir=self._agent_skills_dir(),
task_description=_truncate(task_desc, _OUTPUT_TRUNCATE),
loaded_skills=self._collect_loaded_skills(),
)
@@ -237,15 +238,11 @@ class TraceCollector:
with self._lock:
if self._current is None:
return
self._current.output_summary = _truncate(
event.output, _OUTPUT_TRUNCATE
)
self._current.output_summary = _truncate(event.output, _OUTPUT_TRUNCATE)
self._finalize_locked()
@bus.on(LiteAgentExecutionErrorEvent)
def _on_lite_error(
_source: Any, event: LiteAgentExecutionErrorEvent
) -> None:
def _on_lite_error(_source: Any, event: LiteAgentExecutionErrorEvent) -> None:
if not self._is_my_id(event.agent_info.get("id")):
return
with self._lock:
@@ -260,6 +257,21 @@ class TraceCollector:
return None
return max(0, int((finished_at - started).total_seconds() * 1000))
def _agent_skills_dir(self) -> Path | None:
"""Read skills_dir from the agent's SelfImprovementConfig if set.
We use ``getattr`` + duck typing instead of importing Agent so the
collector stays usable in tests with stub agents.
"""
config_getter = getattr(self._agent, "_self_improve_config", None)
if not callable(config_getter):
return None
try:
config = config_getter()
except Exception:
return None
return getattr(config, "skills_dir", None) if config is not None else None
def _collect_loaded_skills(self) -> list[str]:
skills = getattr(self._agent, "skills", None) or []
names: list[str] = []

View File

@@ -4,10 +4,29 @@ from __future__ import annotations
from datetime import UTC, datetime
from pathlib import Path
import re
from typing import Literal
import uuid
from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, field_validator
_SLUG_NON_ALNUM = re.compile(r"[^a-z0-9-]+")
_SLUG_DASHES = re.compile(r"-+")
def _slugify(value: str) -> str:
"""Force a string into the kebab-case shape the skill loader requires.
The reviewer LLM is told to emit kebab-case names, but doesn't always
comply (e.g. it emits Title Case with spaces). Without this normalization
the accepted SKILL.md fails the loader's name pattern and is silently
skipped, breaking the closed loop.
"""
s = value.strip().lower().replace("_", "-").replace(" ", "-")
s = _SLUG_NON_ALNUM.sub("", s)
s = _SLUG_DASHES.sub("-", s).strip("-")
return s[:64] or "skill"
def _now() -> datetime:
@@ -72,6 +91,14 @@ class RunTrace(BaseModel):
agent_id: str | None = None
agent_role: str
agent_goal: str = ""
agent_skills_dir: Path | None = Field(
default=None,
description=(
"Resolved skills_dir from the agent's SelfImprovementConfig at "
"trace time. Carried into proposals so accept writes back to "
"the same place the agent reads from."
),
)
task_id: str | None = None
task_description: str | None = None
started_at: datetime = Field(default_factory=_now)
@@ -113,4 +140,17 @@ class SkillProposal(BaseModel):
proposal_kind: ProposalKind = "new"
target_skill: str | None = None
derived_from_runs: list[str] = Field(default_factory=list)
skills_dir: Path | None = Field(
default=None,
description=(
"Where to write the SKILL.md when this proposal is accepted, "
"carried over from the agent's SelfImprovementConfig at trace "
"time. Falls back to platform default when None."
),
)
created_at: datetime = Field(default_factory=_now)
@field_validator("name", mode="before")
@classmethod
def _force_slug(cls, v: str | None) -> str | None:
return _slugify(v) if isinstance(v, str) else v

View File

@@ -199,9 +199,21 @@ class SkillReviewer:
return []
run_ids = [t.id for t in traces]
# Carry the agent's skills_dir from the latest trace forward so the
# accept step writes to the same path the agent reads from. We pick
# the *last* trace's value because configuration drift (a user
# changing skills_dir between runs) should land at the most recent.
skills_dir = next(
(t.agent_skills_dir for t in reversed(traces) if t.agent_skills_dir),
None,
)
return [
p.model_copy(
update={"agent_role": self.agent_role, "derived_from_runs": run_ids}
update={
"agent_role": self.agent_role,
"derived_from_runs": run_ids,
"skills_dir": skills_dir,
}
)
for p in result.proposals
if p.confidence >= self.confidence_floor

View File

@@ -132,8 +132,7 @@ class ProposalStore:
for role_dir in sorted(self.root.iterdir()):
if not role_dir.is_dir():
continue
for path in sorted(role_dir.glob("*.json")):
out.append(self.load(path))
out.extend(self.load(path) for path in sorted(role_dir.glob("*.json")))
return out
@@ -160,9 +159,7 @@ class SkillStore:
skills_root: Path | None = None,
) -> None:
self.root = (
skills_root
if skills_root is not None
else _resolve_root(root) / "skills"
skills_root if skills_root is not None else _resolve_root(root) / "skills"
)
def role_dir(self, role: str) -> Path:
@@ -175,4 +172,6 @@ class SkillStore:
d = self.role_dir(role)
if not d.exists():
return False
return any((child / "SKILL.md").is_file() for child in d.iterdir() if child.is_dir())
return any(
(child / "SKILL.md").is_file() for child in d.iterdir() if child.is_dir()
)

View File

@@ -152,6 +152,84 @@ class TestRejectProposal:
assert reject_proposal(proposal, proposal_store=proposal_store) is False
class TestSkillsDirPlumbing:
"""The agent's ``SelfImprovementConfig.skills_dir`` should flow through
trace → proposal → accept so the SKILL.md lands at the same place the
agent reads from, regardless of which CLI/TUI path triggered the accept.
"""
def test_proposal_skills_dir_overrides_default(self, tmp_path: Path) -> None:
# Simulate the reviewer setting skills_dir on a proposal (carried
# over from the trace, which captured it from the agent config).
project_skills = tmp_path / "project" / "skills" / "learned"
proposal = SkillProposal(
agent_role="researcher",
name="cite-sources",
description="Always cite sources",
body="# Cite Sources\n",
rationale="r",
confidence=0.8,
skills_dir=project_skills,
)
# No skill_store passed — accept should honor proposal.skills_dir.
proposal_store = ProposalStore(root=tmp_path / "queue")
proposal_store.save(proposal)
path = accept_proposal(proposal, proposal_store=proposal_store)
# SKILL.md is at <project_skills>/<role>/<name>/SKILL.md, NOT at
# the default platform path.
assert project_skills in path.parents
assert path.name == "SKILL.md"
assert "researcher" in str(path)
assert "cite-sources" in str(path)
def test_explicit_skill_store_overrides_proposal_hint(
self, tmp_path: Path
) -> None:
# If a caller passes skill_store explicitly (e.g. CLI --skills-dir
# flag), it wins over the proposal's stored hint.
proposal_skills = tmp_path / "from-proposal"
cli_skills = tmp_path / "from-cli"
proposal = SkillProposal(
agent_role="researcher",
name="cite-sources",
description="d",
body="b",
rationale="r",
confidence=0.8,
skills_dir=proposal_skills,
)
proposal_store = ProposalStore(root=tmp_path / "queue")
proposal_store.save(proposal)
skill_store = SkillStore(skills_root=cli_skills)
path = accept_proposal(
proposal, proposal_store=proposal_store, skill_store=skill_store
)
assert cli_skills in path.parents
assert proposal_skills not in path.parents
def test_proposal_without_skills_dir_uses_platform_default(
self, tmp_path: Path, monkeypatch
) -> None:
monkeypatch.setenv("CREWAI_SELF_IMPROVE_DIR", str(tmp_path / "platform"))
proposal = SkillProposal(
agent_role="researcher",
name="cite-sources",
description="d",
body="b",
rationale="r",
confidence=0.8,
)
proposal_store = ProposalStore(root=tmp_path / "queue")
proposal_store.save(proposal)
path = accept_proposal(proposal, proposal_store=proposal_store)
assert tmp_path / "platform" in path.parents
class TestSkillStore:
def test_has_any_detects_at_least_one_skill(
self, stores, proposal: SkillProposal

View File

@@ -70,6 +70,30 @@ class TestSkillProposal:
assert prop.proposal_kind == "new"
assert prop.id.startswith("prop_")
def test_name_is_slugified(self) -> None:
# The reviewer LLM sometimes emits Title Case with spaces; we force
# kebab-case so the existing skill loader's name validator passes.
prop = SkillProposal(
agent_role="r",
name="Provide Direct Answer with Supporting Historical Context",
description="d",
body="b",
rationale="r",
confidence=0.7,
)
assert prop.name == "provide-direct-answer-with-supporting-historical-context"
def test_name_strips_punctuation_and_underscores(self) -> None:
prop = SkillProposal(
agent_role="r",
name="My Skill: V2 (the_good_one!)",
description="d",
body="b",
rationale="r",
confidence=0.7,
)
assert prop.name == "my-skill-v2-the-good-one"
def test_confidence_must_be_in_range(self) -> None:
with pytest.raises(ValidationError):
SkillProposal(

View File

@@ -132,6 +132,22 @@ class TestSkillReviewer:
out = reviewer.review([_trace(), _trace(), _trace()])
assert out == []
def test_skills_dir_propagates_from_trace_to_proposal(
self, reviewer_factory, tmp_path
) -> None:
from pathlib import Path
skills_dir = Path(tmp_path) / "project" / "skills" / "learned"
llm = _stub_llm(_ReviewerOutput(proposals=[_llm_proposal(name="cite")]))
reviewer = reviewer_factory(llm, min_traces=2)
traces = [
_trace(agent_skills_dir=skills_dir),
_trace(agent_skills_dir=skills_dir),
_trace(agent_skills_dir=skills_dir),
]
[prop] = reviewer.review(traces)
assert prop.skills_dir == skills_dir
def test_pending_proposals_appear_in_prompt(self, reviewer_factory) -> None:
llm = _stub_llm(_ReviewerOutput(proposals=[]))
reviewer = reviewer_factory(llm, min_traces=2)