mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-06 01:32:36 +00:00
self improve with skills
This commit is contained in:
@@ -378,14 +378,6 @@ class Agent(BaseAgent):
|
||||
collector.attach(crewai_event_bus)
|
||||
self._self_improve_collector = collector
|
||||
|
||||
def _self_improve_config(self) -> SelfImprovementConfig | None:
|
||||
"""Return the active SelfImprovementConfig, or None when disabled."""
|
||||
if not self.self_improve:
|
||||
return None
|
||||
if isinstance(self.self_improve, SelfImprovementConfig):
|
||||
return self.self_improve
|
||||
return SelfImprovementConfig()
|
||||
|
||||
if self.reasoning and self.planning_config is None:
|
||||
warnings.warn(
|
||||
"The 'reasoning' parameter is deprecated. Use 'planning_config=PlanningConfig()' instead.",
|
||||
@@ -398,6 +390,14 @@ class Agent(BaseAgent):
|
||||
|
||||
return self
|
||||
|
||||
def _self_improve_config(self) -> SelfImprovementConfig | None:
|
||||
"""Return the active SelfImprovementConfig, or None when disabled."""
|
||||
if not self.self_improve:
|
||||
return None
|
||||
if isinstance(self.self_improve, SelfImprovementConfig):
|
||||
return self.self_improve
|
||||
return SelfImprovementConfig()
|
||||
|
||||
@property
|
||||
def planning_enabled(self) -> bool:
|
||||
"""Check if planning is enabled for this agent."""
|
||||
@@ -464,9 +464,7 @@ class Agent(BaseAgent):
|
||||
else:
|
||||
candidate = SkillStore().role_dir(self.role)
|
||||
if candidate.is_dir() and any(
|
||||
(c / "SKILL.md").is_file()
|
||||
for c in candidate.iterdir()
|
||||
if c.is_dir()
|
||||
(c / "SKILL.md").is_file() for c in candidate.iterdir() if c.is_dir()
|
||||
):
|
||||
self_improve_dir = candidate
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import click
|
||||
|
||||
@@ -16,18 +17,16 @@ from crewai.skills.self_improve import (
|
||||
)
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai.skills.self_improve.models import RunTrace, SkillProposal
|
||||
|
||||
|
||||
_DEFAULT_REVIEW_MODEL = "anthropic/claude-haiku-4-5"
|
||||
|
||||
|
||||
def _print_proposal_summary(p) -> None:
|
||||
kind = (
|
||||
f"PATCH→{p.target_skill}"
|
||||
if p.proposal_kind == "patch_existing"
|
||||
else "NEW"
|
||||
)
|
||||
click.echo(
|
||||
f" {p.id} {kind:<28} conf={p.confidence:.2f} role={p.agent_role}"
|
||||
)
|
||||
def _print_proposal_summary(p: SkillProposal) -> None:
|
||||
kind = f"PATCH→{p.target_skill}" if p.proposal_kind == "patch_existing" else "NEW"
|
||||
click.echo(f" {p.id} {kind:<28} conf={p.confidence:.2f} role={p.agent_role}")
|
||||
click.echo(f" {p.name}: {p.description}")
|
||||
|
||||
|
||||
@@ -76,7 +75,7 @@ def skills_review(role: str | None, model: str, min_traces: int, floor: float) -
|
||||
raise SystemExit(1)
|
||||
|
||||
# Group traces by role from disk; the role-slug dirs come from the store.
|
||||
by_role: dict[str, list] = defaultdict(list)
|
||||
by_role: dict[str, list[RunTrace]] = defaultdict(list)
|
||||
role_dirs = (
|
||||
[trace_store.role_dir(role)] if role else sorted(trace_store.root.iterdir())
|
||||
)
|
||||
@@ -88,7 +87,9 @@ def skills_review(role: str | None, model: str, min_traces: int, floor: float) -
|
||||
by_role[t.agent_role].append(t)
|
||||
|
||||
if not by_role:
|
||||
click.echo("No traces found." if role is None else f"No traces for role={role!r}.")
|
||||
click.echo(
|
||||
"No traces found." if role is None else f"No traces for role={role!r}."
|
||||
)
|
||||
return
|
||||
|
||||
reviewer_llm = LLM(model=model)
|
||||
@@ -97,8 +98,7 @@ def skills_review(role: str | None, model: str, min_traces: int, floor: float) -
|
||||
for agent_role, traces in by_role.items():
|
||||
if len(traces) < min_traces:
|
||||
click.echo(
|
||||
f"Skipping {agent_role!r}: {len(traces)} trace(s), "
|
||||
f"need {min_traces}."
|
||||
f"Skipping {agent_role!r}: {len(traces)} trace(s), need {min_traces}."
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -147,8 +147,6 @@ def proposals() -> None:
|
||||
def proposals_list(role: str | None) -> None:
|
||||
"""List pending proposals across all roles."""
|
||||
store = ProposalStore()
|
||||
items = store.list_for_role(role) if role else None
|
||||
|
||||
if role:
|
||||
records = [store.load(p) for p in store.list_for_role(role)]
|
||||
else:
|
||||
@@ -190,7 +188,9 @@ def proposals_show(proposal_id: str) -> None:
|
||||
|
||||
@proposals.command(name="accept")
|
||||
@click.argument("proposal_id")
|
||||
@click.option("--force", is_flag=True, help="Overwrite an existing skill of the same name.")
|
||||
@click.option(
|
||||
"--force", is_flag=True, help="Overwrite an existing skill of the same name."
|
||||
)
|
||||
@click.option(
|
||||
"--skills-dir",
|
||||
type=click.Path(file_okay=False, path_type=Path),
|
||||
|
||||
@@ -47,10 +47,18 @@ def accept_proposal(
|
||||
) -> Path:
|
||||
"""Write the proposal as a SKILL.md and remove it from the queue.
|
||||
|
||||
When ``skill_store`` is not provided, the destination is selected in
|
||||
this order:
|
||||
|
||||
1. ``proposal.skills_dir`` — set by the reviewer from the agent's
|
||||
``SelfImprovementConfig.skills_dir``. This is the common case; it
|
||||
keeps accept aligned with where the agent reads from.
|
||||
2. Platform default — ``<db_storage_path>/self_improve/skills/``.
|
||||
|
||||
Args:
|
||||
proposal: The proposal to materialize.
|
||||
force: When True, overwrite an existing SKILL.md at the target path.
|
||||
skill_store: Override for the live-skills store (test injection).
|
||||
skill_store: Explicit override; bypasses the proposal hint.
|
||||
proposal_store: Override for the proposals store (test injection).
|
||||
|
||||
Returns:
|
||||
@@ -60,7 +68,12 @@ def accept_proposal(
|
||||
FileExistsError: When a SKILL.md already exists at the target and
|
||||
``force=False``.
|
||||
"""
|
||||
skill_store = skill_store or SkillStore()
|
||||
if skill_store is None:
|
||||
skill_store = (
|
||||
SkillStore(skills_root=proposal.skills_dir)
|
||||
if proposal.skills_dir is not None
|
||||
else SkillStore()
|
||||
)
|
||||
proposal_store = proposal_store or ProposalStore()
|
||||
|
||||
target_dir = skill_store.skill_dir(proposal.agent_role, proposal.name)
|
||||
|
||||
@@ -10,6 +10,7 @@ from __future__ import annotations
|
||||
import atexit
|
||||
from datetime import UTC, datetime
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import threading
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
@@ -127,6 +128,7 @@ class TraceCollector:
|
||||
agent_id=str(getattr(self._agent, "id", "") or ""),
|
||||
agent_role=self._agent.role,
|
||||
agent_goal=getattr(self._agent, "goal", "") or "",
|
||||
agent_skills_dir=self._agent_skills_dir(),
|
||||
task_id=str(getattr(task, "id", "") or "") or None,
|
||||
task_description=getattr(task, "description", None),
|
||||
loaded_skills=self._collect_loaded_skills(),
|
||||
@@ -187,9 +189,7 @@ class TraceCollector:
|
||||
with self._lock:
|
||||
if self._current is None:
|
||||
return
|
||||
self._current.output_summary = _truncate(
|
||||
event.output, _OUTPUT_TRUNCATE
|
||||
)
|
||||
self._current.output_summary = _truncate(event.output, _OUTPUT_TRUNCATE)
|
||||
self._finalize_locked()
|
||||
|
||||
@bus.on(AgentExecutionErrorEvent)
|
||||
@@ -224,6 +224,7 @@ class TraceCollector:
|
||||
agent_id=str(getattr(self._agent, "id", "") or ""),
|
||||
agent_role=self._agent.role,
|
||||
agent_goal=getattr(self._agent, "goal", "") or "",
|
||||
agent_skills_dir=self._agent_skills_dir(),
|
||||
task_description=_truncate(task_desc, _OUTPUT_TRUNCATE),
|
||||
loaded_skills=self._collect_loaded_skills(),
|
||||
)
|
||||
@@ -237,15 +238,11 @@ class TraceCollector:
|
||||
with self._lock:
|
||||
if self._current is None:
|
||||
return
|
||||
self._current.output_summary = _truncate(
|
||||
event.output, _OUTPUT_TRUNCATE
|
||||
)
|
||||
self._current.output_summary = _truncate(event.output, _OUTPUT_TRUNCATE)
|
||||
self._finalize_locked()
|
||||
|
||||
@bus.on(LiteAgentExecutionErrorEvent)
|
||||
def _on_lite_error(
|
||||
_source: Any, event: LiteAgentExecutionErrorEvent
|
||||
) -> None:
|
||||
def _on_lite_error(_source: Any, event: LiteAgentExecutionErrorEvent) -> None:
|
||||
if not self._is_my_id(event.agent_info.get("id")):
|
||||
return
|
||||
with self._lock:
|
||||
@@ -260,6 +257,21 @@ class TraceCollector:
|
||||
return None
|
||||
return max(0, int((finished_at - started).total_seconds() * 1000))
|
||||
|
||||
def _agent_skills_dir(self) -> Path | None:
|
||||
"""Read skills_dir from the agent's SelfImprovementConfig if set.
|
||||
|
||||
We use ``getattr`` + duck typing instead of importing Agent so the
|
||||
collector stays usable in tests with stub agents.
|
||||
"""
|
||||
config_getter = getattr(self._agent, "_self_improve_config", None)
|
||||
if not callable(config_getter):
|
||||
return None
|
||||
try:
|
||||
config = config_getter()
|
||||
except Exception:
|
||||
return None
|
||||
return getattr(config, "skills_dir", None) if config is not None else None
|
||||
|
||||
def _collect_loaded_skills(self) -> list[str]:
|
||||
skills = getattr(self._agent, "skills", None) or []
|
||||
names: list[str] = []
|
||||
|
||||
@@ -4,10 +4,29 @@ from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
import re
|
||||
from typing import Literal
|
||||
import uuid
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
|
||||
_SLUG_NON_ALNUM = re.compile(r"[^a-z0-9-]+")
|
||||
_SLUG_DASHES = re.compile(r"-+")
|
||||
|
||||
|
||||
def _slugify(value: str) -> str:
|
||||
"""Force a string into the kebab-case shape the skill loader requires.
|
||||
|
||||
The reviewer LLM is told to emit kebab-case names, but doesn't always
|
||||
comply (e.g. it emits Title Case with spaces). Without this normalization
|
||||
the accepted SKILL.md fails the loader's name pattern and is silently
|
||||
skipped, breaking the closed loop.
|
||||
"""
|
||||
s = value.strip().lower().replace("_", "-").replace(" ", "-")
|
||||
s = _SLUG_NON_ALNUM.sub("", s)
|
||||
s = _SLUG_DASHES.sub("-", s).strip("-")
|
||||
return s[:64] or "skill"
|
||||
|
||||
|
||||
def _now() -> datetime:
|
||||
@@ -72,6 +91,14 @@ class RunTrace(BaseModel):
|
||||
agent_id: str | None = None
|
||||
agent_role: str
|
||||
agent_goal: str = ""
|
||||
agent_skills_dir: Path | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Resolved skills_dir from the agent's SelfImprovementConfig at "
|
||||
"trace time. Carried into proposals so accept writes back to "
|
||||
"the same place the agent reads from."
|
||||
),
|
||||
)
|
||||
task_id: str | None = None
|
||||
task_description: str | None = None
|
||||
started_at: datetime = Field(default_factory=_now)
|
||||
@@ -113,4 +140,17 @@ class SkillProposal(BaseModel):
|
||||
proposal_kind: ProposalKind = "new"
|
||||
target_skill: str | None = None
|
||||
derived_from_runs: list[str] = Field(default_factory=list)
|
||||
skills_dir: Path | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Where to write the SKILL.md when this proposal is accepted, "
|
||||
"carried over from the agent's SelfImprovementConfig at trace "
|
||||
"time. Falls back to platform default when None."
|
||||
),
|
||||
)
|
||||
created_at: datetime = Field(default_factory=_now)
|
||||
|
||||
@field_validator("name", mode="before")
|
||||
@classmethod
|
||||
def _force_slug(cls, v: str | None) -> str | None:
|
||||
return _slugify(v) if isinstance(v, str) else v
|
||||
|
||||
@@ -199,9 +199,21 @@ class SkillReviewer:
|
||||
return []
|
||||
|
||||
run_ids = [t.id for t in traces]
|
||||
# Carry the agent's skills_dir from the latest trace forward so the
|
||||
# accept step writes to the same path the agent reads from. We pick
|
||||
# the *last* trace's value because configuration drift (a user
|
||||
# changing skills_dir between runs) should land at the most recent.
|
||||
skills_dir = next(
|
||||
(t.agent_skills_dir for t in reversed(traces) if t.agent_skills_dir),
|
||||
None,
|
||||
)
|
||||
return [
|
||||
p.model_copy(
|
||||
update={"agent_role": self.agent_role, "derived_from_runs": run_ids}
|
||||
update={
|
||||
"agent_role": self.agent_role,
|
||||
"derived_from_runs": run_ids,
|
||||
"skills_dir": skills_dir,
|
||||
}
|
||||
)
|
||||
for p in result.proposals
|
||||
if p.confidence >= self.confidence_floor
|
||||
|
||||
@@ -132,8 +132,7 @@ class ProposalStore:
|
||||
for role_dir in sorted(self.root.iterdir()):
|
||||
if not role_dir.is_dir():
|
||||
continue
|
||||
for path in sorted(role_dir.glob("*.json")):
|
||||
out.append(self.load(path))
|
||||
out.extend(self.load(path) for path in sorted(role_dir.glob("*.json")))
|
||||
return out
|
||||
|
||||
|
||||
@@ -160,9 +159,7 @@ class SkillStore:
|
||||
skills_root: Path | None = None,
|
||||
) -> None:
|
||||
self.root = (
|
||||
skills_root
|
||||
if skills_root is not None
|
||||
else _resolve_root(root) / "skills"
|
||||
skills_root if skills_root is not None else _resolve_root(root) / "skills"
|
||||
)
|
||||
|
||||
def role_dir(self, role: str) -> Path:
|
||||
@@ -175,4 +172,6 @@ class SkillStore:
|
||||
d = self.role_dir(role)
|
||||
if not d.exists():
|
||||
return False
|
||||
return any((child / "SKILL.md").is_file() for child in d.iterdir() if child.is_dir())
|
||||
return any(
|
||||
(child / "SKILL.md").is_file() for child in d.iterdir() if child.is_dir()
|
||||
)
|
||||
|
||||
@@ -152,6 +152,84 @@ class TestRejectProposal:
|
||||
assert reject_proposal(proposal, proposal_store=proposal_store) is False
|
||||
|
||||
|
||||
class TestSkillsDirPlumbing:
|
||||
"""The agent's ``SelfImprovementConfig.skills_dir`` should flow through
|
||||
trace → proposal → accept so the SKILL.md lands at the same place the
|
||||
agent reads from, regardless of which CLI/TUI path triggered the accept.
|
||||
"""
|
||||
|
||||
def test_proposal_skills_dir_overrides_default(self, tmp_path: Path) -> None:
|
||||
# Simulate the reviewer setting skills_dir on a proposal (carried
|
||||
# over from the trace, which captured it from the agent config).
|
||||
project_skills = tmp_path / "project" / "skills" / "learned"
|
||||
proposal = SkillProposal(
|
||||
agent_role="researcher",
|
||||
name="cite-sources",
|
||||
description="Always cite sources",
|
||||
body="# Cite Sources\n",
|
||||
rationale="r",
|
||||
confidence=0.8,
|
||||
skills_dir=project_skills,
|
||||
)
|
||||
|
||||
# No skill_store passed — accept should honor proposal.skills_dir.
|
||||
proposal_store = ProposalStore(root=tmp_path / "queue")
|
||||
proposal_store.save(proposal)
|
||||
path = accept_proposal(proposal, proposal_store=proposal_store)
|
||||
|
||||
# SKILL.md is at <project_skills>/<role>/<name>/SKILL.md, NOT at
|
||||
# the default platform path.
|
||||
assert project_skills in path.parents
|
||||
assert path.name == "SKILL.md"
|
||||
assert "researcher" in str(path)
|
||||
assert "cite-sources" in str(path)
|
||||
|
||||
def test_explicit_skill_store_overrides_proposal_hint(
|
||||
self, tmp_path: Path
|
||||
) -> None:
|
||||
# If a caller passes skill_store explicitly (e.g. CLI --skills-dir
|
||||
# flag), it wins over the proposal's stored hint.
|
||||
proposal_skills = tmp_path / "from-proposal"
|
||||
cli_skills = tmp_path / "from-cli"
|
||||
proposal = SkillProposal(
|
||||
agent_role="researcher",
|
||||
name="cite-sources",
|
||||
description="d",
|
||||
body="b",
|
||||
rationale="r",
|
||||
confidence=0.8,
|
||||
skills_dir=proposal_skills,
|
||||
)
|
||||
|
||||
proposal_store = ProposalStore(root=tmp_path / "queue")
|
||||
proposal_store.save(proposal)
|
||||
skill_store = SkillStore(skills_root=cli_skills)
|
||||
path = accept_proposal(
|
||||
proposal, proposal_store=proposal_store, skill_store=skill_store
|
||||
)
|
||||
|
||||
assert cli_skills in path.parents
|
||||
assert proposal_skills not in path.parents
|
||||
|
||||
def test_proposal_without_skills_dir_uses_platform_default(
|
||||
self, tmp_path: Path, monkeypatch
|
||||
) -> None:
|
||||
monkeypatch.setenv("CREWAI_SELF_IMPROVE_DIR", str(tmp_path / "platform"))
|
||||
proposal = SkillProposal(
|
||||
agent_role="researcher",
|
||||
name="cite-sources",
|
||||
description="d",
|
||||
body="b",
|
||||
rationale="r",
|
||||
confidence=0.8,
|
||||
)
|
||||
proposal_store = ProposalStore(root=tmp_path / "queue")
|
||||
proposal_store.save(proposal)
|
||||
path = accept_proposal(proposal, proposal_store=proposal_store)
|
||||
|
||||
assert tmp_path / "platform" in path.parents
|
||||
|
||||
|
||||
class TestSkillStore:
|
||||
def test_has_any_detects_at_least_one_skill(
|
||||
self, stores, proposal: SkillProposal
|
||||
|
||||
@@ -70,6 +70,30 @@ class TestSkillProposal:
|
||||
assert prop.proposal_kind == "new"
|
||||
assert prop.id.startswith("prop_")
|
||||
|
||||
def test_name_is_slugified(self) -> None:
|
||||
# The reviewer LLM sometimes emits Title Case with spaces; we force
|
||||
# kebab-case so the existing skill loader's name validator passes.
|
||||
prop = SkillProposal(
|
||||
agent_role="r",
|
||||
name="Provide Direct Answer with Supporting Historical Context",
|
||||
description="d",
|
||||
body="b",
|
||||
rationale="r",
|
||||
confidence=0.7,
|
||||
)
|
||||
assert prop.name == "provide-direct-answer-with-supporting-historical-context"
|
||||
|
||||
def test_name_strips_punctuation_and_underscores(self) -> None:
|
||||
prop = SkillProposal(
|
||||
agent_role="r",
|
||||
name="My Skill: V2 (the_good_one!)",
|
||||
description="d",
|
||||
body="b",
|
||||
rationale="r",
|
||||
confidence=0.7,
|
||||
)
|
||||
assert prop.name == "my-skill-v2-the-good-one"
|
||||
|
||||
def test_confidence_must_be_in_range(self) -> None:
|
||||
with pytest.raises(ValidationError):
|
||||
SkillProposal(
|
||||
|
||||
@@ -132,6 +132,22 @@ class TestSkillReviewer:
|
||||
out = reviewer.review([_trace(), _trace(), _trace()])
|
||||
assert out == []
|
||||
|
||||
def test_skills_dir_propagates_from_trace_to_proposal(
|
||||
self, reviewer_factory, tmp_path
|
||||
) -> None:
|
||||
from pathlib import Path
|
||||
|
||||
skills_dir = Path(tmp_path) / "project" / "skills" / "learned"
|
||||
llm = _stub_llm(_ReviewerOutput(proposals=[_llm_proposal(name="cite")]))
|
||||
reviewer = reviewer_factory(llm, min_traces=2)
|
||||
traces = [
|
||||
_trace(agent_skills_dir=skills_dir),
|
||||
_trace(agent_skills_dir=skills_dir),
|
||||
_trace(agent_skills_dir=skills_dir),
|
||||
]
|
||||
[prop] = reviewer.review(traces)
|
||||
assert prop.skills_dir == skills_dir
|
||||
|
||||
def test_pending_proposals_appear_in_prompt(self, reviewer_factory) -> None:
|
||||
llm = _stub_llm(_ReviewerOutput(proposals=[]))
|
||||
reviewer = reviewer_factory(llm, min_traces=2)
|
||||
|
||||
Reference in New Issue
Block a user