mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-07 15:18:29 +00:00
Compare commits
25 Commits
lg-memory-
...
0.148.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2490e8cd46 | ||
|
|
9b67e5a15f | ||
|
|
6ebb6c9b63 | ||
|
|
53f674be60 | ||
|
|
11717a5213 | ||
|
|
b6d699f764 | ||
|
|
5b15061b87 | ||
|
|
1b6b2b36d9 | ||
|
|
3ada4053bd | ||
|
|
e7a5747c6b | ||
|
|
eec1262d4f | ||
|
|
c6caa763d7 | ||
|
|
08fa3797ca | ||
|
|
bf8fa3232b | ||
|
|
a6e60a5d42 | ||
|
|
7b0f3aabd9 | ||
|
|
f071966951 | ||
|
|
318310bb7a | ||
|
|
34a03f882c | ||
|
|
a0fcc0c8d1 | ||
|
|
748c25451c | ||
|
|
a77dcdd419 | ||
|
|
68f5bdf0d9 | ||
|
|
7f83947020 | ||
|
|
ceb310bcde |
286
.cursorrules
286
.cursorrules
@@ -260,7 +260,7 @@ def handle_success(self):
|
|||||||
# Handle success case
|
# Handle success case
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@listen("failure_path")
|
@listen("failure_path")
|
||||||
def handle_failure(self):
|
def handle_failure(self):
|
||||||
# Handle failure case
|
# Handle failure case
|
||||||
pass
|
pass
|
||||||
@@ -288,7 +288,7 @@ class SelectiveFlow(Flow):
|
|||||||
def critical_step(self):
|
def critical_step(self):
|
||||||
# Only this method's state is persisted
|
# Only this method's state is persisted
|
||||||
self.state["important_data"] = "value"
|
self.state["important_data"] = "value"
|
||||||
|
|
||||||
@start()
|
@start()
|
||||||
def temporary_step(self):
|
def temporary_step(self):
|
||||||
# This method's state is not persisted
|
# This method's state is not persisted
|
||||||
@@ -322,20 +322,20 @@ flow.plot("workflow_diagram") # Generates HTML visualization
|
|||||||
class CyclicFlow(Flow):
|
class CyclicFlow(Flow):
|
||||||
max_iterations = 5
|
max_iterations = 5
|
||||||
current_iteration = 0
|
current_iteration = 0
|
||||||
|
|
||||||
@start("loop")
|
@start("loop")
|
||||||
def process_iteration(self):
|
def process_iteration(self):
|
||||||
if self.current_iteration >= self.max_iterations:
|
if self.current_iteration >= self.max_iterations:
|
||||||
return
|
return
|
||||||
# Process current iteration
|
# Process current iteration
|
||||||
self.current_iteration += 1
|
self.current_iteration += 1
|
||||||
|
|
||||||
@router(process_iteration)
|
@router(process_iteration)
|
||||||
def check_continue(self):
|
def check_continue(self):
|
||||||
if self.current_iteration < self.max_iterations:
|
if self.current_iteration < self.max_iterations:
|
||||||
return "loop" # Continue cycling
|
return "loop" # Continue cycling
|
||||||
return "complete"
|
return "complete"
|
||||||
|
|
||||||
@listen("complete")
|
@listen("complete")
|
||||||
def finalize(self):
|
def finalize(self):
|
||||||
# Final processing
|
# Final processing
|
||||||
@@ -369,7 +369,7 @@ def risky_operation(self):
|
|||||||
self.state["success"] = False
|
self.state["success"] = False
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@listen(risky_operation)
|
@listen(risky_operation)
|
||||||
def handle_result(self, result):
|
def handle_result(self, result):
|
||||||
if self.state.get("success", False):
|
if self.state.get("success", False):
|
||||||
# Handle success case
|
# Handle success case
|
||||||
@@ -390,7 +390,7 @@ class CrewOrchestrationFlow(Flow[WorkflowState]):
|
|||||||
result = research_crew.crew().kickoff(inputs={"topic": self.state.research_topic})
|
result = research_crew.crew().kickoff(inputs={"topic": self.state.research_topic})
|
||||||
self.state.research_results = result.raw
|
self.state.research_results = result.raw
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@listen(research_phase)
|
@listen(research_phase)
|
||||||
def analysis_phase(self, research_results):
|
def analysis_phase(self, research_results):
|
||||||
analysis_crew = AnalysisCrew()
|
analysis_crew = AnalysisCrew()
|
||||||
@@ -400,13 +400,13 @@ class CrewOrchestrationFlow(Flow[WorkflowState]):
|
|||||||
})
|
})
|
||||||
self.state.analysis_results = result.raw
|
self.state.analysis_results = result.raw
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@router(analysis_phase)
|
@router(analysis_phase)
|
||||||
def decide_next_action(self):
|
def decide_next_action(self):
|
||||||
if self.state.analysis_results.confidence > 0.7:
|
if self.state.analysis_results.confidence > 0.7:
|
||||||
return "generate_report"
|
return "generate_report"
|
||||||
return "additional_research"
|
return "additional_research"
|
||||||
|
|
||||||
@listen("generate_report")
|
@listen("generate_report")
|
||||||
def final_report(self):
|
def final_report(self):
|
||||||
reporting_crew = ReportingCrew()
|
reporting_crew = ReportingCrew()
|
||||||
@@ -439,7 +439,7 @@ class CrewOrchestrationFlow(Flow[WorkflowState]):
|
|||||||
## CrewAI Version Compatibility:
|
## CrewAI Version Compatibility:
|
||||||
- Stay updated with CrewAI releases for new features and bug fixes
|
- Stay updated with CrewAI releases for new features and bug fixes
|
||||||
- Test crew functionality when upgrading CrewAI versions
|
- Test crew functionality when upgrading CrewAI versions
|
||||||
- Use version constraints in pyproject.toml (e.g., "crewai[tools]>=0.134.0,<1.0.0")
|
- Use version constraints in pyproject.toml (e.g., "crewai[tools]>=0.140.0,<1.0.0")
|
||||||
- Monitor deprecation warnings for future compatibility
|
- Monitor deprecation warnings for future compatibility
|
||||||
|
|
||||||
## Code Examples and Implementation Patterns
|
## Code Examples and Implementation Patterns
|
||||||
@@ -464,22 +464,22 @@ class ResearchOutput(BaseModel):
|
|||||||
@CrewBase
|
@CrewBase
|
||||||
class ResearchCrew():
|
class ResearchCrew():
|
||||||
"""Advanced research crew with structured outputs and validation"""
|
"""Advanced research crew with structured outputs and validation"""
|
||||||
|
|
||||||
agents: List[BaseAgent]
|
agents: List[BaseAgent]
|
||||||
tasks: List[Task]
|
tasks: List[Task]
|
||||||
|
|
||||||
@before_kickoff
|
@before_kickoff
|
||||||
def setup_environment(self):
|
def setup_environment(self):
|
||||||
"""Initialize environment before crew execution"""
|
"""Initialize environment before crew execution"""
|
||||||
print("🚀 Setting up research environment...")
|
print("🚀 Setting up research environment...")
|
||||||
# Validate API keys, create directories, etc.
|
# Validate API keys, create directories, etc.
|
||||||
|
|
||||||
@after_kickoff
|
@after_kickoff
|
||||||
def cleanup_and_report(self, output):
|
def cleanup_and_report(self, output):
|
||||||
"""Handle post-execution tasks"""
|
"""Handle post-execution tasks"""
|
||||||
print(f"✅ Research completed. Generated {len(output.tasks_output)} task outputs")
|
print(f"✅ Research completed. Generated {len(output.tasks_output)} task outputs")
|
||||||
print(f"📊 Token usage: {output.token_usage}")
|
print(f"📊 Token usage: {output.token_usage}")
|
||||||
|
|
||||||
@agent
|
@agent
|
||||||
def researcher(self) -> Agent:
|
def researcher(self) -> Agent:
|
||||||
return Agent(
|
return Agent(
|
||||||
@@ -490,7 +490,7 @@ class ResearchCrew():
|
|||||||
max_iter=15,
|
max_iter=15,
|
||||||
max_execution_time=1800
|
max_execution_time=1800
|
||||||
)
|
)
|
||||||
|
|
||||||
@agent
|
@agent
|
||||||
def analyst(self) -> Agent:
|
def analyst(self) -> Agent:
|
||||||
return Agent(
|
return Agent(
|
||||||
@@ -499,7 +499,7 @@ class ResearchCrew():
|
|||||||
verbose=True,
|
verbose=True,
|
||||||
memory=True
|
memory=True
|
||||||
)
|
)
|
||||||
|
|
||||||
@task
|
@task
|
||||||
def research_task(self) -> Task:
|
def research_task(self) -> Task:
|
||||||
return Task(
|
return Task(
|
||||||
@@ -507,7 +507,7 @@ class ResearchCrew():
|
|||||||
agent=self.researcher(),
|
agent=self.researcher(),
|
||||||
output_pydantic=ResearchOutput
|
output_pydantic=ResearchOutput
|
||||||
)
|
)
|
||||||
|
|
||||||
@task
|
@task
|
||||||
def validation_task(self) -> Task:
|
def validation_task(self) -> Task:
|
||||||
return Task(
|
return Task(
|
||||||
@@ -517,7 +517,7 @@ class ResearchCrew():
|
|||||||
guardrail=self.validate_research_quality,
|
guardrail=self.validate_research_quality,
|
||||||
max_retries=3
|
max_retries=3
|
||||||
)
|
)
|
||||||
|
|
||||||
def validate_research_quality(self, output) -> tuple[bool, str]:
|
def validate_research_quality(self, output) -> tuple[bool, str]:
|
||||||
"""Custom guardrail to ensure research quality"""
|
"""Custom guardrail to ensure research quality"""
|
||||||
content = output.raw
|
content = output.raw
|
||||||
@@ -526,7 +526,7 @@ class ResearchCrew():
|
|||||||
if not any(keyword in content.lower() for keyword in ['conclusion', 'finding', 'result']):
|
if not any(keyword in content.lower() for keyword in ['conclusion', 'finding', 'result']):
|
||||||
return False, "Missing key analytical elements."
|
return False, "Missing key analytical elements."
|
||||||
return True, content
|
return True, content
|
||||||
|
|
||||||
@crew
|
@crew
|
||||||
def crew(self) -> Crew:
|
def crew(self) -> Crew:
|
||||||
return Crew(
|
return Crew(
|
||||||
@@ -557,13 +557,13 @@ class RobustSearchTool(BaseTool):
|
|||||||
name: str = "robust_search"
|
name: str = "robust_search"
|
||||||
description: str = "Perform web search with retry logic and error handling"
|
description: str = "Perform web search with retry logic and error handling"
|
||||||
args_schema: Type[BaseModel] = SearchInput
|
args_schema: Type[BaseModel] = SearchInput
|
||||||
|
|
||||||
def __init__(self, api_key: Optional[str] = None, **kwargs):
|
def __init__(self, api_key: Optional[str] = None, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.api_key = api_key or os.getenv("SEARCH_API_KEY")
|
self.api_key = api_key or os.getenv("SEARCH_API_KEY")
|
||||||
self.rate_limit_delay = 1.0
|
self.rate_limit_delay = 1.0
|
||||||
self.last_request_time = 0
|
self.last_request_time = 0
|
||||||
|
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=10)
|
wait=wait_exponential(multiplier=1, min=4, max=10)
|
||||||
@@ -575,43 +575,43 @@ class RobustSearchTool(BaseTool):
|
|||||||
time_since_last = time.time() - self.last_request_time
|
time_since_last = time.time() - self.last_request_time
|
||||||
if time_since_last < self.rate_limit_delay:
|
if time_since_last < self.rate_limit_delay:
|
||||||
time.sleep(self.rate_limit_delay - time_since_last)
|
time.sleep(self.rate_limit_delay - time_since_last)
|
||||||
|
|
||||||
# Input validation
|
# Input validation
|
||||||
if not query or len(query.strip()) == 0:
|
if not query or len(query.strip()) == 0:
|
||||||
return "Error: Empty search query provided"
|
return "Error: Empty search query provided"
|
||||||
|
|
||||||
if len(query) > 500:
|
if len(query) > 500:
|
||||||
return "Error: Search query too long (max 500 characters)"
|
return "Error: Search query too long (max 500 characters)"
|
||||||
|
|
||||||
# Perform search
|
# Perform search
|
||||||
results = self._perform_search(query, max_results, timeout)
|
results = self._perform_search(query, max_results, timeout)
|
||||||
self.last_request_time = time.time()
|
self.last_request_time = time.time()
|
||||||
|
|
||||||
return self._format_results(results)
|
return self._format_results(results)
|
||||||
|
|
||||||
except requests.exceptions.Timeout:
|
except requests.exceptions.Timeout:
|
||||||
return f"Search timed out after {timeout} seconds"
|
return f"Search timed out after {timeout} seconds"
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
return f"Search failed due to network error: {str(e)}"
|
return f"Search failed due to network error: {str(e)}"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Unexpected error during search: {str(e)}"
|
return f"Unexpected error during search: {str(e)}"
|
||||||
|
|
||||||
def _perform_search(self, query: str, max_results: int, timeout: int) -> List[dict]:
|
def _perform_search(self, query: str, max_results: int, timeout: int) -> List[dict]:
|
||||||
"""Implement actual search logic here"""
|
"""Implement actual search logic here"""
|
||||||
# Your search API implementation
|
# Your search API implementation
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _format_results(self, results: List[dict]) -> str:
|
def _format_results(self, results: List[dict]) -> str:
|
||||||
"""Format search results for LLM consumption"""
|
"""Format search results for LLM consumption"""
|
||||||
if not results:
|
if not results:
|
||||||
return "No results found for the given query."
|
return "No results found for the given query."
|
||||||
|
|
||||||
formatted = "Search Results:\n\n"
|
formatted = "Search Results:\n\n"
|
||||||
for i, result in enumerate(results[:10], 1):
|
for i, result in enumerate(results[:10], 1):
|
||||||
formatted += f"{i}. {result.get('title', 'No title')}\n"
|
formatted += f"{i}. {result.get('title', 'No title')}\n"
|
||||||
formatted += f" URL: {result.get('url', 'No URL')}\n"
|
formatted += f" URL: {result.get('url', 'No URL')}\n"
|
||||||
formatted += f" Summary: {result.get('snippet', 'No summary')}\n\n"
|
formatted += f" Summary: {result.get('snippet', 'No summary')}\n\n"
|
||||||
|
|
||||||
return formatted
|
return formatted
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -623,20 +623,20 @@ from crewai.memory.storage.mem0_storage import Mem0Storage
|
|||||||
|
|
||||||
class AdvancedMemoryManager:
|
class AdvancedMemoryManager:
|
||||||
"""Enhanced memory management for CrewAI applications"""
|
"""Enhanced memory management for CrewAI applications"""
|
||||||
|
|
||||||
def __init__(self, crew, config: dict = None):
|
def __init__(self, crew, config: dict = None):
|
||||||
self.crew = crew
|
self.crew = crew
|
||||||
self.config = config or {}
|
self.config = config or {}
|
||||||
self.setup_memory_systems()
|
self.setup_memory_systems()
|
||||||
|
|
||||||
def setup_memory_systems(self):
|
def setup_memory_systems(self):
|
||||||
"""Configure multiple memory systems"""
|
"""Configure multiple memory systems"""
|
||||||
# Short-term memory for current session
|
# Short-term memory for current session
|
||||||
self.short_term = ShortTermMemory()
|
self.short_term = ShortTermMemory()
|
||||||
|
|
||||||
# Long-term memory for cross-session persistence
|
# Long-term memory for cross-session persistence
|
||||||
self.long_term = LongTermMemory()
|
self.long_term = LongTermMemory()
|
||||||
|
|
||||||
# External memory with Mem0 (if configured)
|
# External memory with Mem0 (if configured)
|
||||||
if self.config.get('use_external_memory'):
|
if self.config.get('use_external_memory'):
|
||||||
self.external = ExternalMemory.create_storage(
|
self.external = ExternalMemory.create_storage(
|
||||||
@@ -649,8 +649,8 @@ class AdvancedMemoryManager:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def save_with_context(self, content: str, memory_type: str = "short_term",
|
def save_with_context(self, content: str, memory_type: str = "short_term",
|
||||||
metadata: dict = None, agent: str = None):
|
metadata: dict = None, agent: str = None):
|
||||||
"""Save content with enhanced metadata"""
|
"""Save content with enhanced metadata"""
|
||||||
enhanced_metadata = {
|
enhanced_metadata = {
|
||||||
@@ -659,14 +659,14 @@ class AdvancedMemoryManager:
|
|||||||
"crew_type": self.crew.__class__.__name__,
|
"crew_type": self.crew.__class__.__name__,
|
||||||
**(metadata or {})
|
**(metadata or {})
|
||||||
}
|
}
|
||||||
|
|
||||||
if memory_type == "short_term":
|
if memory_type == "short_term":
|
||||||
self.short_term.save(content, enhanced_metadata, agent)
|
self.short_term.save(content, enhanced_metadata, agent)
|
||||||
elif memory_type == "long_term":
|
elif memory_type == "long_term":
|
||||||
self.long_term.save(content, enhanced_metadata, agent)
|
self.long_term.save(content, enhanced_metadata, agent)
|
||||||
elif memory_type == "external" and hasattr(self, 'external'):
|
elif memory_type == "external" and hasattr(self, 'external'):
|
||||||
self.external.save(content, enhanced_metadata, agent)
|
self.external.save(content, enhanced_metadata, agent)
|
||||||
|
|
||||||
def search_across_memories(self, query: str, limit: int = 5) -> dict:
|
def search_across_memories(self, query: str, limit: int = 5) -> dict:
|
||||||
"""Search across all memory systems"""
|
"""Search across all memory systems"""
|
||||||
results = {
|
results = {
|
||||||
@@ -674,23 +674,23 @@ class AdvancedMemoryManager:
|
|||||||
"long_term": [],
|
"long_term": [],
|
||||||
"external": []
|
"external": []
|
||||||
}
|
}
|
||||||
|
|
||||||
# Search short-term memory
|
# Search short-term memory
|
||||||
results["short_term"] = self.short_term.search(query, limit=limit)
|
results["short_term"] = self.short_term.search(query, limit=limit)
|
||||||
|
|
||||||
# Search long-term memory
|
# Search long-term memory
|
||||||
results["long_term"] = self.long_term.search(query, limit=limit)
|
results["long_term"] = self.long_term.search(query, limit=limit)
|
||||||
|
|
||||||
# Search external memory (if available)
|
# Search external memory (if available)
|
||||||
if hasattr(self, 'external'):
|
if hasattr(self, 'external'):
|
||||||
results["external"] = self.external.search(query, limit=limit)
|
results["external"] = self.external.search(query, limit=limit)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def cleanup_old_memories(self, days_threshold: int = 30):
|
def cleanup_old_memories(self, days_threshold: int = 30):
|
||||||
"""Clean up old memories based on age"""
|
"""Clean up old memories based on age"""
|
||||||
cutoff_time = time.time() - (days_threshold * 24 * 60 * 60)
|
cutoff_time = time.time() - (days_threshold * 24 * 60 * 60)
|
||||||
|
|
||||||
# Implement cleanup logic based on timestamps in metadata
|
# Implement cleanup logic based on timestamps in metadata
|
||||||
# This would vary based on your specific storage implementation
|
# This would vary based on your specific storage implementation
|
||||||
pass
|
pass
|
||||||
@@ -719,12 +719,12 @@ class TaskMetrics:
|
|||||||
|
|
||||||
class CrewMonitor:
|
class CrewMonitor:
|
||||||
"""Comprehensive monitoring for CrewAI applications"""
|
"""Comprehensive monitoring for CrewAI applications"""
|
||||||
|
|
||||||
def __init__(self, crew_name: str, log_level: str = "INFO"):
|
def __init__(self, crew_name: str, log_level: str = "INFO"):
|
||||||
self.crew_name = crew_name
|
self.crew_name = crew_name
|
||||||
self.metrics: List[TaskMetrics] = []
|
self.metrics: List[TaskMetrics] = []
|
||||||
self.session_start = time.time()
|
self.session_start = time.time()
|
||||||
|
|
||||||
# Setup logging
|
# Setup logging
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=getattr(logging, log_level),
|
level=getattr(logging, log_level),
|
||||||
@@ -735,7 +735,7 @@ class CrewMonitor:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
self.logger = logging.getLogger(f"CrewAI.{crew_name}")
|
self.logger = logging.getLogger(f"CrewAI.{crew_name}")
|
||||||
|
|
||||||
def start_task_monitoring(self, task_name: str, agent_name: str) -> dict:
|
def start_task_monitoring(self, task_name: str, agent_name: str) -> dict:
|
||||||
"""Start monitoring a task execution"""
|
"""Start monitoring a task execution"""
|
||||||
context = {
|
context = {
|
||||||
@@ -743,16 +743,16 @@ class CrewMonitor:
|
|||||||
"agent_name": agent_name,
|
"agent_name": agent_name,
|
||||||
"start_time": time.time()
|
"start_time": time.time()
|
||||||
}
|
}
|
||||||
|
|
||||||
self.logger.info(f"Task started: {task_name} by {agent_name}")
|
self.logger.info(f"Task started: {task_name} by {agent_name}")
|
||||||
return context
|
return context
|
||||||
|
|
||||||
def end_task_monitoring(self, context: dict, success: bool = True,
|
def end_task_monitoring(self, context: dict, success: bool = True,
|
||||||
tokens_used: int = 0, error: str = None):
|
tokens_used: int = 0, error: str = None):
|
||||||
"""End monitoring and record metrics"""
|
"""End monitoring and record metrics"""
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
duration = end_time - context["start_time"]
|
duration = end_time - context["start_time"]
|
||||||
|
|
||||||
# Get memory usage (if psutil is available)
|
# Get memory usage (if psutil is available)
|
||||||
memory_usage = None
|
memory_usage = None
|
||||||
try:
|
try:
|
||||||
@@ -761,7 +761,7 @@ class CrewMonitor:
|
|||||||
memory_usage = process.memory_info().rss / 1024 / 1024 # MB
|
memory_usage = process.memory_info().rss / 1024 / 1024 # MB
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
metrics = TaskMetrics(
|
metrics = TaskMetrics(
|
||||||
task_name=context["task_name"],
|
task_name=context["task_name"],
|
||||||
agent_name=context["agent_name"],
|
agent_name=context["agent_name"],
|
||||||
@@ -773,29 +773,29 @@ class CrewMonitor:
|
|||||||
error_message=error,
|
error_message=error,
|
||||||
memory_usage_mb=memory_usage
|
memory_usage_mb=memory_usage
|
||||||
)
|
)
|
||||||
|
|
||||||
self.metrics.append(metrics)
|
self.metrics.append(metrics)
|
||||||
|
|
||||||
# Log the completion
|
# Log the completion
|
||||||
status = "SUCCESS" if success else "FAILED"
|
status = "SUCCESS" if success else "FAILED"
|
||||||
self.logger.info(f"Task {status}: {context['task_name']} "
|
self.logger.info(f"Task {status}: {context['task_name']} "
|
||||||
f"(Duration: {duration:.2f}s, Tokens: {tokens_used})")
|
f"(Duration: {duration:.2f}s, Tokens: {tokens_used})")
|
||||||
|
|
||||||
if error:
|
if error:
|
||||||
self.logger.error(f"Task error: {error}")
|
self.logger.error(f"Task error: {error}")
|
||||||
|
|
||||||
def get_performance_summary(self) -> Dict[str, Any]:
|
def get_performance_summary(self) -> Dict[str, Any]:
|
||||||
"""Generate comprehensive performance summary"""
|
"""Generate comprehensive performance summary"""
|
||||||
if not self.metrics:
|
if not self.metrics:
|
||||||
return {"message": "No metrics recorded yet"}
|
return {"message": "No metrics recorded yet"}
|
||||||
|
|
||||||
successful_tasks = [m for m in self.metrics if m.success]
|
successful_tasks = [m for m in self.metrics if m.success]
|
||||||
failed_tasks = [m for m in self.metrics if not m.success]
|
failed_tasks = [m for m in self.metrics if not m.success]
|
||||||
|
|
||||||
total_duration = sum(m.duration for m in self.metrics)
|
total_duration = sum(m.duration for m in self.metrics)
|
||||||
total_tokens = sum(m.tokens_used for m in self.metrics)
|
total_tokens = sum(m.tokens_used for m in self.metrics)
|
||||||
avg_duration = total_duration / len(self.metrics)
|
avg_duration = total_duration / len(self.metrics)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"crew_name": self.crew_name,
|
"crew_name": self.crew_name,
|
||||||
"session_duration": time.time() - self.session_start,
|
"session_duration": time.time() - self.session_start,
|
||||||
@@ -811,7 +811,7 @@ class CrewMonitor:
|
|||||||
"most_token_intensive": max(self.metrics, key=lambda x: x.tokens_used).task_name if self.metrics else None,
|
"most_token_intensive": max(self.metrics, key=lambda x: x.tokens_used).task_name if self.metrics else None,
|
||||||
"common_errors": self._get_common_errors()
|
"common_errors": self._get_common_errors()
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_common_errors(self) -> Dict[str, int]:
|
def _get_common_errors(self) -> Dict[str, int]:
|
||||||
"""Get frequency of common errors"""
|
"""Get frequency of common errors"""
|
||||||
error_counts = {}
|
error_counts = {}
|
||||||
@@ -819,20 +819,20 @@ class CrewMonitor:
|
|||||||
if metric.error_message:
|
if metric.error_message:
|
||||||
error_counts[metric.error_message] = error_counts.get(metric.error_message, 0) + 1
|
error_counts[metric.error_message] = error_counts.get(metric.error_message, 0) + 1
|
||||||
return dict(sorted(error_counts.items(), key=lambda x: x[1], reverse=True))
|
return dict(sorted(error_counts.items(), key=lambda x: x[1], reverse=True))
|
||||||
|
|
||||||
def export_metrics(self, filename: str = None) -> str:
|
def export_metrics(self, filename: str = None) -> str:
|
||||||
"""Export metrics to JSON file"""
|
"""Export metrics to JSON file"""
|
||||||
if not filename:
|
if not filename:
|
||||||
filename = f"crew_metrics_{self.crew_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
|
filename = f"crew_metrics_{self.crew_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
|
||||||
|
|
||||||
export_data = {
|
export_data = {
|
||||||
"summary": self.get_performance_summary(),
|
"summary": self.get_performance_summary(),
|
||||||
"detailed_metrics": [asdict(m) for m in self.metrics]
|
"detailed_metrics": [asdict(m) for m in self.metrics]
|
||||||
}
|
}
|
||||||
|
|
||||||
with open(filename, 'w') as f:
|
with open(filename, 'w') as f:
|
||||||
json.dump(export_data, f, indent=2, default=str)
|
json.dump(export_data, f, indent=2, default=str)
|
||||||
|
|
||||||
self.logger.info(f"Metrics exported to {filename}")
|
self.logger.info(f"Metrics exported to {filename}")
|
||||||
return filename
|
return filename
|
||||||
|
|
||||||
@@ -847,10 +847,10 @@ def monitored_research_task(self) -> Task:
|
|||||||
if context:
|
if context:
|
||||||
tokens = getattr(task_output, 'token_usage', {}).get('total', 0)
|
tokens = getattr(task_output, 'token_usage', {}).get('total', 0)
|
||||||
monitor.end_task_monitoring(context, success=True, tokens_used=tokens)
|
monitor.end_task_monitoring(context, success=True, tokens_used=tokens)
|
||||||
|
|
||||||
# Start monitoring would be called before task execution
|
# Start monitoring would be called before task execution
|
||||||
# This is a simplified example - in practice you'd integrate this into the task execution flow
|
# This is a simplified example - in practice you'd integrate this into the task execution flow
|
||||||
|
|
||||||
return Task(
|
return Task(
|
||||||
config=self.tasks_config['research_task'],
|
config=self.tasks_config['research_task'],
|
||||||
agent=self.researcher(),
|
agent=self.researcher(),
|
||||||
@@ -872,7 +872,7 @@ class ErrorSeverity(Enum):
|
|||||||
|
|
||||||
class CrewError(Exception):
|
class CrewError(Exception):
|
||||||
"""Base exception for CrewAI applications"""
|
"""Base exception for CrewAI applications"""
|
||||||
def __init__(self, message: str, severity: ErrorSeverity = ErrorSeverity.MEDIUM,
|
def __init__(self, message: str, severity: ErrorSeverity = ErrorSeverity.MEDIUM,
|
||||||
context: dict = None):
|
context: dict = None):
|
||||||
super().__init__(message)
|
super().__init__(message)
|
||||||
self.severity = severity
|
self.severity = severity
|
||||||
@@ -893,19 +893,19 @@ class ConfigurationError(CrewError):
|
|||||||
|
|
||||||
class ErrorHandler:
|
class ErrorHandler:
|
||||||
"""Centralized error handling for CrewAI applications"""
|
"""Centralized error handling for CrewAI applications"""
|
||||||
|
|
||||||
def __init__(self, crew_name: str):
|
def __init__(self, crew_name: str):
|
||||||
self.crew_name = crew_name
|
self.crew_name = crew_name
|
||||||
self.error_log: List[CrewError] = []
|
self.error_log: List[CrewError] = []
|
||||||
self.recovery_strategies: Dict[type, Callable] = {}
|
self.recovery_strategies: Dict[type, Callable] = {}
|
||||||
|
|
||||||
def register_recovery_strategy(self, error_type: type, strategy: Callable):
|
def register_recovery_strategy(self, error_type: type, strategy: Callable):
|
||||||
"""Register a recovery strategy for specific error types"""
|
"""Register a recovery strategy for specific error types"""
|
||||||
self.recovery_strategies[error_type] = strategy
|
self.recovery_strategies[error_type] = strategy
|
||||||
|
|
||||||
def handle_error(self, error: Exception, context: dict = None) -> Any:
|
def handle_error(self, error: Exception, context: dict = None) -> Any:
|
||||||
"""Handle errors with appropriate recovery strategies"""
|
"""Handle errors with appropriate recovery strategies"""
|
||||||
|
|
||||||
# Convert to CrewError if needed
|
# Convert to CrewError if needed
|
||||||
if not isinstance(error, CrewError):
|
if not isinstance(error, CrewError):
|
||||||
crew_error = CrewError(
|
crew_error = CrewError(
|
||||||
@@ -915,11 +915,11 @@ class ErrorHandler:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
crew_error = error
|
crew_error = error
|
||||||
|
|
||||||
# Log the error
|
# Log the error
|
||||||
self.error_log.append(crew_error)
|
self.error_log.append(crew_error)
|
||||||
self._log_error(crew_error)
|
self._log_error(crew_error)
|
||||||
|
|
||||||
# Apply recovery strategy if available
|
# Apply recovery strategy if available
|
||||||
error_type = type(error)
|
error_type = type(error)
|
||||||
if error_type in self.recovery_strategies:
|
if error_type in self.recovery_strategies:
|
||||||
@@ -931,21 +931,21 @@ class ErrorHandler:
|
|||||||
ErrorSeverity.HIGH,
|
ErrorSeverity.HIGH,
|
||||||
{"original_error": str(error), "recovery_error": str(recovery_error)}
|
{"original_error": str(error), "recovery_error": str(recovery_error)}
|
||||||
))
|
))
|
||||||
|
|
||||||
# If critical, re-raise
|
# If critical, re-raise
|
||||||
if crew_error.severity == ErrorSeverity.CRITICAL:
|
if crew_error.severity == ErrorSeverity.CRITICAL:
|
||||||
raise crew_error
|
raise crew_error
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _log_error(self, error: CrewError):
|
def _log_error(self, error: CrewError):
|
||||||
"""Log error with appropriate level based on severity"""
|
"""Log error with appropriate level based on severity"""
|
||||||
logger = logging.getLogger(f"CrewAI.{self.crew_name}.ErrorHandler")
|
logger = logging.getLogger(f"CrewAI.{self.crew_name}.ErrorHandler")
|
||||||
|
|
||||||
error_msg = f"[{error.severity.value.upper()}] {error}"
|
error_msg = f"[{error.severity.value.upper()}] {error}"
|
||||||
if error.context:
|
if error.context:
|
||||||
error_msg += f" | Context: {error.context}"
|
error_msg += f" | Context: {error.context}"
|
||||||
|
|
||||||
if error.severity in [ErrorSeverity.HIGH, ErrorSeverity.CRITICAL]:
|
if error.severity in [ErrorSeverity.HIGH, ErrorSeverity.CRITICAL]:
|
||||||
logger.error(error_msg)
|
logger.error(error_msg)
|
||||||
logger.error(f"Stack trace: {traceback.format_exc()}")
|
logger.error(f"Stack trace: {traceback.format_exc()}")
|
||||||
@@ -953,16 +953,16 @@ class ErrorHandler:
|
|||||||
logger.warning(error_msg)
|
logger.warning(error_msg)
|
||||||
else:
|
else:
|
||||||
logger.info(error_msg)
|
logger.info(error_msg)
|
||||||
|
|
||||||
def get_error_summary(self) -> Dict[str, Any]:
|
def get_error_summary(self) -> Dict[str, Any]:
|
||||||
"""Get summary of errors encountered"""
|
"""Get summary of errors encountered"""
|
||||||
if not self.error_log:
|
if not self.error_log:
|
||||||
return {"total_errors": 0}
|
return {"total_errors": 0}
|
||||||
|
|
||||||
severity_counts = {}
|
severity_counts = {}
|
||||||
for error in self.error_log:
|
for error in self.error_log:
|
||||||
severity_counts[error.severity.value] = severity_counts.get(error.severity.value, 0) + 1
|
severity_counts[error.severity.value] = severity_counts.get(error.severity.value, 0) + 1
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"total_errors": len(self.error_log),
|
"total_errors": len(self.error_log),
|
||||||
"severity_breakdown": severity_counts,
|
"severity_breakdown": severity_counts,
|
||||||
@@ -1004,7 +1004,7 @@ def robust_task(self) -> Task:
|
|||||||
# Use fallback response
|
# Use fallback response
|
||||||
return "Task failed, using fallback response"
|
return "Task failed, using fallback response"
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
return Task(
|
return Task(
|
||||||
config=self.tasks_config['research_task'],
|
config=self.tasks_config['research_task'],
|
||||||
agent=self.researcher()
|
agent=self.researcher()
|
||||||
@@ -1020,60 +1020,60 @@ from pydantic import BaseSettings, Field, validator
|
|||||||
|
|
||||||
class Environment(str, Enum):
|
class Environment(str, Enum):
|
||||||
DEVELOPMENT = "development"
|
DEVELOPMENT = "development"
|
||||||
TESTING = "testing"
|
TESTING = "testing"
|
||||||
STAGING = "staging"
|
STAGING = "staging"
|
||||||
PRODUCTION = "production"
|
PRODUCTION = "production"
|
||||||
|
|
||||||
class CrewAISettings(BaseSettings):
|
class CrewAISettings(BaseSettings):
|
||||||
"""Comprehensive settings management for CrewAI applications"""
|
"""Comprehensive settings management for CrewAI applications"""
|
||||||
|
|
||||||
# Environment
|
# Environment
|
||||||
environment: Environment = Field(default=Environment.DEVELOPMENT)
|
environment: Environment = Field(default=Environment.DEVELOPMENT)
|
||||||
debug: bool = Field(default=True)
|
debug: bool = Field(default=True)
|
||||||
|
|
||||||
# API Keys (loaded from environment)
|
# API Keys (loaded from environment)
|
||||||
openai_api_key: Optional[str] = Field(default=None, env="OPENAI_API_KEY")
|
openai_api_key: Optional[str] = Field(default=None, env="OPENAI_API_KEY")
|
||||||
anthropic_api_key: Optional[str] = Field(default=None, env="ANTHROPIC_API_KEY")
|
anthropic_api_key: Optional[str] = Field(default=None, env="ANTHROPIC_API_KEY")
|
||||||
serper_api_key: Optional[str] = Field(default=None, env="SERPER_API_KEY")
|
serper_api_key: Optional[str] = Field(default=None, env="SERPER_API_KEY")
|
||||||
mem0_api_key: Optional[str] = Field(default=None, env="MEM0_API_KEY")
|
mem0_api_key: Optional[str] = Field(default=None, env="MEM0_API_KEY")
|
||||||
|
|
||||||
# CrewAI Configuration
|
# CrewAI Configuration
|
||||||
crew_max_rpm: int = Field(default=100)
|
crew_max_rpm: int = Field(default=100)
|
||||||
crew_max_execution_time: int = Field(default=3600) # 1 hour
|
crew_max_execution_time: int = Field(default=3600) # 1 hour
|
||||||
default_llm_model: str = Field(default="gpt-4")
|
default_llm_model: str = Field(default="gpt-4")
|
||||||
fallback_llm_model: str = Field(default="gpt-3.5-turbo")
|
fallback_llm_model: str = Field(default="gpt-3.5-turbo")
|
||||||
|
|
||||||
# Memory and Storage
|
# Memory and Storage
|
||||||
crewai_storage_dir: str = Field(default="./storage", env="CREWAI_STORAGE_DIR")
|
crewai_storage_dir: str = Field(default="./storage", env="CREWAI_STORAGE_DIR")
|
||||||
memory_enabled: bool = Field(default=True)
|
memory_enabled: bool = Field(default=True)
|
||||||
memory_cleanup_interval: int = Field(default=86400) # 24 hours in seconds
|
memory_cleanup_interval: int = Field(default=86400) # 24 hours in seconds
|
||||||
|
|
||||||
# Performance
|
# Performance
|
||||||
enable_caching: bool = Field(default=True)
|
enable_caching: bool = Field(default=True)
|
||||||
max_retries: int = Field(default=3)
|
max_retries: int = Field(default=3)
|
||||||
retry_delay: float = Field(default=1.0)
|
retry_delay: float = Field(default=1.0)
|
||||||
|
|
||||||
# Monitoring
|
# Monitoring
|
||||||
enable_monitoring: bool = Field(default=True)
|
enable_monitoring: bool = Field(default=True)
|
||||||
log_level: str = Field(default="INFO")
|
log_level: str = Field(default="INFO")
|
||||||
metrics_export_interval: int = Field(default=3600) # 1 hour
|
metrics_export_interval: int = Field(default=3600) # 1 hour
|
||||||
|
|
||||||
# Security
|
# Security
|
||||||
input_sanitization: bool = Field(default=True)
|
input_sanitization: bool = Field(default=True)
|
||||||
max_input_length: int = Field(default=10000)
|
max_input_length: int = Field(default=10000)
|
||||||
allowed_file_types: list = Field(default=["txt", "md", "pdf", "docx"])
|
allowed_file_types: list = Field(default=["txt", "md", "pdf", "docx"])
|
||||||
|
|
||||||
@validator('environment', pre=True)
|
@validator('environment', pre=True)
|
||||||
def set_debug_based_on_env(cls, v):
|
def set_debug_based_on_env(cls, v):
|
||||||
return v
|
return v
|
||||||
|
|
||||||
@validator('debug')
|
@validator('debug')
|
||||||
def set_debug_from_env(cls, v, values):
|
def set_debug_from_env(cls, v, values):
|
||||||
env = values.get('environment')
|
env = values.get('environment')
|
||||||
if env == Environment.PRODUCTION:
|
if env == Environment.PRODUCTION:
|
||||||
return False
|
return False
|
||||||
return v
|
return v
|
||||||
|
|
||||||
@validator('openai_api_key')
|
@validator('openai_api_key')
|
||||||
def validate_openai_key(cls, v):
|
def validate_openai_key(cls, v):
|
||||||
if not v:
|
if not v:
|
||||||
@@ -1081,15 +1081,15 @@ class CrewAISettings(BaseSettings):
|
|||||||
if not v.startswith('sk-'):
|
if not v.startswith('sk-'):
|
||||||
raise ValueError("Invalid OpenAI API key format")
|
raise ValueError("Invalid OpenAI API key format")
|
||||||
return v
|
return v
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_production(self) -> bool:
|
def is_production(self) -> bool:
|
||||||
return self.environment == Environment.PRODUCTION
|
return self.environment == Environment.PRODUCTION
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_development(self) -> bool:
|
def is_development(self) -> bool:
|
||||||
return self.environment == Environment.DEVELOPMENT
|
return self.environment == Environment.DEVELOPMENT
|
||||||
|
|
||||||
def get_llm_config(self) -> Dict[str, Any]:
|
def get_llm_config(self) -> Dict[str, Any]:
|
||||||
"""Get LLM configuration based on environment"""
|
"""Get LLM configuration based on environment"""
|
||||||
config = {
|
config = {
|
||||||
@@ -1098,12 +1098,12 @@ class CrewAISettings(BaseSettings):
|
|||||||
"max_tokens": 4000 if self.is_production else 2000,
|
"max_tokens": 4000 if self.is_production else 2000,
|
||||||
"timeout": 60
|
"timeout": 60
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.is_development:
|
if self.is_development:
|
||||||
config["model"] = self.fallback_llm_model
|
config["model"] = self.fallback_llm_model
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
def get_memory_config(self) -> Dict[str, Any]:
|
def get_memory_config(self) -> Dict[str, Any]:
|
||||||
"""Get memory configuration"""
|
"""Get memory configuration"""
|
||||||
return {
|
return {
|
||||||
@@ -1112,7 +1112,7 @@ class CrewAISettings(BaseSettings):
|
|||||||
"cleanup_interval": self.memory_cleanup_interval,
|
"cleanup_interval": self.memory_cleanup_interval,
|
||||||
"provider": "mem0" if self.mem0_api_key and self.is_production else "local"
|
"provider": "mem0" if self.mem0_api_key and self.is_production else "local"
|
||||||
}
|
}
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
env_file = ".env"
|
env_file = ".env"
|
||||||
env_file_encoding = 'utf-8'
|
env_file_encoding = 'utf-8'
|
||||||
@@ -1125,25 +1125,25 @@ settings = CrewAISettings()
|
|||||||
@CrewBase
|
@CrewBase
|
||||||
class ConfigurableCrew():
|
class ConfigurableCrew():
|
||||||
"""Crew that uses centralized configuration"""
|
"""Crew that uses centralized configuration"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.settings = settings
|
self.settings = settings
|
||||||
self.validate_configuration()
|
self.validate_configuration()
|
||||||
|
|
||||||
def validate_configuration(self):
|
def validate_configuration(self):
|
||||||
"""Validate configuration before crew execution"""
|
"""Validate configuration before crew execution"""
|
||||||
required_keys = [self.settings.openai_api_key]
|
required_keys = [self.settings.openai_api_key]
|
||||||
if not all(required_keys):
|
if not all(required_keys):
|
||||||
raise ConfigurationError("Missing required API keys")
|
raise ConfigurationError("Missing required API keys")
|
||||||
|
|
||||||
if not os.path.exists(self.settings.crewai_storage_dir):
|
if not os.path.exists(self.settings.crewai_storage_dir):
|
||||||
os.makedirs(self.settings.crewai_storage_dir, exist_ok=True)
|
os.makedirs(self.settings.crewai_storage_dir, exist_ok=True)
|
||||||
|
|
||||||
@agent
|
@agent
|
||||||
def adaptive_agent(self) -> Agent:
|
def adaptive_agent(self) -> Agent:
|
||||||
"""Agent that adapts to configuration"""
|
"""Agent that adapts to configuration"""
|
||||||
llm_config = self.settings.get_llm_config()
|
llm_config = self.settings.get_llm_config()
|
||||||
|
|
||||||
return Agent(
|
return Agent(
|
||||||
config=self.agents_config['researcher'],
|
config=self.agents_config['researcher'],
|
||||||
llm=llm_config["model"],
|
llm=llm_config["model"],
|
||||||
@@ -1163,7 +1163,7 @@ from crewai.tasks.task_output import TaskOutput
|
|||||||
|
|
||||||
class CrewAITestFramework:
|
class CrewAITestFramework:
|
||||||
"""Comprehensive testing framework for CrewAI applications"""
|
"""Comprehensive testing framework for CrewAI applications"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_mock_agent(role: str = "test_agent", tools: list = None) -> Mock:
|
def create_mock_agent(role: str = "test_agent", tools: list = None) -> Mock:
|
||||||
"""Create a mock agent for testing"""
|
"""Create a mock agent for testing"""
|
||||||
@@ -1175,9 +1175,9 @@ class CrewAITestFramework:
|
|||||||
mock_agent.llm = "gpt-3.5-turbo"
|
mock_agent.llm = "gpt-3.5-turbo"
|
||||||
mock_agent.verbose = False
|
mock_agent.verbose = False
|
||||||
return mock_agent
|
return mock_agent
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_mock_task_output(content: str, success: bool = True,
|
def create_mock_task_output(content: str, success: bool = True,
|
||||||
tokens: int = 100) -> TaskOutput:
|
tokens: int = 100) -> TaskOutput:
|
||||||
"""Create a mock task output for testing"""
|
"""Create a mock task output for testing"""
|
||||||
return TaskOutput(
|
return TaskOutput(
|
||||||
@@ -1187,13 +1187,13 @@ class CrewAITestFramework:
|
|||||||
pydantic=None,
|
pydantic=None,
|
||||||
json_dict=None
|
json_dict=None
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_test_crew(agents: list = None, tasks: list = None) -> Crew:
|
def create_test_crew(agents: list = None, tasks: list = None) -> Crew:
|
||||||
"""Create a test crew with mock components"""
|
"""Create a test crew with mock components"""
|
||||||
test_agents = agents or [CrewAITestFramework.create_mock_agent()]
|
test_agents = agents or [CrewAITestFramework.create_mock_agent()]
|
||||||
test_tasks = tasks or []
|
test_tasks = tasks or []
|
||||||
|
|
||||||
return Crew(
|
return Crew(
|
||||||
agents=test_agents,
|
agents=test_agents,
|
||||||
tasks=test_tasks,
|
tasks=test_tasks,
|
||||||
@@ -1203,53 +1203,53 @@ class CrewAITestFramework:
|
|||||||
# Example test cases
|
# Example test cases
|
||||||
class TestResearchCrew:
|
class TestResearchCrew:
|
||||||
"""Test cases for research crew functionality"""
|
"""Test cases for research crew functionality"""
|
||||||
|
|
||||||
def setup_method(self):
|
def setup_method(self):
|
||||||
"""Setup test environment"""
|
"""Setup test environment"""
|
||||||
self.framework = CrewAITestFramework()
|
self.framework = CrewAITestFramework()
|
||||||
self.mock_serper = Mock()
|
self.mock_serper = Mock()
|
||||||
|
|
||||||
@patch('crewai_tools.SerperDevTool')
|
@patch('crewai_tools.SerperDevTool')
|
||||||
def test_agent_creation(self, mock_serper_tool):
|
def test_agent_creation(self, mock_serper_tool):
|
||||||
"""Test agent creation with proper configuration"""
|
"""Test agent creation with proper configuration"""
|
||||||
mock_serper_tool.return_value = self.mock_serper
|
mock_serper_tool.return_value = self.mock_serper
|
||||||
|
|
||||||
crew = ResearchCrew()
|
crew = ResearchCrew()
|
||||||
researcher = crew.researcher()
|
researcher = crew.researcher()
|
||||||
|
|
||||||
assert researcher.role == "Senior Research Analyst"
|
assert researcher.role == "Senior Research Analyst"
|
||||||
assert len(researcher.tools) > 0
|
assert len(researcher.tools) > 0
|
||||||
assert researcher.verbose is True
|
assert researcher.verbose is True
|
||||||
|
|
||||||
def test_task_validation(self):
|
def test_task_validation(self):
|
||||||
"""Test task validation logic"""
|
"""Test task validation logic"""
|
||||||
crew = ResearchCrew()
|
crew = ResearchCrew()
|
||||||
|
|
||||||
# Test valid output
|
# Test valid output
|
||||||
valid_output = self.framework.create_mock_task_output(
|
valid_output = self.framework.create_mock_task_output(
|
||||||
"This is a comprehensive research summary with conclusions and findings."
|
"This is a comprehensive research summary with conclusions and findings."
|
||||||
)
|
)
|
||||||
is_valid, message = crew.validate_research_quality(valid_output)
|
is_valid, message = crew.validate_research_quality(valid_output)
|
||||||
assert is_valid is True
|
assert is_valid is True
|
||||||
|
|
||||||
# Test invalid output (too short)
|
# Test invalid output (too short)
|
||||||
invalid_output = self.framework.create_mock_task_output("Too short")
|
invalid_output = self.framework.create_mock_task_output("Too short")
|
||||||
is_valid, message = crew.validate_research_quality(invalid_output)
|
is_valid, message = crew.validate_research_quality(invalid_output)
|
||||||
assert is_valid is False
|
assert is_valid is False
|
||||||
assert "brief" in message.lower()
|
assert "brief" in message.lower()
|
||||||
|
|
||||||
@patch('requests.get')
|
@patch('requests.get')
|
||||||
def test_tool_error_handling(self, mock_requests):
|
def test_tool_error_handling(self, mock_requests):
|
||||||
"""Test tool error handling and recovery"""
|
"""Test tool error handling and recovery"""
|
||||||
# Simulate network error
|
# Simulate network error
|
||||||
mock_requests.side_effect = requests.exceptions.RequestException("Network error")
|
mock_requests.side_effect = requests.exceptions.RequestException("Network error")
|
||||||
|
|
||||||
tool = RobustSearchTool()
|
tool = RobustSearchTool()
|
||||||
result = tool._run("test query")
|
result = tool._run("test query")
|
||||||
|
|
||||||
assert "network error" in result.lower()
|
assert "network error" in result.lower()
|
||||||
assert "failed" in result.lower()
|
assert "failed" in result.lower()
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_crew_execution_flow(self):
|
async def test_crew_execution_flow(self):
|
||||||
"""Test complete crew execution with mocked dependencies"""
|
"""Test complete crew execution with mocked dependencies"""
|
||||||
@@ -1257,18 +1257,18 @@ class TestResearchCrew:
|
|||||||
mock_execute.return_value = self.framework.create_mock_task_output(
|
mock_execute.return_value = self.framework.create_mock_task_output(
|
||||||
"Research completed successfully with findings and recommendations."
|
"Research completed successfully with findings and recommendations."
|
||||||
)
|
)
|
||||||
|
|
||||||
crew = ResearchCrew()
|
crew = ResearchCrew()
|
||||||
result = crew.crew().kickoff(inputs={"topic": "AI testing"})
|
result = crew.crew().kickoff(inputs={"topic": "AI testing"})
|
||||||
|
|
||||||
assert result is not None
|
assert result is not None
|
||||||
assert "successfully" in result.raw.lower()
|
assert "successfully" in result.raw.lower()
|
||||||
|
|
||||||
def test_memory_integration(self):
|
def test_memory_integration(self):
|
||||||
"""Test memory system integration"""
|
"""Test memory system integration"""
|
||||||
crew = ResearchCrew()
|
crew = ResearchCrew()
|
||||||
memory_manager = AdvancedMemoryManager(crew)
|
memory_manager = AdvancedMemoryManager(crew)
|
||||||
|
|
||||||
# Test saving to memory
|
# Test saving to memory
|
||||||
test_content = "Important research finding about AI"
|
test_content = "Important research finding about AI"
|
||||||
memory_manager.save_with_context(
|
memory_manager.save_with_context(
|
||||||
@@ -1277,34 +1277,34 @@ class TestResearchCrew:
|
|||||||
metadata={"importance": "high"},
|
metadata={"importance": "high"},
|
||||||
agent="researcher"
|
agent="researcher"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Test searching memory
|
# Test searching memory
|
||||||
results = memory_manager.search_across_memories("AI research")
|
results = memory_manager.search_across_memories("AI research")
|
||||||
assert "short_term" in results
|
assert "short_term" in results
|
||||||
|
|
||||||
def test_error_handling_workflow(self):
|
def test_error_handling_workflow(self):
|
||||||
"""Test error handling and recovery mechanisms"""
|
"""Test error handling and recovery mechanisms"""
|
||||||
error_handler = ErrorHandler("test_crew")
|
error_handler = ErrorHandler("test_crew")
|
||||||
|
|
||||||
# Test error registration and handling
|
# Test error registration and handling
|
||||||
test_error = TaskExecutionError("Test task failed", ErrorSeverity.MEDIUM)
|
test_error = TaskExecutionError("Test task failed", ErrorSeverity.MEDIUM)
|
||||||
result = error_handler.handle_error(test_error)
|
result = error_handler.handle_error(test_error)
|
||||||
|
|
||||||
assert len(error_handler.error_log) == 1
|
assert len(error_handler.error_log) == 1
|
||||||
assert error_handler.error_log[0].severity == ErrorSeverity.MEDIUM
|
assert error_handler.error_log[0].severity == ErrorSeverity.MEDIUM
|
||||||
|
|
||||||
def test_configuration_validation(self):
|
def test_configuration_validation(self):
|
||||||
"""Test configuration validation"""
|
"""Test configuration validation"""
|
||||||
# Test with missing API key
|
# Test with missing API key
|
||||||
with patch.dict(os.environ, {}, clear=True):
|
with patch.dict(os.environ, {}, clear=True):
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
settings = CrewAISettings()
|
settings = CrewAISettings()
|
||||||
|
|
||||||
# Test with valid configuration
|
# Test with valid configuration
|
||||||
with patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key"}):
|
with patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key"}):
|
||||||
settings = CrewAISettings()
|
settings = CrewAISettings()
|
||||||
assert settings.openai_api_key == "sk-test-key"
|
assert settings.openai_api_key == "sk-test-key"
|
||||||
|
|
||||||
@pytest.mark.integration
|
@pytest.mark.integration
|
||||||
def test_end_to_end_workflow(self):
|
def test_end_to_end_workflow(self):
|
||||||
"""Integration test for complete workflow"""
|
"""Integration test for complete workflow"""
|
||||||
@@ -1315,41 +1315,41 @@ class TestResearchCrew:
|
|||||||
# Performance testing
|
# Performance testing
|
||||||
class TestCrewPerformance:
|
class TestCrewPerformance:
|
||||||
"""Performance tests for CrewAI applications"""
|
"""Performance tests for CrewAI applications"""
|
||||||
|
|
||||||
def test_memory_usage(self):
|
def test_memory_usage(self):
|
||||||
"""Test memory usage during crew execution"""
|
"""Test memory usage during crew execution"""
|
||||||
import psutil
|
import psutil
|
||||||
import gc
|
import gc
|
||||||
|
|
||||||
process = psutil.Process()
|
process = psutil.Process()
|
||||||
initial_memory = process.memory_info().rss
|
initial_memory = process.memory_info().rss
|
||||||
|
|
||||||
# Create and run crew multiple times
|
# Create and run crew multiple times
|
||||||
for i in range(10):
|
for i in range(10):
|
||||||
crew = ResearchCrew()
|
crew = ResearchCrew()
|
||||||
# Simulate crew execution
|
# Simulate crew execution
|
||||||
del crew
|
del crew
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
|
||||||
final_memory = process.memory_info().rss
|
final_memory = process.memory_info().rss
|
||||||
memory_increase = final_memory - initial_memory
|
memory_increase = final_memory - initial_memory
|
||||||
|
|
||||||
# Assert memory increase is reasonable (less than 100MB)
|
# Assert memory increase is reasonable (less than 100MB)
|
||||||
assert memory_increase < 100 * 1024 * 1024
|
assert memory_increase < 100 * 1024 * 1024
|
||||||
|
|
||||||
def test_concurrent_execution(self):
|
def test_concurrent_execution(self):
|
||||||
"""Test concurrent crew execution"""
|
"""Test concurrent crew execution"""
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
|
|
||||||
def run_crew(crew_id):
|
def run_crew(crew_id):
|
||||||
crew = ResearchCrew()
|
crew = ResearchCrew()
|
||||||
# Simulate execution
|
# Simulate execution
|
||||||
return f"crew_{crew_id}_completed"
|
return f"crew_{crew_id}_completed"
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
|
||||||
futures = [executor.submit(run_crew, i) for i in range(5)]
|
futures = [executor.submit(run_crew, i) for i in range(5)]
|
||||||
results = [future.result() for future in futures]
|
results = [future.result() for future in futures]
|
||||||
|
|
||||||
assert len(results) == 5
|
assert len(results) == 5
|
||||||
assert all("completed" in result for result in results)
|
assert all("completed" in result for result in results)
|
||||||
|
|
||||||
@@ -1400,7 +1400,7 @@ class TestCrewPerformance:
|
|||||||
|
|
||||||
### Development:
|
### Development:
|
||||||
1. Always use .env files for sensitive configuration
|
1. Always use .env files for sensitive configuration
|
||||||
2. Implement comprehensive error handling and logging
|
2. Implement comprehensive error handling and logging
|
||||||
3. Use structured outputs with Pydantic for reliability
|
3. Use structured outputs with Pydantic for reliability
|
||||||
4. Test crew functionality with different input scenarios
|
4. Test crew functionality with different input scenarios
|
||||||
5. Follow CrewAI patterns and conventions consistently
|
5. Follow CrewAI patterns and conventions consistently
|
||||||
@@ -1426,4 +1426,4 @@ class TestCrewPerformance:
|
|||||||
5. Use async patterns for I/O-bound operations
|
5. Use async patterns for I/O-bound operations
|
||||||
6. Implement proper connection pooling and resource management
|
6. Implement proper connection pooling and resource management
|
||||||
7. Profile and optimize critical paths
|
7. Profile and optimize critical paths
|
||||||
8. Plan for horizontal scaling when needed
|
8. Plan for horizontal scaling when needed
|
||||||
|
|||||||
36
.github/workflows/tests.yml
vendored
36
.github/workflows/tests.yml
vendored
@@ -7,14 +7,18 @@ permissions:
|
|||||||
|
|
||||||
env:
|
env:
|
||||||
OPENAI_API_KEY: fake-api-key
|
OPENAI_API_KEY: fake-api-key
|
||||||
|
PYTHONUNBUFFERED: 1
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
tests:
|
tests:
|
||||||
|
name: tests (${{ matrix.python-version }})
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
timeout-minutes: 15
|
timeout-minutes: 15
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: true
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ['3.10', '3.11', '3.12', '3.13']
|
python-version: ['3.10', '3.11', '3.12', '3.13']
|
||||||
|
group: [1, 2, 3, 4, 5, 6, 7, 8]
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -23,6 +27,9 @@ jobs:
|
|||||||
uses: astral-sh/setup-uv@v3
|
uses: astral-sh/setup-uv@v3
|
||||||
with:
|
with:
|
||||||
enable-cache: true
|
enable-cache: true
|
||||||
|
cache-dependency-glob: |
|
||||||
|
**/pyproject.toml
|
||||||
|
**/uv.lock
|
||||||
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
run: uv python install ${{ matrix.python-version }}
|
run: uv python install ${{ matrix.python-version }}
|
||||||
@@ -30,5 +37,30 @@ jobs:
|
|||||||
- name: Install the project
|
- name: Install the project
|
||||||
run: uv sync --dev --all-extras
|
run: uv sync --dev --all-extras
|
||||||
|
|
||||||
- name: Run tests
|
- name: Install SQLite with FTS5 support
|
||||||
run: uv run pytest --block-network --timeout=60 -vv
|
run: |
|
||||||
|
# WORKAROUND: GitHub Actions' Ubuntu runner uses SQLite without FTS5 support compiled in.
|
||||||
|
# This is a temporary fix until the runner includes SQLite with FTS5 or Python's sqlite3
|
||||||
|
# module is compiled with FTS5 support by default.
|
||||||
|
# TODO: Remove this workaround once GitHub Actions runners include SQLite FTS5 support
|
||||||
|
|
||||||
|
# Install pysqlite3-binary which has FTS5 support
|
||||||
|
uv pip install pysqlite3-binary
|
||||||
|
# Create a sitecustomize.py to override sqlite3 with pysqlite3
|
||||||
|
mkdir -p .pytest_sqlite_override
|
||||||
|
echo "import sys; import pysqlite3; sys.modules['sqlite3'] = pysqlite3" > .pytest_sqlite_override/sitecustomize.py
|
||||||
|
# Test FTS5 availability
|
||||||
|
PYTHONPATH=.pytest_sqlite_override uv run python -c "import sqlite3; print(f'SQLite version: {sqlite3.sqlite_version}')"
|
||||||
|
PYTHONPATH=.pytest_sqlite_override uv run python -c "import sqlite3; conn = sqlite3.connect(':memory:'); conn.execute('CREATE VIRTUAL TABLE test USING fts5(content)'); print('FTS5 module available')"
|
||||||
|
|
||||||
|
- name: Run tests (group ${{ matrix.group }} of 8)
|
||||||
|
run: |
|
||||||
|
PYTHONPATH=.pytest_sqlite_override uv run pytest \
|
||||||
|
--block-network \
|
||||||
|
--timeout=30 \
|
||||||
|
-vv \
|
||||||
|
--splits 8 \
|
||||||
|
--group ${{ matrix.group }} \
|
||||||
|
--durations=10 \
|
||||||
|
-n auto \
|
||||||
|
--maxfail=3
|
||||||
|
|||||||
@@ -9,12 +9,7 @@
|
|||||||
},
|
},
|
||||||
"favicon": "/images/favicon.svg",
|
"favicon": "/images/favicon.svg",
|
||||||
"contextual": {
|
"contextual": {
|
||||||
"options": [
|
"options": ["copy", "view", "chatgpt", "claude"]
|
||||||
"copy",
|
|
||||||
"view",
|
|
||||||
"chatgpt",
|
|
||||||
"claude"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"navigation": {
|
"navigation": {
|
||||||
"languages": [
|
"languages": [
|
||||||
@@ -55,32 +50,22 @@
|
|||||||
"groups": [
|
"groups": [
|
||||||
{
|
{
|
||||||
"group": "Get Started",
|
"group": "Get Started",
|
||||||
"pages": [
|
"pages": ["en/introduction", "en/installation", "en/quickstart"]
|
||||||
"en/introduction",
|
|
||||||
"en/installation",
|
|
||||||
"en/quickstart"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Guides",
|
"group": "Guides",
|
||||||
"pages": [
|
"pages": [
|
||||||
{
|
{
|
||||||
"group": "Strategy",
|
"group": "Strategy",
|
||||||
"pages": [
|
"pages": ["en/guides/concepts/evaluating-use-cases"]
|
||||||
"en/guides/concepts/evaluating-use-cases"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Agents",
|
"group": "Agents",
|
||||||
"pages": [
|
"pages": ["en/guides/agents/crafting-effective-agents"]
|
||||||
"en/guides/agents/crafting-effective-agents"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Crews",
|
"group": "Crews",
|
||||||
"pages": [
|
"pages": ["en/guides/crews/first-crew"]
|
||||||
"en/guides/crews/first-crew"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Flows",
|
"group": "Flows",
|
||||||
@@ -94,7 +79,6 @@
|
|||||||
"pages": [
|
"pages": [
|
||||||
"en/guides/advanced/customizing-prompts",
|
"en/guides/advanced/customizing-prompts",
|
||||||
"en/guides/advanced/fingerprinting"
|
"en/guides/advanced/fingerprinting"
|
||||||
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -241,6 +225,7 @@
|
|||||||
"en/observability/langtrace",
|
"en/observability/langtrace",
|
||||||
"en/observability/maxim",
|
"en/observability/maxim",
|
||||||
"en/observability/mlflow",
|
"en/observability/mlflow",
|
||||||
|
"en/observability/neatlogs",
|
||||||
"en/observability/openlit",
|
"en/observability/openlit",
|
||||||
"en/observability/opik",
|
"en/observability/opik",
|
||||||
"en/observability/patronus-evaluation",
|
"en/observability/patronus-evaluation",
|
||||||
@@ -274,9 +259,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Telemetry",
|
"group": "Telemetry",
|
||||||
"pages": [
|
"pages": ["en/telemetry"]
|
||||||
"en/telemetry"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -285,9 +268,7 @@
|
|||||||
"groups": [
|
"groups": [
|
||||||
{
|
{
|
||||||
"group": "Getting Started",
|
"group": "Getting Started",
|
||||||
"pages": [
|
"pages": ["en/enterprise/introduction"]
|
||||||
"en/enterprise/introduction"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Features",
|
"group": "Features",
|
||||||
@@ -296,7 +277,8 @@
|
|||||||
"en/enterprise/features/webhook-streaming",
|
"en/enterprise/features/webhook-streaming",
|
||||||
"en/enterprise/features/traces",
|
"en/enterprise/features/traces",
|
||||||
"en/enterprise/features/hallucination-guardrail",
|
"en/enterprise/features/hallucination-guardrail",
|
||||||
"en/enterprise/features/integrations"
|
"en/enterprise/features/integrations",
|
||||||
|
"en/enterprise/features/agent-repositories"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -341,9 +323,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Resources",
|
"group": "Resources",
|
||||||
"pages": [
|
"pages": ["en/enterprise/resources/frequently-asked-questions"]
|
||||||
"en/enterprise/resources/frequently-asked-questions"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -352,9 +332,7 @@
|
|||||||
"groups": [
|
"groups": [
|
||||||
{
|
{
|
||||||
"group": "Getting Started",
|
"group": "Getting Started",
|
||||||
"pages": [
|
"pages": ["en/api-reference/introduction"]
|
||||||
"en/api-reference/introduction"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Endpoints",
|
"group": "Endpoints",
|
||||||
@@ -364,16 +342,13 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"tab": "Examples",
|
"tab": "Examples",
|
||||||
"groups": [
|
"groups": [
|
||||||
{
|
{
|
||||||
"group": "Examples",
|
"group": "Examples",
|
||||||
"pages": [
|
"pages": ["en/examples/example"]
|
||||||
"en/examples/example"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -424,21 +399,15 @@
|
|||||||
"pages": [
|
"pages": [
|
||||||
{
|
{
|
||||||
"group": "Estratégia",
|
"group": "Estratégia",
|
||||||
"pages": [
|
"pages": ["pt-BR/guides/concepts/evaluating-use-cases"]
|
||||||
"pt-BR/guides/concepts/evaluating-use-cases"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Agentes",
|
"group": "Agentes",
|
||||||
"pages": [
|
"pages": ["pt-BR/guides/agents/crafting-effective-agents"]
|
||||||
"pt-BR/guides/agents/crafting-effective-agents"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Crews",
|
"group": "Crews",
|
||||||
"pages": [
|
"pages": ["pt-BR/guides/crews/first-crew"]
|
||||||
"pt-BR/guides/crews/first-crew"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Flows",
|
"group": "Flows",
|
||||||
@@ -631,9 +600,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Telemetria",
|
"group": "Telemetria",
|
||||||
"pages": [
|
"pages": ["pt-BR/telemetry"]
|
||||||
"pt-BR/telemetry"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -642,9 +609,7 @@
|
|||||||
"groups": [
|
"groups": [
|
||||||
{
|
{
|
||||||
"group": "Começando",
|
"group": "Começando",
|
||||||
"pages": [
|
"pages": ["pt-BR/enterprise/introduction"]
|
||||||
"pt-BR/enterprise/introduction"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Funcionalidades",
|
"group": "Funcionalidades",
|
||||||
@@ -709,9 +674,7 @@
|
|||||||
"groups": [
|
"groups": [
|
||||||
{
|
{
|
||||||
"group": "Começando",
|
"group": "Começando",
|
||||||
"pages": [
|
"pages": ["pt-BR/api-reference/introduction"]
|
||||||
"pt-BR/api-reference/introduction"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"group": "Endpoints",
|
"group": "Endpoints",
|
||||||
@@ -721,16 +684,13 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"tab": "Exemplos",
|
"tab": "Exemplos",
|
||||||
"groups": [
|
"groups": [
|
||||||
{
|
{
|
||||||
"group": "Exemplos",
|
"group": "Exemplos",
|
||||||
"pages": [
|
"pages": ["pt-BR/examples/example"]
|
||||||
"pt-BR/examples/example"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -774,7 +734,7 @@
|
|||||||
"destination": "/en/introduction"
|
"destination": "/en/introduction"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": "/installation",
|
"source": "/installation",
|
||||||
"destination": "/en/installation"
|
"destination": "/en/installation"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -526,6 +526,103 @@ agent = Agent(
|
|||||||
The context window management feature works automatically in the background. You don't need to call any special functions - just set `respect_context_window` to your preferred behavior and CrewAI handles the rest!
|
The context window management feature works automatically in the background. You don't need to call any special functions - just set `respect_context_window` to your preferred behavior and CrewAI handles the rest!
|
||||||
</Note>
|
</Note>
|
||||||
|
|
||||||
|
## Direct Agent Interaction with `kickoff()`
|
||||||
|
|
||||||
|
Agents can be used directly without going through a task or crew workflow using the `kickoff()` method. This provides a simpler way to interact with an agent when you don't need the full crew orchestration capabilities.
|
||||||
|
|
||||||
|
### How `kickoff()` Works
|
||||||
|
|
||||||
|
The `kickoff()` method allows you to send messages directly to an agent and get a response, similar to how you would interact with an LLM but with all the agent's capabilities (tools, reasoning, etc.).
|
||||||
|
|
||||||
|
```python Code
|
||||||
|
from crewai import Agent
|
||||||
|
from crewai_tools import SerperDevTool
|
||||||
|
|
||||||
|
# Create an agent
|
||||||
|
researcher = Agent(
|
||||||
|
role="AI Technology Researcher",
|
||||||
|
goal="Research the latest AI developments",
|
||||||
|
tools=[SerperDevTool()],
|
||||||
|
verbose=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use kickoff() to interact directly with the agent
|
||||||
|
result = researcher.kickoff("What are the latest developments in language models?")
|
||||||
|
|
||||||
|
# Access the raw response
|
||||||
|
print(result.raw)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Parameters and Return Values
|
||||||
|
|
||||||
|
| Parameter | Type | Description |
|
||||||
|
| :---------------- | :---------------------------------- | :------------------------------------------------------------------------ |
|
||||||
|
| `messages` | `Union[str, List[Dict[str, str]]]` | Either a string query or a list of message dictionaries with role/content |
|
||||||
|
| `response_format` | `Optional[Type[Any]]` | Optional Pydantic model for structured output |
|
||||||
|
|
||||||
|
The method returns a `LiteAgentOutput` object with the following properties:
|
||||||
|
|
||||||
|
- `raw`: String containing the raw output text
|
||||||
|
- `pydantic`: Parsed Pydantic model (if a `response_format` was provided)
|
||||||
|
- `agent_role`: Role of the agent that produced the output
|
||||||
|
- `usage_metrics`: Token usage metrics for the execution
|
||||||
|
|
||||||
|
### Structured Output
|
||||||
|
|
||||||
|
You can get structured output by providing a Pydantic model as the `response_format`:
|
||||||
|
|
||||||
|
```python Code
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
class ResearchFindings(BaseModel):
|
||||||
|
main_points: List[str]
|
||||||
|
key_technologies: List[str]
|
||||||
|
future_predictions: str
|
||||||
|
|
||||||
|
# Get structured output
|
||||||
|
result = researcher.kickoff(
|
||||||
|
"Summarize the latest developments in AI for 2025",
|
||||||
|
response_format=ResearchFindings
|
||||||
|
)
|
||||||
|
|
||||||
|
# Access structured data
|
||||||
|
print(result.pydantic.main_points)
|
||||||
|
print(result.pydantic.future_predictions)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Multiple Messages
|
||||||
|
|
||||||
|
You can also provide a conversation history as a list of message dictionaries:
|
||||||
|
|
||||||
|
```python Code
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "I need information about large language models"},
|
||||||
|
{"role": "assistant", "content": "I'd be happy to help with that! What specifically would you like to know?"},
|
||||||
|
{"role": "user", "content": "What are the latest developments in 2025?"}
|
||||||
|
]
|
||||||
|
|
||||||
|
result = researcher.kickoff(messages)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Async Support
|
||||||
|
|
||||||
|
An asynchronous version is available via `kickoff_async()` with the same parameters:
|
||||||
|
|
||||||
|
```python Code
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
result = await researcher.kickoff_async("What are the latest developments in AI?")
|
||||||
|
print(result.raw)
|
||||||
|
|
||||||
|
asyncio.run(main())
|
||||||
|
```
|
||||||
|
|
||||||
|
<Note>
|
||||||
|
The `kickoff()` method uses a `LiteAgent` internally, which provides a simpler execution flow while preserving all of the agent's configuration (role, goal, backstory, tools, etc.).
|
||||||
|
</Note>
|
||||||
|
|
||||||
## Important Considerations and Best Practices
|
## Important Considerations and Best Practices
|
||||||
|
|
||||||
### Security and Code Execution
|
### Security and Code Execution
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ description: Learn how to use the CrewAI CLI to interact with CrewAI.
|
|||||||
icon: terminal
|
icon: terminal
|
||||||
---
|
---
|
||||||
|
|
||||||
|
<Warning>Since release 0.140.0, CrewAI Enterprise has been migrating its login provider, and the CLI authentication flow was updated as a result. Users who log in with Google, or who created their account after July 3rd, 2025, will be unable to log in with older versions of the `crewai` library.</Warning>
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
The CrewAI CLI provides a set of commands to interact with CrewAI, allowing you to create, train, run, and manage crews & flows.
|
The CrewAI CLI provides a set of commands to interact with CrewAI, allowing you to create, train, run, and manage crews & flows.
|
||||||
@@ -186,10 +188,7 @@ def crew(self) -> Crew:
|
|||||||
Deploy the crew or flow to [CrewAI Enterprise](https://app.crewai.com).
|
Deploy the crew or flow to [CrewAI Enterprise](https://app.crewai.com).
|
||||||
|
|
||||||
- **Authentication**: You need to be authenticated to deploy to CrewAI Enterprise.
|
- **Authentication**: You need to be authenticated to deploy to CrewAI Enterprise.
|
||||||
```shell Terminal
|
You can log in or create an account with:
|
||||||
crewai signup
|
|
||||||
```
|
|
||||||
If you already have an account, you can login with:
|
|
||||||
```shell Terminal
|
```shell Terminal
|
||||||
crewai login
|
crewai login
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ A crew in crewAI represents a collaborative group of agents working together to
|
|||||||
| **Prompt File** _(optional)_ | `prompt_file` | Path to the prompt JSON file to be used for the crew. |
|
| **Prompt File** _(optional)_ | `prompt_file` | Path to the prompt JSON file to be used for the crew. |
|
||||||
| **Planning** *(optional)* | `planning` | Adds planning ability to the Crew. When activated, all Crew data is sent to an AgentPlanner before each Crew iteration; the planner plans the tasks, and that plan is added to each task description. |
|
| **Planning** *(optional)* | `planning` | Adds planning ability to the Crew. When activated, all Crew data is sent to an AgentPlanner before each Crew iteration; the planner plans the tasks, and that plan is added to each task description. |
|
||||||
| **Planning LLM** *(optional)* | `planning_llm` | The language model used by the AgentPlanner in a planning process. |
|
| **Planning LLM** *(optional)* | `planning_llm` | The language model used by the AgentPlanner in a planning process. |
|
||||||
|
| **Knowledge Sources** _(optional)_ | `knowledge_sources` | Knowledge sources available at the crew level, accessible to all the agents. |
|
||||||
|
|
||||||
<Tip>
|
<Tip>
|
||||||
**Crew Max RPM**: The `max_rpm` attribute sets the maximum number of requests per minute the crew can perform to avoid rate limits and will override individual agents' `max_rpm` settings if you set it.
|
**Crew Max RPM**: The `max_rpm` attribute sets the maximum number of requests per minute the crew can perform to avoid rate limits and will override individual agents' `max_rpm` settings if you set it.
|
||||||
|
|||||||
@@ -255,6 +255,17 @@ CrewAI provides a wide range of events that you can listen for:
|
|||||||
- **LLMCallFailedEvent**: Emitted when an LLM call fails
|
- **LLMCallFailedEvent**: Emitted when an LLM call fails
|
||||||
- **LLMStreamChunkEvent**: Emitted for each chunk received during streaming LLM responses
|
- **LLMStreamChunkEvent**: Emitted for each chunk received during streaming LLM responses
|
||||||
|
|
||||||
|
### Memory Events
|
||||||
|
|
||||||
|
- **MemoryQueryStartedEvent**: Emitted when a memory query is started. Contains the query, limit, and optional score threshold.
|
||||||
|
- **MemoryQueryCompletedEvent**: Emitted when a memory query is completed successfully. Contains the query, results, limit, score threshold, and query execution time.
|
||||||
|
- **MemoryQueryFailedEvent**: Emitted when a memory query fails. Contains the query, limit, score threshold, and error message.
|
||||||
|
- **MemorySaveStartedEvent**: Emitted when a memory save operation is started. Contains the value to be saved, metadata, and optional agent role.
|
||||||
|
- **MemorySaveCompletedEvent**: Emitted when a memory save operation is completed successfully. Contains the saved value, metadata, agent role, and save execution time.
|
||||||
|
- **MemorySaveFailedEvent**: Emitted when a memory save operation fails. Contains the value, metadata, agent role, and error message.
|
||||||
|
- **MemoryRetrievalStartedEvent**: Emitted when memory retrieval for a task prompt starts. Contains the optional task ID.
|
||||||
|
- **MemoryRetrievalCompletedEvent**: Emitted when memory retrieval for a task prompt completes successfully. Contains the task ID, memory content, and retrieval execution time.
|
||||||
|
|
||||||
## Event Handler Structure
|
## Event Handler Structure
|
||||||
|
|
||||||
Each event handler receives two parameters:
|
Each event handler receives two parameters:
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ icon: database
|
|||||||
The CrewAI framework provides a sophisticated memory system designed to significantly enhance AI agent capabilities. CrewAI offers **three distinct memory approaches** that serve different use cases:
|
The CrewAI framework provides a sophisticated memory system designed to significantly enhance AI agent capabilities. CrewAI offers **three distinct memory approaches** that serve different use cases:
|
||||||
|
|
||||||
1. **Basic Memory System** - Built-in short-term, long-term, and entity memory
|
1. **Basic Memory System** - Built-in short-term, long-term, and entity memory
|
||||||
2. **User Memory** - User-specific memory with Mem0 integration (legacy approach)
|
2. **User Memory** - User-specific memory with Mem0 integration (legacy approach)
|
||||||
3. **External Memory** - Standalone external memory providers (new approach)
|
3. **External Memory** - Standalone external memory providers (new approach)
|
||||||
|
|
||||||
## Memory System Components
|
## Memory System Components
|
||||||
@@ -62,7 +62,7 @@ By default, CrewAI uses the `appdirs` library to determine storage locations fol
|
|||||||
```
|
```
|
||||||
~/Library/Application Support/CrewAI/{project_name}/
|
~/Library/Application Support/CrewAI/{project_name}/
|
||||||
├── knowledge/ # Knowledge base ChromaDB files
|
├── knowledge/ # Knowledge base ChromaDB files
|
||||||
├── short_term_memory/ # Short-term memory ChromaDB files
|
├── short_term_memory/ # Short-term memory ChromaDB files
|
||||||
├── long_term_memory/ # Long-term memory ChromaDB files
|
├── long_term_memory/ # Long-term memory ChromaDB files
|
||||||
├── entities/ # Entity memory ChromaDB files
|
├── entities/ # Entity memory ChromaDB files
|
||||||
└── long_term_memory_storage.db # SQLite database
|
└── long_term_memory_storage.db # SQLite database
|
||||||
@@ -252,7 +252,7 @@ chroma_path = os.path.join(storage_path, "knowledge")
|
|||||||
if os.path.exists(chroma_path):
|
if os.path.exists(chroma_path):
|
||||||
client = chromadb.PersistentClient(path=chroma_path)
|
client = chromadb.PersistentClient(path=chroma_path)
|
||||||
collections = client.list_collections()
|
collections = client.list_collections()
|
||||||
|
|
||||||
print("ChromaDB Collections:")
|
print("ChromaDB Collections:")
|
||||||
for collection in collections:
|
for collection in collections:
|
||||||
print(f" - {collection.name}: {collection.count()} documents")
|
print(f" - {collection.name}: {collection.count()} documents")
|
||||||
@@ -269,7 +269,7 @@ crew = Crew(agents=[...], tasks=[...], memory=True)
|
|||||||
|
|
||||||
# Reset specific memory types
|
# Reset specific memory types
|
||||||
crew.reset_memories(command_type='short') # Short-term memory
|
crew.reset_memories(command_type='short') # Short-term memory
|
||||||
crew.reset_memories(command_type='long') # Long-term memory
|
crew.reset_memories(command_type='long') # Long-term memory
|
||||||
crew.reset_memories(command_type='entity') # Entity memory
|
crew.reset_memories(command_type='entity') # Entity memory
|
||||||
crew.reset_memories(command_type='knowledge') # Knowledge storage
|
crew.reset_memories(command_type='knowledge') # Knowledge storage
|
||||||
```
|
```
|
||||||
@@ -596,7 +596,7 @@ providers_to_test = [
|
|||||||
{
|
{
|
||||||
"name": "Ollama",
|
"name": "Ollama",
|
||||||
"config": {
|
"config": {
|
||||||
"provider": "ollama",
|
"provider": "ollama",
|
||||||
"config": {"model": "mxbai-embed-large"}
|
"config": {"model": "mxbai-embed-large"}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -604,7 +604,7 @@ providers_to_test = [
|
|||||||
|
|
||||||
for provider in providers_to_test:
|
for provider in providers_to_test:
|
||||||
print(f"\nTesting {provider['name']} embeddings...")
|
print(f"\nTesting {provider['name']} embeddings...")
|
||||||
|
|
||||||
# Create crew with specific embedder
|
# Create crew with specific embedder
|
||||||
crew = Crew(
|
crew = Crew(
|
||||||
agents=[...],
|
agents=[...],
|
||||||
@@ -612,7 +612,7 @@ for provider in providers_to_test:
|
|||||||
memory=True,
|
memory=True,
|
||||||
embedder=provider['config']
|
embedder=provider['config']
|
||||||
)
|
)
|
||||||
|
|
||||||
# Run your test and measure performance
|
# Run your test and measure performance
|
||||||
result = crew.kickoff()
|
result = crew.kickoff()
|
||||||
print(f"{provider['name']} completed successfully")
|
print(f"{provider['name']} completed successfully")
|
||||||
@@ -655,17 +655,17 @@ import time
|
|||||||
|
|
||||||
def test_embedding_performance(embedder_config, test_text="This is a test document"):
|
def test_embedding_performance(embedder_config, test_text="This is a test document"):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
crew = Crew(
|
crew = Crew(
|
||||||
agents=[...],
|
agents=[...],
|
||||||
tasks=[...],
|
tasks=[...],
|
||||||
memory=True,
|
memory=True,
|
||||||
embedder=embedder_config
|
embedder=embedder_config
|
||||||
)
|
)
|
||||||
|
|
||||||
# Simulate memory operation
|
# Simulate memory operation
|
||||||
crew.kickoff()
|
crew.kickoff()
|
||||||
|
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
return end_time - start_time
|
return end_time - start_time
|
||||||
|
|
||||||
@@ -676,7 +676,7 @@ openai_time = test_embedding_performance({
|
|||||||
})
|
})
|
||||||
|
|
||||||
ollama_time = test_embedding_performance({
|
ollama_time = test_embedding_performance({
|
||||||
"provider": "ollama",
|
"provider": "ollama",
|
||||||
"config": {"model": "mxbai-embed-large"}
|
"config": {"model": "mxbai-embed-large"}
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -783,7 +783,7 @@ os.environ["MEM0_API_KEY"] = "your-api-key"
|
|||||||
# Create external memory instance
|
# Create external memory instance
|
||||||
external_memory = ExternalMemory(
|
external_memory = ExternalMemory(
|
||||||
embedder_config={
|
embedder_config={
|
||||||
"provider": "mem0",
|
"provider": "mem0",
|
||||||
"config": {"user_id": "U-123"}
|
"config": {"user_id": "U-123"}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -808,8 +808,8 @@ class CustomStorage(Storage):
|
|||||||
|
|
||||||
def save(self, value, metadata=None, agent=None):
|
def save(self, value, metadata=None, agent=None):
|
||||||
self.memories.append({
|
self.memories.append({
|
||||||
"value": value,
|
"value": value,
|
||||||
"metadata": metadata,
|
"metadata": metadata,
|
||||||
"agent": agent
|
"agent": agent
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -986,7 +986,201 @@ crew = Crew(
|
|||||||
- 🫡 **Enhanced Personalization:** Memory enables agents to remember user preferences and historical interactions, leading to personalized experiences.
|
- 🫡 **Enhanced Personalization:** Memory enables agents to remember user preferences and historical interactions, leading to personalized experiences.
|
||||||
- 🧠 **Improved Problem Solving:** Access to a rich memory store aids agents in making more informed decisions, drawing on past learnings and contextual insights.
|
- 🧠 **Improved Problem Solving:** Access to a rich memory store aids agents in making more informed decisions, drawing on past learnings and contextual insights.
|
||||||
|
|
||||||
|
## Memory Events
|
||||||
|
|
||||||
|
CrewAI's event system provides powerful insights into memory operations. By leveraging memory events, you can monitor, debug, and optimize your memory system's performance and behavior.
|
||||||
|
|
||||||
|
### Available Memory Events
|
||||||
|
|
||||||
|
CrewAI emits the following memory-related events:
|
||||||
|
|
||||||
|
| Event | Description | Key Properties |
|
||||||
|
| :---- | :---------- | :------------- |
|
||||||
|
| **MemoryQueryStartedEvent** | Emitted when a memory query begins | `query`, `limit`, `score_threshold` |
|
||||||
|
| **MemoryQueryCompletedEvent** | Emitted when a memory query completes successfully | `query`, `results`, `limit`, `score_threshold`, `query_time_ms` |
|
||||||
|
| **MemoryQueryFailedEvent** | Emitted when a memory query fails | `query`, `limit`, `score_threshold`, `error` |
|
||||||
|
| **MemorySaveStartedEvent** | Emitted when a memory save operation begins | `value`, `metadata`, `agent_role` |
|
||||||
|
| **MemorySaveCompletedEvent** | Emitted when a memory save operation completes successfully | `value`, `metadata`, `agent_role`, `save_time_ms` |
|
||||||
|
| **MemorySaveFailedEvent** | Emitted when a memory save operation fails | `value`, `metadata`, `agent_role`, `error` |
|
||||||
|
| **MemoryRetrievalStartedEvent** | Emitted when memory retrieval for a task prompt starts | `task_id` |
|
||||||
|
| **MemoryRetrievalCompletedEvent** | Emitted when memory retrieval completes successfully | `task_id`, `memory_content`, `retrieval_time_ms` |
|
||||||
|
|
||||||
|
### Practical Applications
|
||||||
|
|
||||||
|
#### 1. Memory Performance Monitoring
|
||||||
|
|
||||||
|
Track memory operation timing to optimize your application:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crewai.utilities.events.base_event_listener import BaseEventListener
|
||||||
|
from crewai.utilities.events import (
|
||||||
|
MemoryQueryCompletedEvent,
|
||||||
|
MemorySaveCompletedEvent
|
||||||
|
)
|
||||||
|
import time
|
||||||
|
|
||||||
|
class MemoryPerformanceMonitor(BaseEventListener):
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__()
|
||||||
|
self.query_times = []
|
||||||
|
self.save_times = []
|
||||||
|
|
||||||
|
def setup_listeners(self, crewai_event_bus):
|
||||||
|
@crewai_event_bus.on(MemoryQueryCompletedEvent)
|
||||||
|
def on_memory_query_completed(source, event: MemoryQueryCompletedEvent):
|
||||||
|
self.query_times.append(event.query_time_ms)
|
||||||
|
print(f"Memory query completed in {event.query_time_ms:.2f}ms. Query: '{event.query}'")
|
||||||
|
print(f"Average query time: {sum(self.query_times)/len(self.query_times):.2f}ms")
|
||||||
|
|
||||||
|
@crewai_event_bus.on(MemorySaveCompletedEvent)
|
||||||
|
def on_memory_save_completed(source, event: MemorySaveCompletedEvent):
|
||||||
|
self.save_times.append(event.save_time_ms)
|
||||||
|
print(f"Memory save completed in {event.save_time_ms:.2f}ms")
|
||||||
|
print(f"Average save time: {sum(self.save_times)/len(self.save_times):.2f}ms")
|
||||||
|
|
||||||
|
# Create an instance of your listener
|
||||||
|
memory_monitor = MemoryPerformanceMonitor()
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2. Memory Content Logging
|
||||||
|
|
||||||
|
Log memory operations for debugging and insights:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crewai.utilities.events.base_event_listener import BaseEventListener
|
||||||
|
from crewai.utilities.events import (
|
||||||
|
MemorySaveStartedEvent,
|
||||||
|
MemoryQueryStartedEvent,
|
||||||
|
MemoryRetrievalCompletedEvent
|
||||||
|
)
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logger = logging.getLogger('memory_events')
|
||||||
|
|
||||||
|
class MemoryLogger(BaseEventListener):
|
||||||
|
def setup_listeners(self, crewai_event_bus):
|
||||||
|
@crewai_event_bus.on(MemorySaveStartedEvent)
|
||||||
|
def on_memory_save_started(source, event: MemorySaveStartedEvent):
|
||||||
|
if event.agent_role:
|
||||||
|
logger.info(f"Agent '{event.agent_role}' saving memory: {event.value[:50]}...")
|
||||||
|
else:
|
||||||
|
logger.info(f"Saving memory: {event.value[:50]}...")
|
||||||
|
|
||||||
|
@crewai_event_bus.on(MemoryQueryStartedEvent)
|
||||||
|
def on_memory_query_started(source, event: MemoryQueryStartedEvent):
|
||||||
|
logger.info(f"Memory query started: '{event.query}' (limit: {event.limit})")
|
||||||
|
|
||||||
|
@crewai_event_bus.on(MemoryRetrievalCompletedEvent)
|
||||||
|
def on_memory_retrieval_completed(source, event: MemoryRetrievalCompletedEvent):
|
||||||
|
if event.task_id:
|
||||||
|
logger.info(f"Memory retrieved for task {event.task_id} in {event.retrieval_time_ms:.2f}ms")
|
||||||
|
else:
|
||||||
|
logger.info(f"Memory retrieved in {event.retrieval_time_ms:.2f}ms")
|
||||||
|
logger.debug(f"Memory content: {event.memory_content}")
|
||||||
|
|
||||||
|
# Create an instance of your listener
|
||||||
|
memory_logger = MemoryLogger()
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3. Error Tracking and Notifications
|
||||||
|
|
||||||
|
Capture and respond to memory errors:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crewai.utilities.events.base_event_listener import BaseEventListener
|
||||||
|
from crewai.utilities.events import (
|
||||||
|
MemorySaveFailedEvent,
|
||||||
|
MemoryQueryFailedEvent
|
||||||
|
)
|
||||||
|
import logging
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logger = logging.getLogger('memory_errors')
|
||||||
|
|
||||||
|
class MemoryErrorTracker(BaseEventListener):
|
||||||
|
def __init__(self, notify_email: Optional[str] = None):
|
||||||
|
super().__init__()
|
||||||
|
self.notify_email = notify_email
|
||||||
|
self.error_count = 0
|
||||||
|
|
||||||
|
def setup_listeners(self, crewai_event_bus):
|
||||||
|
@crewai_event_bus.on(MemorySaveFailedEvent)
|
||||||
|
def on_memory_save_failed(source, event: MemorySaveFailedEvent):
|
||||||
|
self.error_count += 1
|
||||||
|
agent_info = f"Agent '{event.agent_role}'" if event.agent_role else "Unknown agent"
|
||||||
|
error_message = f"Memory save failed: {event.error}. {agent_info}"
|
||||||
|
logger.error(error_message)
|
||||||
|
|
||||||
|
if self.notify_email and self.error_count % 5 == 0:
|
||||||
|
self._send_notification(error_message)
|
||||||
|
|
||||||
|
@crewai_event_bus.on(MemoryQueryFailedEvent)
|
||||||
|
def on_memory_query_failed(source, event: MemoryQueryFailedEvent):
|
||||||
|
self.error_count += 1
|
||||||
|
error_message = f"Memory query failed: {event.error}. Query: '{event.query}'"
|
||||||
|
logger.error(error_message)
|
||||||
|
|
||||||
|
if self.notify_email and self.error_count % 5 == 0:
|
||||||
|
self._send_notification(error_message)
|
||||||
|
|
||||||
|
def _send_notification(self, message):
|
||||||
|
# Implement your notification system (email, Slack, etc.)
|
||||||
|
print(f"[NOTIFICATION] Would send to {self.notify_email}: {message}")
|
||||||
|
|
||||||
|
# Create an instance of your listener
|
||||||
|
error_tracker = MemoryErrorTracker(notify_email="admin@example.com")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Integrating with Analytics Platforms
|
||||||
|
|
||||||
|
Memory events can be forwarded to analytics and monitoring platforms to track performance metrics, detect anomalies, and visualize memory usage patterns:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crewai.utilities.events.base_event_listener import BaseEventListener
|
||||||
|
from crewai.utilities.events import (
|
||||||
|
MemoryQueryCompletedEvent,
|
||||||
|
MemorySaveCompletedEvent
|
||||||
|
)
|
||||||
|
|
||||||
|
class MemoryAnalyticsForwarder(BaseEventListener):
|
||||||
|
def __init__(self, analytics_client):
|
||||||
|
super().__init__()
|
||||||
|
self.client = analytics_client
|
||||||
|
|
||||||
|
def setup_listeners(self, crewai_event_bus):
|
||||||
|
@crewai_event_bus.on(MemoryQueryCompletedEvent)
|
||||||
|
def on_memory_query_completed(source, event: MemoryQueryCompletedEvent):
|
||||||
|
# Forward query metrics to analytics platform
|
||||||
|
self.client.track_metric({
|
||||||
|
"event_type": "memory_query",
|
||||||
|
"query": event.query,
|
||||||
|
"duration_ms": event.query_time_ms,
|
||||||
|
"result_count": len(event.results) if hasattr(event.results, "__len__") else 0,
|
||||||
|
"timestamp": event.timestamp
|
||||||
|
})
|
||||||
|
|
||||||
|
@crewai_event_bus.on(MemorySaveCompletedEvent)
|
||||||
|
def on_memory_save_completed(source, event: MemorySaveCompletedEvent):
|
||||||
|
# Forward save metrics to analytics platform
|
||||||
|
self.client.track_metric({
|
||||||
|
"event_type": "memory_save",
|
||||||
|
"agent_role": event.agent_role,
|
||||||
|
"duration_ms": event.save_time_ms,
|
||||||
|
"timestamp": event.timestamp
|
||||||
|
})
|
||||||
|
```
|
||||||
|
|
||||||
|
### Best Practices for Memory Event Listeners
|
||||||
|
|
||||||
|
1. **Keep handlers lightweight**: Avoid complex processing in event handlers to prevent performance impacts
|
||||||
|
2. **Use appropriate logging levels**: Use INFO for normal operations, DEBUG for details, ERROR for issues
|
||||||
|
3. **Batch metrics when possible**: Accumulate metrics before sending to external systems
|
||||||
|
4. **Handle exceptions gracefully**: Ensure your event handlers don't crash due to unexpected data
|
||||||
|
5. **Consider memory consumption**: Be mindful of storing large amounts of event data
|
||||||
|
|
||||||
## Conclusion
|
## Conclusion
|
||||||
|
|
||||||
Integrating CrewAI's memory system into your projects is straightforward. By leveraging the provided memory components and configurations,
|
Integrating CrewAI's memory system into your projects is straightforward. By leveraging the provided memory components and configurations,
|
||||||
you can quickly empower your agents with the ability to remember, reason, and learn from their interactions, unlocking new levels of intelligence and capability.
|
you can quickly empower your agents with the ability to remember, reason, and learn from their interactions, unlocking new levels of intelligence and capability.
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ crew = Crew(
|
|||||||
| **Output JSON** _(optional)_ | `output_json` | `Optional[Type[BaseModel]]` | A Pydantic model to structure the JSON output. |
|
| **Output JSON** _(optional)_ | `output_json` | `Optional[Type[BaseModel]]` | A Pydantic model to structure the JSON output. |
|
||||||
| **Output Pydantic** _(optional)_ | `output_pydantic` | `Optional[Type[BaseModel]]` | A Pydantic model for task output. |
|
| **Output Pydantic** _(optional)_ | `output_pydantic` | `Optional[Type[BaseModel]]` | A Pydantic model for task output. |
|
||||||
| **Callback** _(optional)_ | `callback` | `Optional[Any]` | Function/object to be executed after task completion. |
|
| **Callback** _(optional)_ | `callback` | `Optional[Any]` | Function/object to be executed after task completion. |
|
||||||
|
| **Guardrail** _(optional)_ | `guardrail` | `Optional[Union[Callable, str]]` | Function or string description to validate task output before proceeding to next task. |
|
||||||
|
|
||||||
## Creating Tasks
|
## Creating Tasks
|
||||||
|
|
||||||
@@ -86,6 +87,7 @@ research_task:
|
|||||||
expected_output: >
|
expected_output: >
|
||||||
A list with 10 bullet points of the most relevant information about {topic}
|
A list with 10 bullet points of the most relevant information about {topic}
|
||||||
agent: researcher
|
agent: researcher
|
||||||
|
guardrail: ensure each bullet contains a minimum of 100 words
|
||||||
|
|
||||||
reporting_task:
|
reporting_task:
|
||||||
description: >
|
description: >
|
||||||
@@ -332,9 +334,13 @@ Task guardrails provide a way to validate and transform task outputs before they
|
|||||||
are passed to the next task. This feature helps ensure data quality and provides
|
are passed to the next task. This feature helps ensure data quality and provides
|
||||||
feedback to agents when their output doesn't meet specific criteria.
|
feedback to agents when their output doesn't meet specific criteria.
|
||||||
|
|
||||||
### Using Task Guardrails
|
**Guardrails can be defined in two ways:**
|
||||||
|
1. **Function-based guardrails**: Python functions that implement custom validation logic
|
||||||
|
2. **String-based guardrails**: Natural language descriptions that are automatically converted to LLM-powered validation
|
||||||
|
|
||||||
To add a guardrail to a task, provide a validation function through the `guardrail` parameter:
|
### Function-Based Guardrails
|
||||||
|
|
||||||
|
To add a function-based guardrail to a task, provide a validation function through the `guardrail` parameter:
|
||||||
|
|
||||||
```python Code
|
```python Code
|
||||||
from typing import Tuple, Union, Dict, Any
|
from typing import Tuple, Union, Dict, Any
|
||||||
@@ -372,9 +378,82 @@ blog_task = Task(
|
|||||||
- On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)`
|
- On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)`
|
||||||
- On failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explaining the failure")`
|
- On failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explaining the failure")`
|
||||||
|
|
||||||
### LLMGuardrail
|
### String-Based Guardrails
|
||||||
|
|
||||||
The `LLMGuardrail` class offers a robust mechanism for validating task outputs.
|
String-based guardrails allow you to describe validation criteria in natural language. When you provide a string instead of a function, CrewAI automatically converts it to an `LLMGuardrail` that uses an AI agent to validate the task output.
|
||||||
|
|
||||||
|
#### Using String Guardrails in Python
|
||||||
|
|
||||||
|
```python Code
|
||||||
|
from crewai import Task
|
||||||
|
|
||||||
|
# Simple string-based guardrail
|
||||||
|
blog_task = Task(
|
||||||
|
description="Write a blog post about AI",
|
||||||
|
expected_output="A blog post under 200 words",
|
||||||
|
agent=blog_agent,
|
||||||
|
guardrail="Ensure the blog post is under 200 words and includes practical examples"
|
||||||
|
)
|
||||||
|
|
||||||
|
# More complex validation criteria
|
||||||
|
research_task = Task(
|
||||||
|
description="Research AI trends for 2025",
|
||||||
|
expected_output="A comprehensive research report",
|
||||||
|
agent=research_agent,
|
||||||
|
guardrail="Ensure each finding includes a credible source and is backed by recent data from 2024-2025"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Using String Guardrails in YAML
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
research_task:
|
||||||
|
description: Research the latest AI developments
|
||||||
|
expected_output: A list of 10 bullet points about AI
|
||||||
|
agent: researcher
|
||||||
|
guardrail: ensure each bullet contains a minimum of 100 words
|
||||||
|
|
||||||
|
validation_task:
|
||||||
|
description: Validate the research findings
|
||||||
|
expected_output: A validation report
|
||||||
|
agent: validator
|
||||||
|
guardrail: confirm all sources are from reputable publications and published within the last 2 years
|
||||||
|
```
|
||||||
|
|
||||||
|
#### How String Guardrails Work
|
||||||
|
|
||||||
|
When you provide a string guardrail, CrewAI automatically:
|
||||||
|
1. Creates an `LLMGuardrail` instance using the string as validation criteria
|
||||||
|
2. Uses the task's agent LLM to power the validation
|
||||||
|
3. Creates a temporary validation agent that checks the output against your criteria
|
||||||
|
4. Returns detailed feedback if validation fails
|
||||||
|
|
||||||
|
This approach is ideal when you want to use natural language to describe validation rules without writing custom validation functions.
|
||||||
|
|
||||||
|
### LLMGuardrail Class
|
||||||
|
|
||||||
|
The `LLMGuardrail` class is the underlying mechanism that powers string-based guardrails. You can also use it directly for more advanced control:
|
||||||
|
|
||||||
|
```python Code
|
||||||
|
from crewai import Task
|
||||||
|
from crewai.tasks.llm_guardrail import LLMGuardrail
|
||||||
|
from crewai.llm import LLM
|
||||||
|
|
||||||
|
# Create a custom LLMGuardrail with specific LLM
|
||||||
|
custom_guardrail = LLMGuardrail(
|
||||||
|
description="Ensure the response contains exactly 5 bullet points with proper citations",
|
||||||
|
llm=LLM(model="gpt-4o-mini")
|
||||||
|
)
|
||||||
|
|
||||||
|
task = Task(
|
||||||
|
description="Research AI safety measures",
|
||||||
|
expected_output="A detailed analysis with bullet points",
|
||||||
|
agent=research_agent,
|
||||||
|
guardrail=custom_guardrail
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**: When you use a string guardrail, CrewAI automatically creates an `LLMGuardrail` instance using your task's agent LLM. Using `LLMGuardrail` directly gives you more control over the validation process and LLM selection.
|
||||||
|
|
||||||
### Error Handling Best Practices
|
### Error Handling Best Practices
|
||||||
|
|
||||||
@@ -798,166 +877,7 @@ While creating and executing tasks, certain validation mechanisms are in place t
|
|||||||
|
|
||||||
These validations help in maintaining the consistency and reliability of task executions within the crewAI framework.
|
These validations help in maintaining the consistency and reliability of task executions within the crewAI framework.
|
||||||
|
|
||||||
## Task Guardrails
|
|
||||||
|
|
||||||
Task guardrails provide a powerful way to validate, transform, or filter task outputs before they are passed to the next task. Guardrails are optional functions that execute before the next task starts, allowing you to ensure that task outputs meet specific requirements or formats.
|
|
||||||
|
|
||||||
### Basic Usage
|
|
||||||
|
|
||||||
#### Define your own logic to validate
|
|
||||||
|
|
||||||
```python Code
|
|
||||||
from typing import Tuple, Union
|
|
||||||
from crewai import Task
|
|
||||||
|
|
||||||
def validate_json_output(result: str) -> Tuple[bool, Union[dict, str]]:
|
|
||||||
"""Validate that the output is valid JSON."""
|
|
||||||
try:
|
|
||||||
json_data = json.loads(result)
|
|
||||||
return (True, json_data)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
return (False, "Output must be valid JSON")
|
|
||||||
|
|
||||||
task = Task(
|
|
||||||
description="Generate JSON data",
|
|
||||||
expected_output="Valid JSON object",
|
|
||||||
guardrail=validate_json_output
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Leverage a no-code approach for validation
|
|
||||||
|
|
||||||
```python Code
|
|
||||||
from crewai import Task
|
|
||||||
|
|
||||||
task = Task(
|
|
||||||
description="Generate JSON data",
|
|
||||||
expected_output="Valid JSON object",
|
|
||||||
guardrail="Ensure the response is a valid JSON object"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Using YAML
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
research_task:
|
|
||||||
...
|
|
||||||
guardrail: make sure each bullet contains a minimum of 100 words
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
```python Code
|
|
||||||
@CrewBase
|
|
||||||
class InternalCrew:
|
|
||||||
agents_config = "config/agents.yaml"
|
|
||||||
tasks_config = "config/tasks.yaml"
|
|
||||||
|
|
||||||
...
|
|
||||||
@task
|
|
||||||
def research_task(self):
|
|
||||||
return Task(config=self.tasks_config["research_task"]) # type: ignore[index]
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
#### Use custom models for code generation
|
|
||||||
|
|
||||||
```python Code
|
|
||||||
from crewai import Task
|
|
||||||
from crewai.llm import LLM
|
|
||||||
|
|
||||||
task = Task(
|
|
||||||
description="Generate JSON data",
|
|
||||||
expected_output="Valid JSON object",
|
|
||||||
guardrail=LLMGuardrail(
|
|
||||||
description="Ensure the response is a valid JSON object",
|
|
||||||
llm=LLM(model="gpt-4o-mini"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### How Guardrails Work
|
|
||||||
|
|
||||||
1. **Optional Attribute**: Guardrails are an optional attribute at the task level, allowing you to add validation only where needed.
|
|
||||||
2. **Execution Timing**: The guardrail function is executed before the next task starts, ensuring valid data flow between tasks.
|
|
||||||
3. **Return Format**: Guardrails must return a tuple of `(success, data)`:
|
|
||||||
- If `success` is `True`, `data` is the validated/transformed result
|
|
||||||
- If `success` is `False`, `data` is the error message
|
|
||||||
4. **Result Routing**:
|
|
||||||
- On success (`True`), the result is automatically passed to the next task
|
|
||||||
- On failure (`False`), the error is sent back to the agent to generate a new answer
|
|
||||||
|
|
||||||
### Common Use Cases
|
|
||||||
|
|
||||||
#### Data Format Validation
|
|
||||||
```python Code
|
|
||||||
def validate_email_format(result: str) -> Tuple[bool, Union[str, str]]:
|
|
||||||
"""Ensure the output contains a valid email address."""
|
|
||||||
import re
|
|
||||||
email_pattern = r'^[\w\.-]+@[\w\.-]+\.\w+$'
|
|
||||||
if re.match(email_pattern, result.strip()):
|
|
||||||
return (True, result.strip())
|
|
||||||
return (False, "Output must be a valid email address")
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Content Filtering
|
|
||||||
```python Code
|
|
||||||
def filter_sensitive_info(result: str) -> Tuple[bool, Union[str, str]]:
|
|
||||||
"""Remove or validate sensitive information."""
|
|
||||||
sensitive_patterns = ['SSN:', 'password:', 'secret:']
|
|
||||||
for pattern in sensitive_patterns:
|
|
||||||
if pattern.lower() in result.lower():
|
|
||||||
return (False, f"Output contains sensitive information ({pattern})")
|
|
||||||
return (True, result)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Data Transformation
|
|
||||||
```python Code
|
|
||||||
def normalize_phone_number(result: str) -> Tuple[bool, Union[str, str]]:
|
|
||||||
"""Ensure phone numbers are in a consistent format."""
|
|
||||||
import re
|
|
||||||
digits = re.sub(r'\D', '', result)
|
|
||||||
if len(digits) == 10:
|
|
||||||
formatted = f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"
|
|
||||||
return (True, formatted)
|
|
||||||
return (False, "Output must be a 10-digit phone number")
|
|
||||||
```
|
|
||||||
|
|
||||||
### Advanced Features
|
|
||||||
|
|
||||||
#### Chaining Multiple Validations
|
|
||||||
```python Code
|
|
||||||
def chain_validations(*validators):
|
|
||||||
"""Chain multiple validators together."""
|
|
||||||
def combined_validator(result):
|
|
||||||
for validator in validators:
|
|
||||||
success, data = validator(result)
|
|
||||||
if not success:
|
|
||||||
return (False, data)
|
|
||||||
result = data
|
|
||||||
return (True, result)
|
|
||||||
return combined_validator
|
|
||||||
|
|
||||||
# Usage
|
|
||||||
task = Task(
|
|
||||||
description="Get user contact info",
|
|
||||||
expected_output="Email and phone",
|
|
||||||
guardrail=chain_validations(
|
|
||||||
validate_email_format,
|
|
||||||
filter_sensitive_info
|
|
||||||
)
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Custom Retry Logic
|
|
||||||
```python Code
|
|
||||||
task = Task(
|
|
||||||
description="Generate data",
|
|
||||||
expected_output="Valid data",
|
|
||||||
guardrail=validate_data,
|
|
||||||
max_retries=5 # Override default retry limit
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Creating Directories when Saving Files
|
## Creating Directories when Saving Files
|
||||||
|
|
||||||
|
|||||||
155
docs/en/enterprise/features/agent-repositories.mdx
Normal file
155
docs/en/enterprise/features/agent-repositories.mdx
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
---
|
||||||
|
title: 'Agent Repositories'
|
||||||
|
description: 'Learn how to use Agent Repositories to share and reuse your agents across teams and projects'
|
||||||
|
icon: 'database'
|
||||||
|
---
|
||||||
|
|
||||||
|
Agent Repositories allow enterprise users to store, share, and reuse agent definitions across teams and projects. This feature enables organizations to maintain a centralized library of standardized agents, promoting consistency and reducing duplication of effort.
|
||||||
|
|
||||||
|
## Benefits of Agent Repositories
|
||||||
|
|
||||||
|
- **Standardization**: Maintain consistent agent definitions across your organization
|
||||||
|
- **Reusability**: Create an agent once and use it in multiple crews and projects
|
||||||
|
- **Governance**: Implement organization-wide policies for agent configurations
|
||||||
|
- **Collaboration**: Enable teams to share and build upon each other's work
|
||||||
|
|
||||||
|
## Using Agent Repositories
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
1. You must have a CrewAI account. If you don't have one, you can start with the [free plan](https://app.crewai.com).
|
||||||
|
2. You need to be authenticated using the CrewAI CLI.
|
||||||
|
3. If you have more than one organization, make sure you are switched to the correct organization using the CLI command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
crewai org switch <org_id>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Creating and Managing Agents in Repositories
|
||||||
|
|
||||||
|
To create and manage agents in repositories, use the Enterprise Dashboard.
|
||||||
|
|
||||||
|
### Loading Agents from Repositories
|
||||||
|
|
||||||
|
You can load agents from repositories in your code using the `from_repository` parameter:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crewai import Agent
|
||||||
|
|
||||||
|
# Create an agent by loading it from a repository
|
||||||
|
# The agent is loaded with all its predefined configurations
|
||||||
|
researcher = Agent(
|
||||||
|
from_repository="market-research-agent"
|
||||||
|
)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### Overriding Repository Settings
|
||||||
|
|
||||||
|
You can override specific settings from the repository by providing them in the configuration:
|
||||||
|
|
||||||
|
```python
|
||||||
|
researcher = Agent(
|
||||||
|
from_repository="market-research-agent",
|
||||||
|
goal="Research the latest trends in AI development", # Override the repository goal
|
||||||
|
verbose=True # Add a setting not in the repository
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: Creating a Crew with Repository Agents
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crewai import Crew, Agent, Task
|
||||||
|
|
||||||
|
# Load agents from repositories
|
||||||
|
researcher = Agent(
|
||||||
|
from_repository="market-research-agent"
|
||||||
|
)
|
||||||
|
|
||||||
|
writer = Agent(
|
||||||
|
from_repository="content-writer-agent"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create tasks
|
||||||
|
research_task = Task(
|
||||||
|
description="Research the latest trends in AI",
|
||||||
|
agent=researcher
|
||||||
|
)
|
||||||
|
|
||||||
|
writing_task = Task(
|
||||||
|
description="Write a comprehensive report based on the research",
|
||||||
|
agent=writer
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create the crew
|
||||||
|
crew = Crew(
|
||||||
|
agents=[researcher, writer],
|
||||||
|
tasks=[research_task, writing_task],
|
||||||
|
verbose=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Run the crew
|
||||||
|
result = crew.kickoff()
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example: Using `kickoff()` with Repository Agents
|
||||||
|
|
||||||
|
You can also use repository agents directly with the `kickoff()` method for simpler interactions:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crewai import Agent
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
# Define a structured output format
|
||||||
|
class MarketAnalysis(BaseModel):
|
||||||
|
key_trends: List[str]
|
||||||
|
opportunities: List[str]
|
||||||
|
recommendation: str
|
||||||
|
|
||||||
|
# Load an agent from repository
|
||||||
|
analyst = Agent(
|
||||||
|
from_repository="market-analyst-agent",
|
||||||
|
verbose=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get a free-form response
|
||||||
|
result = analyst.kickoff("Analyze the AI market in 2025")
|
||||||
|
print(result.raw) # Access the raw response
|
||||||
|
|
||||||
|
# Get structured output
|
||||||
|
structured_result = analyst.kickoff(
|
||||||
|
"Provide a structured analysis of the AI market in 2025",
|
||||||
|
response_format=MarketAnalysis
|
||||||
|
)
|
||||||
|
|
||||||
|
# Access structured data
|
||||||
|
print(f"Key Trends: {structured_result.pydantic.key_trends}")
|
||||||
|
print(f"Recommendation: {structured_result.pydantic.recommendation}")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
1. **Naming Convention**: Use clear, descriptive names for your repository agents
|
||||||
|
2. **Documentation**: Include comprehensive descriptions for each agent
|
||||||
|
3. **Tool Management**: Ensure that tools referenced by repository agents are available in your environment
|
||||||
|
4. **Access Control**: Manage permissions to ensure only authorized team members can modify repository agents
|
||||||
|
|
||||||
|
## Organization Management
|
||||||
|
|
||||||
|
To switch between organizations or see your current organization, use the CrewAI CLI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# View current organization
|
||||||
|
crewai org current
|
||||||
|
|
||||||
|
# Switch to a different organization
|
||||||
|
crewai org switch <org_id>
|
||||||
|
|
||||||
|
# List all available organizations
|
||||||
|
crewai org list
|
||||||
|
```
|
||||||
|
|
||||||
|
<Note>
|
||||||
|
When loading agents from repositories, you must be authenticated and switched to the correct organization. If you receive errors, check your authentication status and organization settings using the CLI commands above.
|
||||||
|
</Note>
|
||||||
@@ -41,11 +41,8 @@ The CLI provides the fastest way to deploy locally developed crews to the Enterp
|
|||||||
First, you need to authenticate your CLI with the CrewAI Enterprise platform:
|
First, you need to authenticate your CLI with the CrewAI Enterprise platform:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# If you already have a CrewAI Enterprise account
|
# If you already have a CrewAI Enterprise account, or want to create one:
|
||||||
crewai login
|
crewai login
|
||||||
|
|
||||||
# If you're creating a new account
|
|
||||||
crewai signup
|
|
||||||
```
|
```
|
||||||
|
|
||||||
When you run either command, the CLI will:
|
When you run the command, the CLI will:
|
||||||
|
|||||||
140
docs/en/observability/neatlogs.mdx
Normal file
140
docs/en/observability/neatlogs.mdx
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
---
|
||||||
|
title: Neatlogs Integration
|
||||||
|
description: Understand, debug, and share your CrewAI agent runs
|
||||||
|
icon: magnifying-glass-chart
|
||||||
|
---
|
||||||
|
|
||||||
|
# Introduction
|
||||||
|
|
||||||
|
Neatlogs helps you **see what your agent did**, **why**, and **share it**.
|
||||||
|
|
||||||
|
It captures every step: thoughts, tool calls, responses, evaluations. No raw logs. Just clear, structured traces. Great for debugging and collaboration.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Why use Neatlogs?
|
||||||
|
|
||||||
|
CrewAI agents use multiple tools and reasoning steps. When something goes wrong, you need context — not just errors.
|
||||||
|
|
||||||
|
Neatlogs lets you:
|
||||||
|
|
||||||
|
- Follow the full decision path
|
||||||
|
- Add feedback directly on steps
|
||||||
|
- Chat with the trace using the AI assistant
|
||||||
|
- Share runs publicly for feedback
|
||||||
|
- Turn insights into tasks
|
||||||
|
|
||||||
|
All in one place.
|
||||||
|
|
||||||
|
Manage your traces effortlessly
|
||||||
|
|
||||||
|

|
||||||
|

|
||||||
|
|
||||||
|
The best UX to view a CrewAI trace. Post comments anywhere you want. Use AI to debug.
|
||||||
|
|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Core Features
|
||||||
|
|
||||||
|
- **Trace Viewer**: Track thoughts, tools, and decisions in sequence
|
||||||
|
- **Inline Comments**: Tag teammates on any trace step
|
||||||
|
- **Feedback & Evaluation**: Mark outputs as correct or incorrect
|
||||||
|
- **Error Highlighting**: Automatic flagging of API/tool failures
|
||||||
|
- **Task Conversion**: Convert comments into assigned tasks
|
||||||
|
- **Ask the Trace (AI)**: Chat with your trace using the Neatlogs AI bot
|
||||||
|
- **Public Sharing**: Publish trace links to your community
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Setup with CrewAI
|
||||||
|
|
||||||
|
<Steps>
|
||||||
|
<Step title="Sign Up & Get API Key">
|
||||||
|
Visit [neatlogs.com](https://neatlogs.com/?utm_source=crewAI-docs), create a project, and copy the API key.
|
||||||
|
</Step>
|
||||||
|
<Step title="Install SDK">
|
||||||
|
```bash
|
||||||
|
pip install neatlogs
|
||||||
|
```
|
||||||
|
(Latest version 0.8.0, Python 3.8+; MIT license)
|
||||||
|
</Step>
|
||||||
|
<Step title="Initialize Neatlogs">
|
||||||
|
Before starting your CrewAI agents, add:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import neatlogs
|
||||||
|
neatlogs.init("YOUR_PROJECT_API_KEY")
|
||||||
|
```
|
||||||
|
|
||||||
|
Agents run as usual. Neatlogs captures everything automatically.
|
||||||
|
|
||||||
|
</Step>
|
||||||
|
</Steps>
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Under the Hood
|
||||||
|
|
||||||
|
According to GitHub, Neatlogs:
|
||||||
|
|
||||||
|
- Captures thoughts, tool calls, responses, errors, and token stats
|
||||||
|
- Supports AI-powered task generation and robust evaluation workflows
|
||||||
|
|
||||||
|
All with just two lines of code.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Watch It Work
|
||||||
|
|
||||||
|
### 🔍 Full Demo (4 min)
|
||||||
|
|
||||||
|
<iframe
|
||||||
|
width="100%"
|
||||||
|
height="315"
|
||||||
|
src="https://www.youtube.com/embed/8KDme9T2I7Q?si=b8oHteaBwFNs_Duk"
|
||||||
|
title="YouTube video player"
|
||||||
|
frameBorder="0"
|
||||||
|
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
|
||||||
|
allowFullScreen
|
||||||
|
></iframe>
|
||||||
|
|
||||||
|
### ⚙️ CrewAI Integration (30 s)
|
||||||
|
|
||||||
|
<iframe
|
||||||
|
className="w-full aspect-video rounded-xl"
|
||||||
|
src="https://www.loom.com/embed/9c78b552af43452bb3e4783cb8d91230?sid=e9d7d370-a91a-49b0-809e-2f375d9e801d"
|
||||||
|
title="Loom video player"
|
||||||
|
frameBorder="0"
|
||||||
|
allowFullScreen
|
||||||
|
></iframe>
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Links & Support
|
||||||
|
|
||||||
|
- 📘 [Neatlogs Docs](https://docs.neatlogs.com/)
|
||||||
|
- 🔐 [Dashboard & API Key](https://app.neatlogs.com/)
|
||||||
|
- 🐦 [Follow on Twitter](https://twitter.com/neatlogs)
|
||||||
|
- 📧 Contact: hello@neatlogs.com
|
||||||
|
- 🛠 [GitHub SDK](https://github.com/NeatLogs/neatlogs)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## TL;DR
|
||||||
|
|
||||||
|
With just:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install neatlogs
|
||||||
|
|
||||||
|
import neatlogs
|
||||||
|
neatlogs.init("YOUR_API_KEY")
|
||||||
|
```
|
||||||
|
You can now capture, understand, share, and act on your CrewAI agent runs in seconds.
|
||||||
|
No setup overhead. Full trace transparency. Full team collaboration.
|
|
||||||
BIN
docs/images/neatlogs-1.png
Normal file
BIN
docs/images/neatlogs-1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 222 KiB |
BIN
docs/images/neatlogs-2.png
Normal file
BIN
docs/images/neatlogs-2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 329 KiB |
BIN
docs/images/neatlogs-3.png
Normal file
BIN
docs/images/neatlogs-3.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 590 KiB |
BIN
docs/images/neatlogs-4.png
Normal file
BIN
docs/images/neatlogs-4.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 216 KiB |
BIN
docs/images/neatlogs-5.png
Normal file
BIN
docs/images/neatlogs-5.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 277 KiB |
@@ -149,34 +149,33 @@ from crewai_tools import SerperDevTool
|
|||||||
|
|
||||||
# Crie um agente com todos os parâmetros disponíveis
|
# Crie um agente com todos os parâmetros disponíveis
|
||||||
agent = Agent(
|
agent = Agent(
|
||||||
role="Senior Data Scientist",
|
role="Cientista de Dados Sênior",
|
||||||
goal="Analyze and interpret complex datasets to provide actionable insights",
|
goal="Analisar e interpretar conjuntos de dados complexos para fornecer insights acionáveis",
|
||||||
backstory="With over 10 years of experience in data science and machine learning, "
|
backstory="Com mais de 10 anos de experiência em ciência de dados e aprendizado de máquina, você é especialista em encontrar padrões em grandes volumes de dados.",
|
||||||
"you excel at finding patterns in complex datasets.",
|
llm="gpt-4", # Padrão: OPENAI_MODEL_NAME ou "gpt-4"
|
||||||
llm="gpt-4", # Default: OPENAI_MODEL_NAME or "gpt-4"
|
function_calling_llm=None, # Opcional: LLM separado para chamadas de ferramentas
|
||||||
function_calling_llm=None, # Optional: Separate LLM for tool calling
|
verbose=False, # Padrão: False
|
||||||
verbose=False, # Default: False
|
allow_delegation=False, # Padrão: False
|
||||||
allow_delegation=False, # Default: False
|
max_iter=20, # Padrão: 20 iterações
|
||||||
max_iter=20, # Default: 20 iterations
|
max_rpm=None, # Opcional: Limite de requisições por minuto
|
||||||
max_rpm=None, # Optional: Rate limit for API calls
|
max_execution_time=None, # Opcional: Tempo máximo de execução em segundos
|
||||||
max_execution_time=None, # Optional: Maximum execution time in seconds
|
max_retry_limit=2, # Padrão: 2 tentativas em caso de erro
|
||||||
max_retry_limit=2, # Default: 2 retries on error
|
allow_code_execution=False, # Padrão: False
|
||||||
allow_code_execution=False, # Default: False
|
code_execution_mode="safe", # Padrão: "safe" (opções: "safe", "unsafe")
|
||||||
code_execution_mode="safe", # Default: "safe" (options: "safe", "unsafe")
|
respect_context_window=True, # Padrão: True
|
||||||
respect_context_window=True, # Default: True
|
use_system_prompt=True, # Padrão: True
|
||||||
use_system_prompt=True, # Default: True
|
multimodal=False, # Padrão: False
|
||||||
multimodal=False, # Default: False
|
inject_date=False, # Padrão: False
|
||||||
inject_date=False, # Default: False
|
date_format="%Y-%m-%d", # Padrão: formato ISO
|
||||||
date_format="%Y-%m-%d", # Default: ISO format
|
reasoning=False, # Padrão: False
|
||||||
reasoning=False, # Default: False
|
max_reasoning_attempts=None, # Padrão: None
|
||||||
max_reasoning_attempts=None, # Default: None
|
tools=[SerperDevTool()], # Opcional: Lista de ferramentas
|
||||||
tools=[SerperDevTool()], # Optional: List of tools
|
knowledge_sources=None, # Opcional: Lista de fontes de conhecimento
|
||||||
knowledge_sources=None, # Optional: List of knowledge sources
|
embedder=None, # Opcional: Configuração de embedder customizado
|
||||||
embedder=None, # Optional: Custom embedder configuration
|
system_template=None, # Opcional: Template de prompt de sistema
|
||||||
system_template=None, # Optional: Custom system prompt template
|
prompt_template=None, # Opcional: Template de prompt customizado
|
||||||
prompt_template=None, # Optional: Custom prompt template
|
response_template=None, # Opcional: Template de resposta customizado
|
||||||
response_template=None, # Optional: Custom response template
|
step_callback=None, # Opcional: Função de callback para monitoramento
|
||||||
step_callback=None, # Optional: Callback function for monitoring
|
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -185,65 +184,62 @@ Vamos detalhar algumas combinações de parâmetros-chave para casos de uso comu
|
|||||||
#### Agente de Pesquisa Básico
|
#### Agente de Pesquisa Básico
|
||||||
```python Code
|
```python Code
|
||||||
research_agent = Agent(
|
research_agent = Agent(
|
||||||
role="Research Analyst",
|
role="Analista de Pesquisa",
|
||||||
goal="Find and summarize information about specific topics",
|
goal="Encontrar e resumir informações sobre tópicos específicos",
|
||||||
backstory="You are an experienced researcher with attention to detail",
|
backstory="Você é um pesquisador experiente com atenção aos detalhes",
|
||||||
tools=[SerperDevTool()],
|
tools=[SerperDevTool()],
|
||||||
verbose=True # Enable logging for debugging
|
verbose=True # Ativa logs para depuração
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Agente de Desenvolvimento de Código
|
#### Agente de Desenvolvimento de Código
|
||||||
```python Code
|
```python Code
|
||||||
dev_agent = Agent(
|
dev_agent = Agent(
|
||||||
role="Senior Python Developer",
|
role="Desenvolvedor Python Sênior",
|
||||||
goal="Write and debug Python code",
|
goal="Escrever e depurar códigos Python",
|
||||||
backstory="Expert Python developer with 10 years of experience",
|
backstory="Desenvolvedor Python especialista com 10 anos de experiência",
|
||||||
allow_code_execution=True,
|
allow_code_execution=True,
|
||||||
code_execution_mode="safe", # Uses Docker for safety
|
code_execution_mode="safe", # Usa Docker para segurança
|
||||||
max_execution_time=300, # 5-minute timeout
|
max_execution_time=300, # Limite de 5 minutos
|
||||||
max_retry_limit=3 # More retries for complex code tasks
|
max_retry_limit=3 # Mais tentativas para tarefas complexas
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Agente de Análise de Longa Duração
|
#### Agente de Análise de Longa Duração
|
||||||
```python Code
|
```python Code
|
||||||
analysis_agent = Agent(
|
analysis_agent = Agent(
|
||||||
role="Data Analyst",
|
role="Analista de Dados",
|
||||||
goal="Perform deep analysis of large datasets",
|
goal="Realizar análise aprofundada de grandes conjuntos de dados",
|
||||||
backstory="Specialized in big data analysis and pattern recognition",
|
backstory="Especialista em análise de big data e reconhecimento de padrões",
|
||||||
memory=True,
|
memory=True,
|
||||||
respect_context_window=True,
|
respect_context_window=True,
|
||||||
max_rpm=10, # Limit API calls
|
max_rpm=10, # Limite de requisições por minuto
|
||||||
function_calling_llm="gpt-4o-mini" # Cheaper model for tool calls
|
function_calling_llm="gpt-4o-mini" # Modelo mais econômico para chamadas de ferramentas
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Agente com Template Personalizado
|
#### Agente com Template Personalizado
|
||||||
```python Code
|
```python Code
|
||||||
custom_agent = Agent(
|
custom_agent = Agent(
|
||||||
role="Customer Service Representative",
|
role="Atendente de Suporte ao Cliente",
|
||||||
goal="Assist customers with their inquiries",
|
goal="Auxiliar clientes com suas dúvidas e solicitações",
|
||||||
backstory="Experienced in customer support with a focus on satisfaction",
|
backstory="Experiente em atendimento ao cliente com foco em satisfação",
|
||||||
system_template="""<|start_header_id|>system<|end_header_id|>
|
system_template="""<|start_header_id|>system<|end_header_id|>\n {{ .System }}<|eot_id|>""",
|
||||||
{{ .System }}<|eot_id|>""",
|
prompt_template="""<|start_header_id|>user<|end_header_id|>\n {{ .Prompt }}<|eot_id|>""",
|
||||||
prompt_template="""<|start_header_id|>user<|end_header_id|>
|
response_template="""<|start_header_id|>assistant<|end_header_id|>\n {{ .Response }}<|eot_id|>""",
|
||||||
{{ .Prompt }}<|eot_id|>""",
|
|
||||||
response_template="""<|start_header_id|>assistant<|end_header_id|>
|
|
||||||
{{ .Response }}<|eot_id|>""",
|
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Agente Ciente de Data, com Raciocínio
|
#### Agente Ciente de Data, com Raciocínio
|
||||||
```python Code
|
```python Code
|
||||||
strategic_agent = Agent(
|
strategic_agent = Agent(
|
||||||
role="Market Analyst",
|
role="Analista de Mercado",
|
||||||
goal="Track market movements with precise date references and strategic planning",
|
goal="Acompanhar movimentos do mercado com referências de datas precisas e planejamento estratégico",
|
||||||
backstory="Expert in time-sensitive financial analysis and strategic reporting",
|
backstory="Especialista em análise financeira sensível ao tempo e relatórios estratégicos",
|
||||||
inject_date=True, # Automatically inject current date into tasks
|
inject_date=True, # Injeta automaticamente a data atual nas tarefas
|
||||||
date_format="%B %d, %Y", # Format as "May 21, 2025"
|
date_format="%d de %B de %Y", # Exemplo: "21 de maio de 2025"
|
||||||
reasoning=True, # Enable strategic planning
|
reasoning=True, # Ativa planejamento estratégico
|
||||||
max_reasoning_attempts=2, # Limit planning iterations
|
max_reasoning_attempts=2, # Limite de iterações de planejamento
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
@@ -251,12 +247,12 @@ strategic_agent = Agent(
|
|||||||
#### Agente de Raciocínio
|
#### Agente de Raciocínio
|
||||||
```python Code
|
```python Code
|
||||||
reasoning_agent = Agent(
|
reasoning_agent = Agent(
|
||||||
role="Strategic Planner",
|
role="Planejador Estratégico",
|
||||||
goal="Analyze complex problems and create detailed execution plans",
|
goal="Analisar problemas complexos e criar planos de execução detalhados",
|
||||||
backstory="Expert strategic planner who methodically breaks down complex challenges",
|
backstory="Especialista em planejamento estratégico que desmembra desafios complexos metodicamente",
|
||||||
reasoning=True, # Enable reasoning and planning
|
reasoning=True, # Ativa raciocínio e planejamento
|
||||||
max_reasoning_attempts=3, # Limit reasoning attempts
|
max_reasoning_attempts=3, # Limite de tentativas de raciocínio
|
||||||
max_iter=30, # Allow more iterations for complex planning
|
max_iter=30, # Permite mais iterações para planejamento complexo
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
@@ -264,10 +260,10 @@ reasoning_agent = Agent(
|
|||||||
#### Agente Multimodal
|
#### Agente Multimodal
|
||||||
```python Code
|
```python Code
|
||||||
multimodal_agent = Agent(
|
multimodal_agent = Agent(
|
||||||
role="Visual Content Analyst",
|
role="Analista de Conteúdo Visual",
|
||||||
goal="Analyze and process both text and visual content",
|
goal="Analisar e processar tanto conteúdo textual quanto visual",
|
||||||
backstory="Specialized in multimodal analysis combining text and image understanding",
|
backstory="Especialista em análise multimodal combinando compreensão de texto e imagem",
|
||||||
multimodal=True, # Enable multimodal capabilities
|
multimodal=True, # Ativa capacidades multimodais
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
@@ -336,8 +332,8 @@ wiki_tool = WikipediaTools()
|
|||||||
|
|
||||||
# Adicionar ferramentas ao agente
|
# Adicionar ferramentas ao agente
|
||||||
researcher = Agent(
|
researcher = Agent(
|
||||||
role="AI Technology Researcher",
|
role="Pesquisador de Tecnologia em IA",
|
||||||
goal="Research the latest AI developments",
|
goal="Pesquisar os últimos avanços em IA",
|
||||||
tools=[search_tool, wiki_tool],
|
tools=[search_tool, wiki_tool],
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
@@ -351,9 +347,9 @@ Agentes podem manter a memória de suas interações e usar contexto de tarefas
|
|||||||
from crewai import Agent
|
from crewai import Agent
|
||||||
|
|
||||||
analyst = Agent(
|
analyst = Agent(
|
||||||
role="Data Analyst",
|
role="Analista de Dados",
|
||||||
goal="Analyze and remember complex data patterns",
|
goal="Analisar e memorizar padrões complexos de dados",
|
||||||
memory=True, # Enable memory
|
memory=True, # Ativa memória
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
@@ -380,10 +376,10 @@ Esta é a **configuração padrão e recomendada** para a maioria dos casos. Qua
|
|||||||
```python Code
|
```python Code
|
||||||
# Agente com gerenciamento automático de contexto (padrão)
|
# Agente com gerenciamento automático de contexto (padrão)
|
||||||
smart_agent = Agent(
|
smart_agent = Agent(
|
||||||
role="Research Analyst",
|
role="Analista de Pesquisa",
|
||||||
goal="Analyze large documents and datasets",
|
goal="Analisar grandes documentos e conjuntos de dados",
|
||||||
backstory="Expert at processing extensive information",
|
backstory="Especialista em processar informações extensas",
|
||||||
respect_context_window=True, # 🔑 Default: auto-handle context limits
|
respect_context_window=True, # 🔑 Padrão: gerencia limites de contexto automaticamente
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ title: CLI
|
|||||||
description: Aprenda a usar o CLI do CrewAI para interagir com o CrewAI.
|
description: Aprenda a usar o CLI do CrewAI para interagir com o CrewAI.
|
||||||
icon: terminal
|
icon: terminal
|
||||||
---
|
---
|
||||||
|
<Warning>A partir da versão 0.140.0, a plataforma CrewAI Enterprise iniciou um processo de migração de seu provedor de login. Como resultado, o fluxo de autenticação via CLI foi atualizado. Usuários que utilizam o Google para fazer login, ou que criaram conta após 3 de julho de 2025, não poderão fazer login com versões anteriores da biblioteca `crewai`.</Warning>
|
||||||
|
|
||||||
## Visão Geral
|
## Visão Geral
|
||||||
|
|
||||||
@@ -75,6 +76,22 @@ Exemplo:
|
|||||||
crewai train -n 10 -f my_training_data.pkl
|
crewai train -n 10 -f my_training_data.pkl
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Exemplo de uso programático do comando train
|
||||||
|
n_iterations = 2
|
||||||
|
inputs = {"topic": "Treinamento CrewAI"}
|
||||||
|
filename = "seu_modelo.pkl"
|
||||||
|
|
||||||
|
try:
|
||||||
|
SuaCrew().crew().train(
|
||||||
|
n_iterations=n_iterations,
|
||||||
|
inputs=inputs,
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
raise Exception(f"Ocorreu um erro ao treinar a crew: {e}")
|
||||||
|
```
|
||||||
|
|
||||||
### 4. Replay
|
### 4. Replay
|
||||||
|
|
||||||
Reexecute a execução do crew a partir de uma tarefa específica.
|
Reexecute a execução do crew a partir de uma tarefa específica.
|
||||||
|
|||||||
@@ -15,18 +15,18 @@ from crewai import Agent, Crew, Task
|
|||||||
|
|
||||||
# Enable collaboration for agents
|
# Enable collaboration for agents
|
||||||
researcher = Agent(
|
researcher = Agent(
|
||||||
role="Research Specialist",
|
role="Especialista em Pesquisa",
|
||||||
goal="Conduct thorough research on any topic",
|
goal="Realizar pesquisas aprofundadas sobre qualquer tema",
|
||||||
backstory="Expert researcher with access to various sources",
|
backstory="Pesquisador especialista com acesso a diversas fontes",
|
||||||
allow_delegation=True, # 🔑 Key setting for collaboration
|
allow_delegation=True, # 🔑 Configuração chave para colaboração
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
writer = Agent(
|
writer = Agent(
|
||||||
role="Content Writer",
|
role="Redator de Conteúdo",
|
||||||
goal="Create engaging content based on research",
|
goal="Criar conteúdo envolvente com base em pesquisas",
|
||||||
backstory="Skilled writer who transforms research into compelling content",
|
backstory="Redator habilidoso que transforma pesquisas em conteúdo atraente",
|
||||||
allow_delegation=True, # 🔑 Enables asking questions to other agents
|
allow_delegation=True, # 🔑 Permite fazer perguntas a outros agentes
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -67,19 +67,17 @@ from crewai import Agent, Crew, Task, Process
|
|||||||
|
|
||||||
# Create collaborative agents
|
# Create collaborative agents
|
||||||
researcher = Agent(
|
researcher = Agent(
|
||||||
role="Research Specialist",
|
role="Especialista em Pesquisa",
|
||||||
goal="Find accurate, up-to-date information on any topic",
|
goal="Realizar pesquisas aprofundadas sobre qualquer tema",
|
||||||
backstory="""You're a meticulous researcher with expertise in finding
|
backstory="Pesquisador especialista com acesso a diversas fontes",
|
||||||
reliable sources and fact-checking information across various domains.""",
|
|
||||||
allow_delegation=True,
|
allow_delegation=True,
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
writer = Agent(
|
writer = Agent(
|
||||||
role="Content Writer",
|
role="Redator de Conteúdo",
|
||||||
goal="Create engaging, well-structured content",
|
goal="Criar conteúdo envolvente com base em pesquisas",
|
||||||
backstory="""You're a skilled content writer who excels at transforming
|
backstory="Redator habilidoso que transforma pesquisas em conteúdo atraente",
|
||||||
research into compelling, readable content for different audiences.""",
|
|
||||||
allow_delegation=True,
|
allow_delegation=True,
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
@@ -95,17 +93,17 @@ editor = Agent(
|
|||||||
|
|
||||||
# Create a task that encourages collaboration
|
# Create a task that encourages collaboration
|
||||||
article_task = Task(
|
article_task = Task(
|
||||||
description="""Write a comprehensive 1000-word article about 'The Future of AI in Healthcare'.
|
description="""Escreva um artigo abrangente de 1000 palavras sobre 'O Futuro da IA na Saúde'.
|
||||||
|
|
||||||
The article should include:
|
O artigo deve incluir:
|
||||||
- Current AI applications in healthcare
|
- Aplicações atuais de IA na saúde
|
||||||
- Emerging trends and technologies
|
- Tendências e tecnologias emergentes
|
||||||
- Potential challenges and ethical considerations
|
- Desafios potenciais e considerações éticas
|
||||||
- Expert predictions for the next 5 years
|
- Previsões de especialistas para os próximos 5 anos
|
||||||
|
|
||||||
Collaborate with your teammates to ensure accuracy and quality.""",
|
Colabore com seus colegas para garantir precisão e qualidade.""",
|
||||||
expected_output="A well-researched, engaging 1000-word article with proper structure and citations",
|
expected_output="Um artigo bem pesquisado, envolvente, com 1000 palavras, estrutura adequada e citações",
|
||||||
agent=writer # Writer leads, but can delegate research to researcher
|
agent=writer # O redator lidera, mas pode delegar pesquisa ao pesquisador
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create collaborative crew
|
# Create collaborative crew
|
||||||
@@ -124,37 +122,37 @@ result = crew.kickoff()
|
|||||||
### Padrão 1: Pesquisa → Redação → Edição
|
### Padrão 1: Pesquisa → Redação → Edição
|
||||||
```python
|
```python
|
||||||
research_task = Task(
|
research_task = Task(
|
||||||
description="Research the latest developments in quantum computing",
|
description="Pesquise os últimos avanços em computação quântica",
|
||||||
expected_output="Comprehensive research summary with key findings and sources",
|
expected_output="Resumo abrangente da pesquisa com principais descobertas e fontes",
|
||||||
agent=researcher
|
agent=researcher
|
||||||
)
|
)
|
||||||
|
|
||||||
writing_task = Task(
|
writing_task = Task(
|
||||||
description="Write an article based on the research findings",
|
description="Escreva um artigo com base nos achados da pesquisa",
|
||||||
expected_output="Engaging 800-word article about quantum computing",
|
expected_output="Artigo envolvente de 800 palavras sobre computação quântica",
|
||||||
agent=writer,
|
agent=writer,
|
||||||
context=[research_task] # Gets research output as context
|
context=[research_task] # Recebe a saída da pesquisa como contexto
|
||||||
)
|
)
|
||||||
|
|
||||||
editing_task = Task(
|
editing_task = Task(
|
||||||
description="Edit and polish the article for publication",
|
description="Edite e revise o artigo para publicação",
|
||||||
expected_output="Publication-ready article with improved clarity and flow",
|
expected_output="Artigo pronto para publicação, com clareza e fluidez aprimoradas",
|
||||||
agent=editor,
|
agent=editor,
|
||||||
context=[writing_task] # Gets article draft as context
|
context=[writing_task] # Recebe o rascunho do artigo como contexto
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Padrão 2: Tarefa Única Colaborativa
|
### Padrão 2: Tarefa Única Colaborativa
|
||||||
```python
|
```python
|
||||||
collaborative_task = Task(
|
collaborative_task = Task(
|
||||||
description="""Create a marketing strategy for a new AI product.
|
description="""Crie uma estratégia de marketing para um novo produto de IA.
|
||||||
|
|
||||||
Writer: Focus on messaging and content strategy
|
Redator: Foque em mensagens e estratégia de conteúdo
|
||||||
Researcher: Provide market analysis and competitor insights
|
Pesquisador: Forneça análise de mercado e insights de concorrentes
|
||||||
|
|
||||||
Work together to create a comprehensive strategy.""",
|
Trabalhem juntos para criar uma estratégia abrangente.""",
|
||||||
expected_output="Complete marketing strategy with research backing",
|
expected_output="Estratégia de marketing completa com embasamento em pesquisa",
|
||||||
agent=writer # Lead agent, but can delegate to researcher
|
agent=writer # Agente líder, mas pode delegar ao pesquisador
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -167,35 +165,35 @@ from crewai import Agent, Crew, Task, Process
|
|||||||
|
|
||||||
# Manager agent coordinates the team
|
# Manager agent coordinates the team
|
||||||
manager = Agent(
|
manager = Agent(
|
||||||
role="Project Manager",
|
role="Gerente de Projetos",
|
||||||
goal="Coordinate team efforts and ensure project success",
|
goal="Coordenar esforços da equipe e garantir o sucesso do projeto",
|
||||||
backstory="Experienced project manager skilled at delegation and quality control",
|
backstory="Gerente de projetos experiente, habilidoso em delegação e controle de qualidade",
|
||||||
allow_delegation=True,
|
allow_delegation=True,
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# Specialist agents
|
# Specialist agents
|
||||||
researcher = Agent(
|
researcher = Agent(
|
||||||
role="Researcher",
|
role="Pesquisador",
|
||||||
goal="Provide accurate research and analysis",
|
goal="Fornecer pesquisa e análise precisas",
|
||||||
backstory="Expert researcher with deep analytical skills",
|
backstory="Pesquisador especialista com habilidades analíticas profundas",
|
||||||
allow_delegation=False, # Specialists focus on their expertise
|
allow_delegation=False, # Especialistas focam em sua expertise
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
writer = Agent(
|
writer = Agent(
|
||||||
role="Writer",
|
role="Redator",
|
||||||
goal="Create compelling content",
|
goal="Criar conteúdo envolvente",
|
||||||
backstory="Skilled writer who creates engaging content",
|
backstory="Redator habilidoso que cria conteúdo atraente",
|
||||||
allow_delegation=False,
|
allow_delegation=False,
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# Manager-led task
|
# Manager-led task
|
||||||
project_task = Task(
|
project_task = Task(
|
||||||
description="Create a comprehensive market analysis report with recommendations",
|
description="Crie um relatório de análise de mercado completo com recomendações",
|
||||||
expected_output="Executive summary, detailed analysis, and strategic recommendations",
|
expected_output="Resumo executivo, análise detalhada e recomendações estratégicas",
|
||||||
agent=manager # Manager will delegate to specialists
|
agent=manager # O gerente delega para especialistas
|
||||||
)
|
)
|
||||||
|
|
||||||
# Hierarchical crew
|
# Hierarchical crew
|
||||||
|
|||||||
@@ -153,32 +153,32 @@ from crewai_tools import YourCustomTool
|
|||||||
class YourCrewName:
|
class YourCrewName:
|
||||||
def agent_one(self) -> Agent:
|
def agent_one(self) -> Agent:
|
||||||
return Agent(
|
return Agent(
|
||||||
role="Data Analyst",
|
role="Analista de Dados",
|
||||||
goal="Analyze data trends in the market",
|
goal="Analisar tendências de dados no mercado brasileiro",
|
||||||
backstory="An experienced data analyst with a background in economics",
|
backstory="Analista experiente com formação em economia",
|
||||||
verbose=True,
|
verbose=True,
|
||||||
tools=[YourCustomTool()]
|
tools=[YourCustomTool()]
|
||||||
)
|
)
|
||||||
|
|
||||||
def agent_two(self) -> Agent:
|
def agent_two(self) -> Agent:
|
||||||
return Agent(
|
return Agent(
|
||||||
role="Market Researcher",
|
role="Pesquisador de Mercado",
|
||||||
goal="Gather information on market dynamics",
|
goal="Coletar informações sobre a dinâmica do mercado nacional",
|
||||||
backstory="A diligent researcher with a keen eye for detail",
|
backstory="Pesquisador dedicado com olhar atento aos detalhes",
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
def task_one(self) -> Task:
|
def task_one(self) -> Task:
|
||||||
return Task(
|
return Task(
|
||||||
description="Collect recent market data and identify trends.",
|
description="Coletar dados recentes do mercado brasileiro e identificar tendências.",
|
||||||
expected_output="A report summarizing key trends in the market.",
|
expected_output="Um relatório resumido com as principais tendências do mercado.",
|
||||||
agent=self.agent_one()
|
agent=self.agent_one()
|
||||||
)
|
)
|
||||||
|
|
||||||
def task_two(self) -> Task:
|
def task_two(self) -> Task:
|
||||||
return Task(
|
return Task(
|
||||||
description="Research factors affecting market dynamics.",
|
description="Pesquisar fatores que afetam a dinâmica do mercado nacional.",
|
||||||
expected_output="An analysis of factors influencing the market.",
|
expected_output="Uma análise dos fatores que influenciam o mercado.",
|
||||||
agent=self.agent_two()
|
agent=self.agent_two()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -51,24 +51,24 @@ from crewai.utilities.events import (
|
|||||||
)
|
)
|
||||||
from crewai.utilities.events.base_event_listener import BaseEventListener
|
from crewai.utilities.events.base_event_listener import BaseEventListener
|
||||||
|
|
||||||
class MyCustomListener(BaseEventListener):
|
class MeuListenerPersonalizado(BaseEventListener):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
def setup_listeners(self, crewai_event_bus):
|
def setup_listeners(self, crewai_event_bus):
|
||||||
@crewai_event_bus.on(CrewKickoffStartedEvent)
|
@crewai_event_bus.on(CrewKickoffStartedEvent)
|
||||||
def on_crew_started(source, event):
|
def ao_iniciar_crew(source, event):
|
||||||
print(f"Crew '{event.crew_name}' has started execution!")
|
print(f"Crew '{event.crew_name}' iniciou a execução!")
|
||||||
|
|
||||||
@crewai_event_bus.on(CrewKickoffCompletedEvent)
|
@crewai_event_bus.on(CrewKickoffCompletedEvent)
|
||||||
def on_crew_completed(source, event):
|
def ao_finalizar_crew(source, event):
|
||||||
print(f"Crew '{event.crew_name}' has completed execution!")
|
print(f"Crew '{event.crew_name}' finalizou a execução!")
|
||||||
print(f"Output: {event.output}")
|
print(f"Saída: {event.output}")
|
||||||
|
|
||||||
@crewai_event_bus.on(AgentExecutionCompletedEvent)
|
@crewai_event_bus.on(AgentExecutionCompletedEvent)
|
||||||
def on_agent_execution_completed(source, event):
|
def ao_finalizar_execucao_agente(source, event):
|
||||||
print(f"Agent '{event.agent.role}' completed task")
|
print(f"Agente '{event.agent.role}' concluiu a tarefa")
|
||||||
print(f"Output: {event.output}")
|
print(f"Saída: {event.output}")
|
||||||
```
|
```
|
||||||
|
|
||||||
## Registrando Corretamente Seu Listener
|
## Registrando Corretamente Seu Listener
|
||||||
|
|||||||
@@ -486,8 +486,9 @@ Existem duas formas de executar um flow:
|
|||||||
Você pode executar um flow programaticamente criando uma instância da sua classe de flow e chamando o método `kickoff()`:
|
Você pode executar um flow programaticamente criando uma instância da sua classe de flow e chamando o método `kickoff()`:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
flow = ExampleFlow()
|
# Exemplo de execução de flow em português
|
||||||
result = flow.kickoff()
|
flow = ExemploFlow()
|
||||||
|
resultado = flow.kickoff()
|
||||||
```
|
```
|
||||||
|
|
||||||
### Usando a CLI
|
### Usando a CLI
|
||||||
|
|||||||
@@ -39,17 +39,17 @@ llm = LLM(model="gpt-4o-mini", temperature=0)
|
|||||||
|
|
||||||
# Create an agent with the knowledge store
|
# Create an agent with the knowledge store
|
||||||
agent = Agent(
|
agent = Agent(
|
||||||
role="About User",
|
role="Sobre o Usuário",
|
||||||
goal="You know everything about the user.",
|
goal="Você sabe tudo sobre o usuário.",
|
||||||
backstory="You are a master at understanding people and their preferences.",
|
backstory="Você é mestre em entender pessoas e suas preferências.",
|
||||||
verbose=True,
|
verbose=True,
|
||||||
allow_delegation=False,
|
allow_delegation=False,
|
||||||
llm=llm,
|
llm=llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
task = Task(
|
task = Task(
|
||||||
description="Answer the following questions about the user: {question}",
|
description="Responda às seguintes perguntas sobre o usuário: {question}",
|
||||||
expected_output="An answer to the question.",
|
expected_output="Uma resposta para a pergunta.",
|
||||||
agent=agent,
|
agent=agent,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -87,17 +87,17 @@ llm = LLM(model="gpt-4o-mini", temperature=0)
|
|||||||
|
|
||||||
# Create an agent with the knowledge store
|
# Create an agent with the knowledge store
|
||||||
agent = Agent(
|
agent = Agent(
|
||||||
role="About papers",
|
role="Sobre artigos",
|
||||||
goal="You know everything about the papers.",
|
goal="Você sabe tudo sobre os artigos.",
|
||||||
backstory="You are a master at understanding papers and their content.",
|
backstory="Você é mestre em entender artigos e seus conteúdos.",
|
||||||
verbose=True,
|
verbose=True,
|
||||||
allow_delegation=False,
|
allow_delegation=False,
|
||||||
llm=llm,
|
llm=llm,
|
||||||
)
|
)
|
||||||
|
|
||||||
task = Task(
|
task = Task(
|
||||||
description="Answer the following questions about the papers: {question}",
|
description="Responda às seguintes perguntas sobre os artigos: {question}",
|
||||||
expected_output="An answer to the question.",
|
expected_output="Uma resposta para a pergunta.",
|
||||||
agent=agent,
|
agent=agent,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -201,16 +201,16 @@ specialist_knowledge = StringKnowledgeSource(
|
|||||||
)
|
)
|
||||||
|
|
||||||
specialist_agent = Agent(
|
specialist_agent = Agent(
|
||||||
role="Technical Specialist",
|
role="Especialista Técnico",
|
||||||
goal="Provide technical expertise",
|
goal="Fornecer expertise técnica",
|
||||||
backstory="Expert in specialized technical domains",
|
backstory="Especialista em domínios técnicos especializados",
|
||||||
knowledge_sources=[specialist_knowledge] # Agent-specific knowledge
|
knowledge_sources=[specialist_knowledge] # Conhecimento específico do agente
|
||||||
)
|
)
|
||||||
|
|
||||||
task = Task(
|
task = Task(
|
||||||
description="Answer technical questions",
|
description="Responda perguntas técnicas",
|
||||||
agent=specialist_agent,
|
agent=specialist_agent,
|
||||||
expected_output="Technical answer"
|
expected_output="Resposta técnica"
|
||||||
)
|
)
|
||||||
|
|
||||||
# No crew-level knowledge required
|
# No crew-level knowledge required
|
||||||
@@ -240,7 +240,7 @@ Cada nível de knowledge usa coleções de armazenamento independentes:
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
# Agent knowledge storage
|
# Agent knowledge storage
|
||||||
agent_collection_name = agent.role # e.g., "Technical Specialist"
|
agent_collection_name = agent.role # e.g., "Especialista Técnico"
|
||||||
|
|
||||||
# Crew knowledge storage
|
# Crew knowledge storage
|
||||||
crew_collection_name = "crew"
|
crew_collection_name = "crew"
|
||||||
@@ -248,7 +248,7 @@ crew_collection_name = "crew"
|
|||||||
# Both stored in same ChromaDB instance but different collections
|
# Both stored in same ChromaDB instance but different collections
|
||||||
# Path: ~/.local/share/CrewAI/{project}/knowledge/
|
# Path: ~/.local/share/CrewAI/{project}/knowledge/
|
||||||
# ├── crew/ # Crew knowledge collection
|
# ├── crew/ # Crew knowledge collection
|
||||||
# ├── Technical Specialist/ # Agent knowledge collection
|
# ├── Especialista Técnico/ # Agent knowledge collection
|
||||||
# └── Another Agent Role/ # Another agent's collection
|
# └── Another Agent Role/ # Another agent's collection
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -265,7 +265,7 @@ agent_knowledge = StringKnowledgeSource(
|
|||||||
)
|
)
|
||||||
|
|
||||||
agent = Agent(
|
agent = Agent(
|
||||||
role="Specialist",
|
role="Especialista",
|
||||||
goal="Use specialized knowledge",
|
goal="Use specialized knowledge",
|
||||||
backstory="Expert with specific knowledge",
|
backstory="Expert with specific knowledge",
|
||||||
knowledge_sources=[agent_knowledge],
|
knowledge_sources=[agent_knowledge],
|
||||||
@@ -299,10 +299,10 @@ specialist_knowledge = StringKnowledgeSource(
|
|||||||
)
|
)
|
||||||
|
|
||||||
specialist = Agent(
|
specialist = Agent(
|
||||||
role="Technical Specialist",
|
role="Especialista Técnico",
|
||||||
goal="Provide technical expertise",
|
goal="Fornecer expertise técnica",
|
||||||
backstory="Technical expert",
|
backstory="Especialista em domínios técnicos especializados",
|
||||||
knowledge_sources=[specialist_knowledge] # Agent-specific
|
knowledge_sources=[specialist_knowledge] # Conhecimento específico do agente
|
||||||
)
|
)
|
||||||
|
|
||||||
generalist = Agent(
|
generalist = Agent(
|
||||||
|
|||||||
@@ -78,15 +78,15 @@ Existem diferentes locais no código do CrewAI onde você pode especificar o mod
|
|||||||
|
|
||||||
# Configuração avançada com parâmetros detalhados
|
# Configuração avançada com parâmetros detalhados
|
||||||
llm = LLM(
|
llm = LLM(
|
||||||
model="model-id-here", # gpt-4o, gemini-2.0-flash, anthropic/claude...
|
model="openai/gpt-4",
|
||||||
temperature=0.7, # Mais alto para saídas criativas
|
temperature=0.8,
|
||||||
timeout=120, # Segundos para aguardar resposta
|
max_tokens=150,
|
||||||
max_tokens=4000, # Comprimento máximo da resposta
|
top_p=0.9,
|
||||||
top_p=0.9, # Parâmetro de amostragem nucleus
|
frequency_penalty=0.1,
|
||||||
frequency_penalty=0.1 , # Reduz repetição
|
presence_penalty=0.1,
|
||||||
presence_penalty=0.1, # Incentiva diversidade de tópicos
|
response_format={"type":"json"},
|
||||||
response_format={"type": "json"}, # Para respostas estruturadas
|
stop=["FIM"],
|
||||||
seed=42 # Para resultados reproduzíveis
|
seed=42
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -127,13 +127,13 @@ Nesta seção, você encontrará exemplos detalhados que ajudam a selecionar, co
|
|||||||
from crewai import LLM
|
from crewai import LLM
|
||||||
|
|
||||||
llm = LLM(
|
llm = LLM(
|
||||||
model="openai/gpt-4", # chamar modelo por provider/model_name
|
model="openai/gpt-4",
|
||||||
temperature=0.8,
|
temperature=0.8,
|
||||||
max_tokens=150,
|
max_tokens=150,
|
||||||
top_p=0.9,
|
top_p=0.9,
|
||||||
frequency_penalty=0.1,
|
frequency_penalty=0.1,
|
||||||
presence_penalty=0.1,
|
presence_penalty=0.1,
|
||||||
stop=["END"],
|
stop=["FIM"],
|
||||||
seed=42
|
seed=42
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
@@ -169,7 +169,7 @@ Nesta seção, você encontrará exemplos detalhados que ajudam a selecionar, co
|
|||||||
llm = LLM(
|
llm = LLM(
|
||||||
model="meta_llama/Llama-4-Scout-17B-16E-Instruct-FP8",
|
model="meta_llama/Llama-4-Scout-17B-16E-Instruct-FP8",
|
||||||
temperature=0.8,
|
temperature=0.8,
|
||||||
stop=["END"],
|
stop=["FIM"],
|
||||||
seed=42
|
seed=42
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ Começar a usar o recurso de planejamento é muito simples, o único passo neces
|
|||||||
from crewai import Crew, Agent, Task, Process
|
from crewai import Crew, Agent, Task, Process
|
||||||
|
|
||||||
# Monte sua crew com capacidades de planejamento
|
# Monte sua crew com capacidades de planejamento
|
||||||
my_crew = Crew(
|
minha_crew = Crew(
|
||||||
agents=self.agents,
|
agents=self.agents,
|
||||||
tasks=self.tasks,
|
tasks=self.tasks,
|
||||||
process=Process.sequential,
|
process=Process.sequential,
|
||||||
|
|||||||
@@ -28,23 +28,23 @@ from crewai import Crew, Process
|
|||||||
|
|
||||||
# Exemplo: Criando uma crew com processo sequencial
|
# Exemplo: Criando uma crew com processo sequencial
|
||||||
crew = Crew(
|
crew = Crew(
|
||||||
agents=my_agents,
|
agents=meus_agentes,
|
||||||
tasks=my_tasks,
|
tasks=minhas_tarefas,
|
||||||
process=Process.sequential
|
process=Process.sequential
|
||||||
)
|
)
|
||||||
|
|
||||||
# Exemplo: Criando uma crew com processo hierárquico
|
# Exemplo: Criando uma crew com processo hierárquico
|
||||||
# Certifique-se de fornecer um manager_llm ou manager_agent
|
# Certifique-se de fornecer um manager_llm ou manager_agent
|
||||||
crew = Crew(
|
crew = Crew(
|
||||||
agents=my_agents,
|
agents=meus_agentes,
|
||||||
tasks=my_tasks,
|
tasks=minhas_tarefas,
|
||||||
process=Process.hierarchical,
|
process=Process.hierarchical,
|
||||||
manager_llm="gpt-4o"
|
manager_llm="gpt-4o"
|
||||||
# ou
|
# ou
|
||||||
# manager_agent=my_manager_agent
|
# manager_agent=meu_agente_gerente
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
**Nota:** Certifique-se de que `my_agents` e `my_tasks` estejam definidos antes de criar o objeto `Crew`, e para o processo hierárquico, é necessário também fornecer o `manager_llm` ou `manager_agent`.
|
**Nota:** Certifique-se de que `meus_agentes` e `minhas_tarefas` estejam definidos antes de criar o objeto `Crew`, e para o processo hierárquico, é necessário também fornecer o `manager_llm` ou `manager_agent`.
|
||||||
|
|
||||||
## Processo Sequencial
|
## Processo Sequencial
|
||||||
|
|
||||||
|
|||||||
@@ -15,12 +15,12 @@ Para habilitar o reasoning para um agente, basta definir `reasoning=True` ao cri
|
|||||||
```python
|
```python
|
||||||
from crewai import Agent
|
from crewai import Agent
|
||||||
|
|
||||||
agent = Agent(
|
analista = Agent(
|
||||||
role="Data Analyst",
|
role="Analista de Dados",
|
||||||
goal="Analyze complex datasets and provide insights",
|
goal="Analisar dados e fornecer insights",
|
||||||
backstory="You are an experienced data analyst with expertise in finding patterns in complex data.",
|
backstory="Você é um analista de dados especialista.",
|
||||||
reasoning=True, # Enable reasoning
|
reasoning=True,
|
||||||
max_reasoning_attempts=3 # Optional: Set a maximum number of reasoning attempts
|
max_reasoning_attempts=3 # Opcional: Defina um limite de tentativas de reasoning
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -53,23 +53,23 @@ Aqui está um exemplo completo:
|
|||||||
from crewai import Agent, Task, Crew
|
from crewai import Agent, Task, Crew
|
||||||
|
|
||||||
# Create an agent with reasoning enabled
|
# Create an agent with reasoning enabled
|
||||||
analyst = Agent(
|
analista = Agent(
|
||||||
role="Data Analyst",
|
role="Analista de Dados",
|
||||||
goal="Analyze data and provide insights",
|
goal="Analisar dados e fornecer insights",
|
||||||
backstory="You are an expert data analyst.",
|
backstory="Você é um analista de dados especialista.",
|
||||||
reasoning=True,
|
reasoning=True,
|
||||||
max_reasoning_attempts=3 # Optional: Set a limit on reasoning attempts
|
max_reasoning_attempts=3 # Opcional: Defina um limite de tentativas de reasoning
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create a task
|
# Create a task
|
||||||
analysis_task = Task(
|
analysis_task = Task(
|
||||||
description="Analyze the provided sales data and identify key trends.",
|
description="Analise os dados de vendas fornecidos e identifique as principais tendências.",
|
||||||
expected_output="A report highlighting the top 3 sales trends.",
|
expected_output="Um relatório destacando as 3 principais tendências de vendas.",
|
||||||
agent=analyst
|
agent=analista
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create a crew and run the task
|
# Create a crew and run the task
|
||||||
crew = Crew(agents=[analyst], tasks=[analysis_task])
|
crew = Crew(agents=[analista], tasks=[analysis_task])
|
||||||
result = crew.kickoff()
|
result = crew.kickoff()
|
||||||
|
|
||||||
print(result)
|
print(result)
|
||||||
@@ -90,16 +90,16 @@ logging.basicConfig(level=logging.INFO)
|
|||||||
|
|
||||||
# Create an agent with reasoning enabled
|
# Create an agent with reasoning enabled
|
||||||
agent = Agent(
|
agent = Agent(
|
||||||
role="Data Analyst",
|
role="Analista de Dados",
|
||||||
goal="Analyze data and provide insights",
|
goal="Analisar dados e fornecer insights",
|
||||||
reasoning=True,
|
reasoning=True,
|
||||||
max_reasoning_attempts=3
|
max_reasoning_attempts=3
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create a task
|
# Create a task
|
||||||
task = Task(
|
task = Task(
|
||||||
description="Analyze the provided sales data and identify key trends.",
|
description="Analise os dados de vendas fornecidos e identifique as principais tendências.",
|
||||||
expected_output="A report highlighting the top 3 sales trends.",
|
expected_output="Um relatório destacando as 3 principais tendências de vendas.",
|
||||||
agent=agent
|
agent=agent
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -113,7 +113,7 @@ result = agent.execute_task(task)
|
|||||||
Veja um exemplo de como pode ser um plano de reasoning para uma tarefa de análise de dados:
|
Veja um exemplo de como pode ser um plano de reasoning para uma tarefa de análise de dados:
|
||||||
|
|
||||||
```
|
```
|
||||||
Task: Analyze the provided sales data and identify key trends.
|
Task: Analise os dados de vendas fornecidos e identifique as principais tendências.
|
||||||
|
|
||||||
Reasoning Plan:
|
Reasoning Plan:
|
||||||
I'll analyze the sales data to identify the top 3 trends.
|
I'll analyze the sales data to identify the top 3 trends.
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ crew = Crew(
|
|||||||
| **Saída JSON** _(opcional)_ | `output_json` | `Optional[Type[BaseModel]]` | Um modelo Pydantic para estruturar a saída em JSON. |
|
| **Saída JSON** _(opcional)_ | `output_json` | `Optional[Type[BaseModel]]` | Um modelo Pydantic para estruturar a saída em JSON. |
|
||||||
| **Output Pydantic** _(opcional)_ | `output_pydantic` | `Optional[Type[BaseModel]]` | Um modelo Pydantic para a saída da tarefa. |
|
| **Output Pydantic** _(opcional)_ | `output_pydantic` | `Optional[Type[BaseModel]]` | Um modelo Pydantic para a saída da tarefa. |
|
||||||
| **Callback** _(opcional)_ | `callback` | `Optional[Any]` | Função/objeto a ser executado após a conclusão da tarefa. |
|
| **Callback** _(opcional)_ | `callback` | `Optional[Any]` | Função/objeto a ser executado após a conclusão da tarefa. |
|
||||||
|
| **Guardrail** _(opcional)_ | `guardrail` | `Optional[Union[Callable, str]]` | Função ou descrição em string para validar a saída da tarefa antes de prosseguir para a próxima tarefa. |
|
||||||
|
|
||||||
## Criando Tarefas
|
## Criando Tarefas
|
||||||
|
|
||||||
@@ -86,6 +87,7 @@ research_task:
|
|||||||
expected_output: >
|
expected_output: >
|
||||||
Uma lista com 10 tópicos em bullet points das informações mais relevantes sobre {topic}
|
Uma lista com 10 tópicos em bullet points das informações mais relevantes sobre {topic}
|
||||||
agent: researcher
|
agent: researcher
|
||||||
|
guardrail: garanta que cada bullet point contenha no mínimo 100 palavras
|
||||||
|
|
||||||
reporting_task:
|
reporting_task:
|
||||||
description: >
|
description: >
|
||||||
@@ -330,9 +332,13 @@ analysis_task = Task(
|
|||||||
|
|
||||||
Guardrails (trilhas de proteção) de tarefas fornecem uma maneira de validar e transformar as saídas das tarefas antes que elas sejam passadas para a próxima tarefa. Esse recurso assegura a qualidade dos dados e oferece feedback aos agentes quando sua saída não atende a critérios específicos.
|
Guardrails (trilhas de proteção) de tarefas fornecem uma maneira de validar e transformar as saídas das tarefas antes que elas sejam passadas para a próxima tarefa. Esse recurso assegura a qualidade dos dados e oferece feedback aos agentes quando sua saída não atende a critérios específicos.
|
||||||
|
|
||||||
### Usando Guardrails em Tarefas
|
**Guardrails podem ser definidos de duas maneiras:**
|
||||||
|
1. **Guardrails baseados em função**: Funções Python que implementam lógica de validação customizada
|
||||||
|
2. **Guardrails baseados em string**: Descrições em linguagem natural que são automaticamente convertidas em validação baseada em LLM
|
||||||
|
|
||||||
Para adicionar um guardrail a uma tarefa, forneça uma função de validação por meio do parâmetro `guardrail`:
|
### Guardrails Baseados em Função
|
||||||
|
|
||||||
|
Para adicionar um guardrail baseado em função a uma tarefa, forneça uma função de validação por meio do parâmetro `guardrail`:
|
||||||
|
|
||||||
```python Code
|
```python Code
|
||||||
from typing import Tuple, Union, Dict, Any
|
from typing import Tuple, Union, Dict, Any
|
||||||
@@ -370,9 +376,82 @@ blog_task = Task(
|
|||||||
- Em caso de sucesso: retorna uma tupla `(True, resultado_validado)`
|
- Em caso de sucesso: retorna uma tupla `(True, resultado_validado)`
|
||||||
- Em caso de falha: retorna uma tupla `(False, "mensagem de erro explicando a falha")`
|
- Em caso de falha: retorna uma tupla `(False, "mensagem de erro explicando a falha")`
|
||||||
|
|
||||||
### LLMGuardrail
|
### Guardrails Baseados em String
|
||||||
|
|
||||||
A classe `LLMGuardrail` oferece um mecanismo robusto para validação das saídas das tarefas.
|
Guardrails baseados em string permitem que você descreva critérios de validação em linguagem natural. Quando você fornece uma string em vez de uma função, o CrewAI automaticamente a converte em um `LLMGuardrail` que usa um agente de IA para validar a saída da tarefa.
|
||||||
|
|
||||||
|
#### Usando Guardrails de String em Python
|
||||||
|
|
||||||
|
```python Code
|
||||||
|
from crewai import Task
|
||||||
|
|
||||||
|
# Guardrail simples baseado em string
|
||||||
|
blog_task = Task(
|
||||||
|
description="Escreva um post de blog sobre IA",
|
||||||
|
expected_output="Um post de blog com menos de 200 palavras",
|
||||||
|
agent=blog_agent,
|
||||||
|
guardrail="Garanta que o post do blog tenha menos de 200 palavras e inclua exemplos práticos"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Critérios de validação mais complexos
|
||||||
|
research_task = Task(
|
||||||
|
description="Pesquise tendências de IA para 2025",
|
||||||
|
expected_output="Um relatório abrangente de pesquisa",
|
||||||
|
agent=research_agent,
|
||||||
|
guardrail="Garanta que cada descoberta inclua uma fonte confiável e seja respaldada por dados recentes de 2024-2025"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Usando Guardrails de String em YAML
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
research_task:
|
||||||
|
description: Pesquise os últimos desenvolvimentos em IA
|
||||||
|
expected_output: Uma lista de 10 bullet points sobre IA
|
||||||
|
agent: researcher
|
||||||
|
guardrail: garanta que cada bullet point contenha no mínimo 100 palavras
|
||||||
|
|
||||||
|
validation_task:
|
||||||
|
description: Valide os achados da pesquisa
|
||||||
|
expected_output: Um relatório de validação
|
||||||
|
agent: validator
|
||||||
|
guardrail: confirme que todas as fontes são de publicações respeitáveis e publicadas nos últimos 2 anos
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Como Funcionam os Guardrails de String
|
||||||
|
|
||||||
|
Quando você fornece um guardrail de string, o CrewAI automaticamente:
|
||||||
|
1. Cria uma instância `LLMGuardrail` usando a string como critério de validação
|
||||||
|
2. Usa o LLM do agente da tarefa para alimentar a validação
|
||||||
|
3. Cria um agente temporário de validação que verifica a saída contra seus critérios
|
||||||
|
4. Retorna feedback detalhado se a validação falhar
|
||||||
|
|
||||||
|
Esta abordagem é ideal quando você quer usar linguagem natural para descrever regras de validação sem escrever funções de validação customizadas.
|
||||||
|
|
||||||
|
### Classe LLMGuardrail
|
||||||
|
|
||||||
|
A classe `LLMGuardrail` é o mecanismo subjacente que alimenta os guardrails baseados em string. Você também pode usá-la diretamente para um controle mais avançado:
|
||||||
|
|
||||||
|
```python Code
|
||||||
|
from crewai import Task
|
||||||
|
from crewai.tasks.llm_guardrail import LLMGuardrail
|
||||||
|
from crewai.llm import LLM
|
||||||
|
|
||||||
|
# Crie um LLMGuardrail customizado com LLM específico
|
||||||
|
custom_guardrail = LLMGuardrail(
|
||||||
|
description="Garanta que a resposta contenha exatamente 5 bullet points com citações adequadas",
|
||||||
|
llm=LLM(model="gpt-4o-mini")
|
||||||
|
)
|
||||||
|
|
||||||
|
task = Task(
|
||||||
|
description="Pesquise medidas de segurança em IA",
|
||||||
|
expected_output="Uma análise detalhada com bullet points",
|
||||||
|
agent=research_agent,
|
||||||
|
guardrail=custom_guardrail
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Nota**: Quando você usa um guardrail de string, o CrewAI automaticamente cria uma instância `LLMGuardrail` usando o LLM do agente da sua tarefa. Usar `LLMGuardrail` diretamente lhe dá mais controle sobre o processo de validação e seleção de LLM.
|
||||||
|
|
||||||
### Melhores Práticas de Tratamento de Erros
|
### Melhores Práticas de Tratamento de Erros
|
||||||
|
|
||||||
@@ -386,7 +465,7 @@ def validate_with_context(result: TaskOutput) -> Tuple[bool, Any]:
|
|||||||
validated_data = perform_validation(result)
|
validated_data = perform_validation(result)
|
||||||
return (True, validated_data)
|
return (True, validated_data)
|
||||||
except ValidationError as e:
|
except ValidationError as e:
|
||||||
return (False, f"VALIDATION_ERROR: {str(e)}")
|
return (False, f"ERRO_DE_VALIDACAO: {str(e)}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return (False, str(e))
|
return (False, str(e))
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -67,17 +67,17 @@ web_rag_tool = WebsiteSearchTool()
|
|||||||
|
|
||||||
# Criar agentes
|
# Criar agentes
|
||||||
researcher = Agent(
|
researcher = Agent(
|
||||||
role='Market Research Analyst',
|
role='Analista de Mercado',
|
||||||
goal='Provide up-to-date market analysis of the AI industry',
|
goal='Fornecer análise de mercado atualizada da indústria de IA',
|
||||||
backstory='An expert analyst with a keen eye for market trends.',
|
backstory='Analista especialista com olhar atento para tendências de mercado.',
|
||||||
tools=[search_tool, web_rag_tool],
|
tools=[search_tool, web_rag_tool],
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
writer = Agent(
|
writer = Agent(
|
||||||
role='Content Writer',
|
role='Redator de Conteúdo',
|
||||||
goal='Craft engaging blog posts about the AI industry',
|
goal='Criar posts de blog envolventes sobre a indústria de IA',
|
||||||
backstory='A skilled writer with a passion for technology.',
|
backstory='Redator habilidoso com paixão por tecnologia.',
|
||||||
tools=[docs_tool, file_tool],
|
tools=[docs_tool, file_tool],
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -36,19 +36,18 @@ Para treinar sua crew de forma programática, siga estes passos:
|
|||||||
3. Execute o comando de treinamento dentro de um bloco try-except para tratar possíveis erros.
|
3. Execute o comando de treinamento dentro de um bloco try-except para tratar possíveis erros.
|
||||||
|
|
||||||
```python Code
|
```python Code
|
||||||
n_iterations = 2
|
n_iteracoes = 2
|
||||||
inputs = {"topic": "CrewAI Training"}
|
entradas = {"topic": "Treinamento CrewAI"}
|
||||||
filename = "your_model.pkl"
|
nome_arquivo = "seu_modelo.pkl"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
YourCrewName_Crew().crew().train(
|
SuaCrew().crew().train(
|
||||||
n_iterations=n_iterations,
|
n_iterations=n_iteracoes,
|
||||||
inputs=inputs,
|
inputs=entradas,
|
||||||
filename=filename
|
filename=nome_arquivo
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise Exception(f"An error occurred while training the crew: {e}")
|
raise Exception(f"Ocorreu um erro ao treinar a crew: {e}")
|
||||||
```
|
```
|
||||||
|
|
||||||
### Pontos Importantes
|
### Pontos Importantes
|
||||||
|
|||||||
@@ -26,13 +26,13 @@ from crewai.tasks.hallucination_guardrail import HallucinationGuardrail
|
|||||||
from crewai import LLM
|
from crewai import LLM
|
||||||
|
|
||||||
# Uso básico - utiliza o expected_output da tarefa como contexto
|
# Uso básico - utiliza o expected_output da tarefa como contexto
|
||||||
guardrail = HallucinationGuardrail(
|
protecao = HallucinationGuardrail(
|
||||||
llm=LLM(model="gpt-4o-mini")
|
llm=LLM(model="gpt-4o-mini")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Com contexto de referência explícito
|
# Com contexto de referência explícito
|
||||||
context_guardrail = HallucinationGuardrail(
|
protecao_com_contexto = HallucinationGuardrail(
|
||||||
context="AI helps with various tasks including analysis and generation.",
|
context="IA ajuda em várias tarefas, incluindo análise e geração.",
|
||||||
llm=LLM(model="gpt-4o-mini")
|
llm=LLM(model="gpt-4o-mini")
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
@@ -43,11 +43,11 @@ context_guardrail = HallucinationGuardrail(
|
|||||||
from crewai import Task
|
from crewai import Task
|
||||||
|
|
||||||
# Crie sua tarefa com a proteção
|
# Crie sua tarefa com a proteção
|
||||||
task = Task(
|
minha_tarefa = Task(
|
||||||
description="Write a summary about AI capabilities",
|
description="Escreva um resumo sobre as capacidades da IA",
|
||||||
expected_output="A factual summary based on the provided context",
|
expected_output="Um resumo factual baseado no contexto fornecido",
|
||||||
agent=my_agent,
|
agent=meu_agente,
|
||||||
guardrail=guardrail # Adiciona a proteção para validar a saída
|
guardrail=protecao # Adiciona a proteção para validar a saída
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -59,8 +59,8 @@ Para validação mais rigorosa, é possível definir um limiar de fidelidade per
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
# Proteção rigorosa exigindo alta pontuação de fidelidade
|
# Proteção rigorosa exigindo alta pontuação de fidelidade
|
||||||
strict_guardrail = HallucinationGuardrail(
|
protecao_rigorosa = HallucinationGuardrail(
|
||||||
context="Quantum computing uses qubits that exist in superposition states.",
|
context="Computação quântica utiliza qubits que existem em estados de superposição.",
|
||||||
llm=LLM(model="gpt-4o-mini"),
|
llm=LLM(model="gpt-4o-mini"),
|
||||||
threshold=8.0 # Requer pontuação >= 8 para validar
|
threshold=8.0 # Requer pontuação >= 8 para validar
|
||||||
)
|
)
|
||||||
@@ -72,10 +72,10 @@ Se sua tarefa utiliza ferramentas, você pode incluir as respostas das ferrament
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
# Proteção com contexto de resposta da ferramenta
|
# Proteção com contexto de resposta da ferramenta
|
||||||
weather_guardrail = HallucinationGuardrail(
|
protecao_clima = HallucinationGuardrail(
|
||||||
context="Current weather information for the requested location",
|
context="Informações meteorológicas atuais para o local solicitado",
|
||||||
llm=LLM(model="gpt-4o-mini"),
|
llm=LLM(model="gpt-4o-mini"),
|
||||||
tool_response="Weather API returned: Temperature 22°C, Humidity 65%, Clear skies"
|
tool_response="API do Clima retornou: Temperatura 22°C, Umidade 65%, Céu limpo"
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -123,15 +123,15 @@ Quando uma proteção é adicionada à tarefa, ela valida automaticamente a saí
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
# Fluxo de validação de saída da tarefa
|
# Fluxo de validação de saída da tarefa
|
||||||
task_output = agent.execute_task(task)
|
task_output = meu_agente.execute_task(minha_tarefa)
|
||||||
validation_result = guardrail(task_output)
|
resultado_validacao = protecao(task_output)
|
||||||
|
|
||||||
if validation_result.valid:
|
if resultado_validacao.valid:
|
||||||
# Tarefa concluída com sucesso
|
# Tarefa concluída com sucesso
|
||||||
return task_output
|
return task_output
|
||||||
else:
|
else:
|
||||||
# Tarefa falha com feedback de validação
|
# Tarefa falha com feedback de validação
|
||||||
raise ValidationError(validation_result.feedback)
|
raise ValidationError(resultado_validacao.feedback)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Rastreamento de Eventos
|
### Rastreamento de Eventos
|
||||||
@@ -151,10 +151,10 @@ A proteção se integra ao sistema de eventos do CrewAI para fornecer observabil
|
|||||||
Inclua todas as informações factuais relevantes nas quais a IA deve basear sua saída:
|
Inclua todas as informações factuais relevantes nas quais a IA deve basear sua saída:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
context = """
|
contexto = """
|
||||||
Company XYZ was founded in 2020 and specializes in renewable energy solutions.
|
Empresa XYZ foi fundada em 2020 e é especializada em soluções de energia renovável.
|
||||||
They have 150 employees and generated $50M revenue in 2023.
|
Possui 150 funcionários e faturou R$ 50 milhões em 2023.
|
||||||
Their main products include solar panels and wind turbines.
|
Seus principais produtos incluem painéis solares e turbinas eólicas.
|
||||||
"""
|
"""
|
||||||
```
|
```
|
||||||
</Step>
|
</Step>
|
||||||
@@ -164,10 +164,10 @@ A proteção se integra ao sistema de eventos do CrewAI para fornecer observabil
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
# Bom: Contexto focado
|
# Bom: Contexto focado
|
||||||
context = "The current weather in New York is 18°C with light rain."
|
contexto = "O clima atual em Nova York é 18°C com chuva leve."
|
||||||
|
|
||||||
# Evite: Informações irrelevantes
|
# Evite: Informações irrelevantes
|
||||||
context = "The weather is 18°C. The city has 8 million people. Traffic is heavy."
|
contexto = "O clima está 18°C. A cidade tem 8 milhões de habitantes. O trânsito está intenso."
|
||||||
```
|
```
|
||||||
</Step>
|
</Step>
|
||||||
|
|
||||||
|
|||||||
@@ -84,31 +84,31 @@ from crewai import Agent, Task, Crew
|
|||||||
from crewai_tools import CrewaiEnterpriseTools
|
from crewai_tools import CrewaiEnterpriseTools
|
||||||
|
|
||||||
# Obtenha ferramentas enterprise (a ferramenta Gmail será incluída)
|
# Obtenha ferramentas enterprise (a ferramenta Gmail será incluída)
|
||||||
enterprise_tools = CrewaiEnterpriseTools(
|
ferramentas_enterprise = CrewaiEnterpriseTools(
|
||||||
enterprise_token="your_enterprise_token"
|
enterprise_token="seu_token_enterprise"
|
||||||
)
|
)
|
||||||
# imprima as ferramentas
|
# imprima as ferramentas
|
||||||
print(enterprise_tools)
|
print(ferramentas_enterprise)
|
||||||
|
|
||||||
# Crie um agente com capacidades do Gmail
|
# Crie um agente com capacidades do Gmail
|
||||||
email_agent = Agent(
|
agente_email = Agent(
|
||||||
role="Email Manager",
|
role="Gerente de E-mails",
|
||||||
goal="Manage and organize email communications",
|
goal="Gerenciar e organizar comunicações por e-mail",
|
||||||
backstory="An AI assistant specialized in email management and communication.",
|
backstory="Um assistente de IA especializado em gestão de e-mails e comunicação.",
|
||||||
tools=enterprise_tools
|
tools=ferramentas_enterprise
|
||||||
)
|
)
|
||||||
|
|
||||||
# Tarefa para enviar um e-mail
|
# Tarefa para enviar um e-mail
|
||||||
email_task = Task(
|
tarefa_email = Task(
|
||||||
description="Draft and send a follow-up email to john@example.com about the project update",
|
description="Redigir e enviar um e-mail de acompanhamento para john@example.com sobre a atualização do projeto",
|
||||||
agent=email_agent,
|
agent=agente_email,
|
||||||
expected_output="Confirmation that email was sent successfully"
|
expected_output="Confirmação de que o e-mail foi enviado com sucesso"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Execute a tarefa
|
# Execute a tarefa
|
||||||
crew = Crew(
|
crew = Crew(
|
||||||
agents=[email_agent],
|
agents=[agente_email],
|
||||||
tasks=[email_task]
|
tasks=[tarefa_email]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Execute o crew
|
# Execute o crew
|
||||||
@@ -125,23 +125,23 @@ enterprise_tools = CrewaiEnterpriseTools(
|
|||||||
)
|
)
|
||||||
gmail_tool = enterprise_tools["gmail_find_email"]
|
gmail_tool = enterprise_tools["gmail_find_email"]
|
||||||
|
|
||||||
gmail_agent = Agent(
|
agente_gmail = Agent(
|
||||||
role="Gmail Manager",
|
role="Gerente do Gmail",
|
||||||
goal="Manage gmail communications and notifications",
|
goal="Gerenciar comunicações e notificações do gmail",
|
||||||
backstory="An AI assistant that helps coordinate gmail communications.",
|
backstory="Um assistente de IA que ajuda a coordenar comunicações no gmail.",
|
||||||
tools=[gmail_tool]
|
tools=[gmail_tool]
|
||||||
)
|
)
|
||||||
|
|
||||||
notification_task = Task(
|
tarefa_notificacao = Task(
|
||||||
description="Find the email from john@example.com",
|
description="Encontrar o e-mail de john@example.com",
|
||||||
agent=gmail_agent,
|
agent=agente_gmail,
|
||||||
expected_output="Email found from john@example.com"
|
expected_output="E-mail encontrado de john@example.com"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Execute a tarefa
|
# Execute a tarefa
|
||||||
crew = Crew(
|
crew = Crew(
|
||||||
agents=[slack_agent],
|
agents=[agente_gmail],
|
||||||
tasks=[notification_task]
|
tasks=[tarefa_notificacao]
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ Antes de usar o Repositório de Ferramentas, certifique-se de que você possui:
|
|||||||
Para instalar uma ferramenta:
|
Para instalar uma ferramenta:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
crewai tool install <tool-name>
|
crewai tool install <nome-da-ferramenta>
|
||||||
```
|
```
|
||||||
|
|
||||||
Isso instala a ferramenta e a adiciona ao `pyproject.toml`.
|
Isso instala a ferramenta e a adiciona ao `pyproject.toml`.
|
||||||
@@ -40,7 +40,7 @@ Isso instala a ferramenta e a adiciona ao `pyproject.toml`.
|
|||||||
Para criar um novo projeto de ferramenta:
|
Para criar um novo projeto de ferramenta:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
crewai tool create <tool-name>
|
crewai tool create <nome-da-ferramenta>
|
||||||
```
|
```
|
||||||
|
|
||||||
Isso gera um projeto de ferramenta estruturado localmente.
|
Isso gera um projeto de ferramenta estruturado localmente.
|
||||||
@@ -76,7 +76,7 @@ Para atualizar uma ferramenta publicada:
|
|||||||
3. Faça o commit das alterações e publique
|
3. Faça o commit das alterações e publique
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git commit -m "Update version to 0.1.1"
|
git commit -m "Atualizar versão para 0.1.1"
|
||||||
crewai tool publish
|
crewai tool publish
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -12,16 +12,17 @@ O Enterprise Event Streaming permite que você receba atualizações em tempo re
|
|||||||
|
|
||||||
Ao utilizar a API Kickoff, inclua um objeto `webhooks` em sua requisição, por exemplo:
|
Ao utilizar a API Kickoff, inclua um objeto `webhooks` em sua requisição, por exemplo:
|
||||||
|
|
||||||
|
# Exemplo de uso da API Kickoff com webhooks
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"inputs": {"foo": "bar"},
|
"inputs": {"foo": "bar"},
|
||||||
"webhooks": {
|
"webhooks": {
|
||||||
"events": ["crew_kickoff_started", "llm_call_started"],
|
"events": ["crew_kickoff_started", "llm_call_started"],
|
||||||
"url": "https://your.endpoint/webhook",
|
"url": "https://seu.endpoint/webhook",
|
||||||
"realtime": false,
|
"realtime": false,
|
||||||
"authentication": {
|
"authentication": {
|
||||||
"strategy": "bearer",
|
"strategy": "bearer",
|
||||||
"token": "my-secret-token"
|
"token": "meu-token-secreto"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -33,19 +34,20 @@ Se `realtime` estiver definido como `true`, cada evento será entregue individua
|
|||||||
|
|
||||||
Cada webhook envia uma lista de eventos:
|
Cada webhook envia uma lista de eventos:
|
||||||
|
|
||||||
|
# Exemplo de evento enviado pelo webhook
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"events": [
|
"events": [
|
||||||
{
|
{
|
||||||
"id": "event-id",
|
"id": "id-do-evento",
|
||||||
"execution_id": "crew-run-id",
|
"execution_id": "id-da-execucao-do-crew",
|
||||||
"timestamp": "2025-02-16T10:58:44.965Z",
|
"timestamp": "2025-02-16T10:58:44.965Z",
|
||||||
"type": "llm_call_started",
|
"type": "llm_call_started",
|
||||||
"data": {
|
"data": {
|
||||||
"model": "gpt-4",
|
"model": "gpt-4",
|
||||||
"messages": [
|
"messages": [
|
||||||
{"role": "system", "content": "You are an assistant."},
|
{"role": "system", "content": "Você é um assistente."},
|
||||||
{"role": "user", "content": "Summarize this article."}
|
{"role": "user", "content": "Resuma este artigo."}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,11 +41,8 @@ A CLI fornece a maneira mais rápida de implantar crews desenvolvidos localmente
|
|||||||
Primeiro, você precisa autenticar sua CLI com a plataforma CrewAI Enterprise:
|
Primeiro, você precisa autenticar sua CLI com a plataforma CrewAI Enterprise:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Se já possui uma conta CrewAI Enterprise
|
# Se já possui uma conta CrewAI Enterprise, ou deseja criar uma:
|
||||||
crewai login
|
crewai login
|
||||||
|
|
||||||
# Se vai criar uma nova conta
|
|
||||||
crewai signup
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Ao executar qualquer um dos comandos, a CLI irá:
|
Ao executar o comando, a CLI irá:
|
||||||
|
|||||||
@@ -16,17 +16,17 @@ from crewai import CrewBase
|
|||||||
from crewai.project import before_kickoff
|
from crewai.project import before_kickoff
|
||||||
|
|
||||||
@CrewBase
|
@CrewBase
|
||||||
class MyCrew:
|
class MinhaEquipe:
|
||||||
@before_kickoff
|
@before_kickoff
|
||||||
def prepare_data(self, inputs):
|
def preparar_dados(self, entradas):
|
||||||
# Preprocess or modify inputs
|
# Pré-processa ou modifica as entradas
|
||||||
inputs['processed'] = True
|
entradas['processado'] = True
|
||||||
return inputs
|
return entradas
|
||||||
|
|
||||||
#...
|
#...
|
||||||
```
|
```
|
||||||
|
|
||||||
Neste exemplo, a função prepare_data modifica as entradas adicionando um novo par chave-valor indicando que as entradas foram processadas.
|
Neste exemplo, a função preparar_dados modifica as entradas adicionando um novo par chave-valor indicando que as entradas foram processadas.
|
||||||
|
|
||||||
## Hook Depois do Kickoff
|
## Hook Depois do Kickoff
|
||||||
|
|
||||||
@@ -39,17 +39,17 @@ from crewai import CrewBase
|
|||||||
from crewai.project import after_kickoff
|
from crewai.project import after_kickoff
|
||||||
|
|
||||||
@CrewBase
|
@CrewBase
|
||||||
class MyCrew:
|
class MinhaEquipe:
|
||||||
@after_kickoff
|
@after_kickoff
|
||||||
def log_results(self, result):
|
def registrar_resultados(self, resultado):
|
||||||
# Log or modify the results
|
# Registra ou modifica os resultados
|
||||||
print("Crew execution completed with result:", result)
|
print("Execução da equipe concluída com resultado:", resultado)
|
||||||
return result
|
return resultado
|
||||||
|
|
||||||
# ...
|
# ...
|
||||||
```
|
```
|
||||||
|
|
||||||
Na função `log_results`, os resultados da execução da crew são simplesmente impressos. Você pode estender isso para realizar operações mais complexas, como enviar notificações ou integrar com outros serviços.
|
Na função `registrar_resultados`, os resultados da execução da crew são simplesmente impressos. Você pode estender isso para realizar operações mais complexas, como enviar notificações ou integrar com outros serviços.
|
||||||
|
|
||||||
## Utilizando Ambos os Hooks
|
## Utilizando Ambos os Hooks
|
||||||
|
|
||||||
|
|||||||
@@ -77,9 +77,9 @@ search_tool = SerperDevTool()
|
|||||||
|
|
||||||
# Inicialize o agente com opções avançadas
|
# Inicialize o agente com opções avançadas
|
||||||
agent = Agent(
|
agent = Agent(
|
||||||
role='Research Analyst',
|
role='Analista de Pesquisa',
|
||||||
goal='Provide up-to-date market analysis',
|
goal='Fornecer análises de mercado atualizadas',
|
||||||
backstory='An expert analyst with a keen eye for market trends.',
|
backstory='Um analista especialista com olhar atento para tendências de mercado.',
|
||||||
tools=[search_tool],
|
tools=[search_tool],
|
||||||
memory=True, # Ativa memória
|
memory=True, # Ativa memória
|
||||||
verbose=True,
|
verbose=True,
|
||||||
@@ -98,14 +98,9 @@ eficiência dentro do ecossistema CrewAI. Se necessário, a delegação pode ser
|
|||||||
|
|
||||||
```python Code
|
```python Code
|
||||||
agent = Agent(
|
agent = Agent(
|
||||||
role='Content Writer',
|
role='Redator de Conteúdo',
|
||||||
goal='Write engaging content on market trends',
|
goal='Escrever conteúdo envolvente sobre tendências de mercado',
|
||||||
backstory='A seasoned writer with expertise in market analysis.',
|
backstory='Um redator experiente com expertise em análise de mercado.',
|
||||||
allow_delegation=True # Habilitando delegação
|
allow_delegation=True # Habilitando delegação
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Conclusão
|
|
||||||
|
|
||||||
Personalizar agentes no CrewAI definindo seus papéis, objetivos, histórias e ferramentas, juntamente com opções avançadas como personalização de modelo de linguagem, memória, ajustes de performance e preferências de delegação,
|
|
||||||
proporciona uma equipe de IA sofisticada e preparada para enfrentar desafios complexos.
|
|
||||||
@@ -45,17 +45,17 @@ from crewai import Crew, Agent, Task
|
|||||||
|
|
||||||
# Create an agent with code execution enabled
|
# Create an agent with code execution enabled
|
||||||
coding_agent = Agent(
|
coding_agent = Agent(
|
||||||
role="Python Data Analyst",
|
role="Analista de Dados Python",
|
||||||
goal="Analyze data and provide insights using Python",
|
goal="Analisar dados e fornecer insights usando Python",
|
||||||
backstory="You are an experienced data analyst with strong Python skills.",
|
backstory="Você é um analista de dados experiente com fortes habilidades em Python.",
|
||||||
allow_code_execution=True
|
allow_code_execution=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create a task that requires code execution
|
# Create a task that requires code execution
|
||||||
data_analysis_task = Task(
|
data_analysis_task = Task(
|
||||||
description="Analyze the given dataset and calculate the average age of participants. Ages: {ages}",
|
description="Analise o conjunto de dados fornecido e calcule a idade média dos participantes. Idades: {ages}",
|
||||||
agent=coding_agent,
|
agent=coding_agent,
|
||||||
expected_output="The average age of the participants."
|
expected_output="A idade média dos participantes."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create a crew and add the task
|
# Create a crew and add the task
|
||||||
@@ -83,23 +83,23 @@ from crewai import Crew, Agent, Task
|
|||||||
|
|
||||||
# Create an agent with code execution enabled
|
# Create an agent with code execution enabled
|
||||||
coding_agent = Agent(
|
coding_agent = Agent(
|
||||||
role="Python Data Analyst",
|
role="Analista de Dados Python",
|
||||||
goal="Analyze data and provide insights using Python",
|
goal="Analisar dados e fornecer insights usando Python",
|
||||||
backstory="You are an experienced data analyst with strong Python skills.",
|
backstory="Você é um analista de dados experiente com fortes habilidades em Python.",
|
||||||
allow_code_execution=True
|
allow_code_execution=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create tasks that require code execution
|
# Create tasks that require code execution
|
||||||
task_1 = Task(
|
task_1 = Task(
|
||||||
description="Analyze the first dataset and calculate the average age of participants. Ages: {ages}",
|
description="Analise o primeiro conjunto de dados e calcule a idade média dos participantes. Idades: {ages}",
|
||||||
agent=coding_agent,
|
agent=coding_agent,
|
||||||
expected_output="The average age of the participants."
|
expected_output="A idade média dos participantes."
|
||||||
)
|
)
|
||||||
|
|
||||||
task_2 = Task(
|
task_2 = Task(
|
||||||
description="Analyze the second dataset and calculate the average age of participants. Ages: {ages}",
|
description="Analise o segundo conjunto de dados e calcule a idade média dos participantes. Idades: {ages}",
|
||||||
agent=coding_agent,
|
agent=coding_agent,
|
||||||
expected_output="The average age of the participants."
|
expected_output="A idade média dos participantes."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create two crews and add tasks
|
# Create two crews and add tasks
|
||||||
|
|||||||
@@ -43,11 +43,11 @@ try:
|
|||||||
with MCPServerAdapter(server_params_list) as aggregated_tools:
|
with MCPServerAdapter(server_params_list) as aggregated_tools:
|
||||||
print(f"Available aggregated tools: {[tool.name for tool in aggregated_tools]}")
|
print(f"Available aggregated tools: {[tool.name for tool in aggregated_tools]}")
|
||||||
|
|
||||||
multi_server_agent = Agent(
|
agente_multiservidor = Agent(
|
||||||
role="Versatile Assistant",
|
role="Assistente Versátil",
|
||||||
goal="Utilize tools from local Stdio, remote SSE, and remote HTTP MCP servers.",
|
goal="Utilizar ferramentas de servidores MCP locais Stdio, remotos SSE e remotos HTTP.",
|
||||||
backstory="An AI agent capable of leveraging a diverse set of tools from multiple sources.",
|
backstory="Um agente de IA capaz de aproveitar um conjunto diversificado de ferramentas de múltiplas fontes.",
|
||||||
tools=aggregated_tools, # All tools are available here
|
tools=aggregated_tools, # Todas as ferramentas estão disponíveis aqui
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -73,10 +73,10 @@ server_params = {
|
|||||||
with MCPServerAdapter(server_params) as mcp_tools:
|
with MCPServerAdapter(server_params) as mcp_tools:
|
||||||
print(f"Available tools: {[tool.name for tool in mcp_tools]}")
|
print(f"Available tools: {[tool.name for tool in mcp_tools]}")
|
||||||
|
|
||||||
my_agent = Agent(
|
meu_agente = Agent(
|
||||||
role="MCP Tool User",
|
role="Usuário de Ferramentas MCP",
|
||||||
goal="Utilize tools from an MCP server.",
|
goal="Utilizar ferramentas de um servidor MCP.",
|
||||||
backstory="I can connect to MCP servers and use their tools.",
|
backstory="Posso conectar a servidores MCP e usar suas ferramentas.",
|
||||||
tools=mcp_tools, # Passe as ferramentas carregadas para o seu agente
|
tools=mcp_tools, # Passe as ferramentas carregadas para o seu agente
|
||||||
reasoning=True,
|
reasoning=True,
|
||||||
verbose=True
|
verbose=True
|
||||||
@@ -91,10 +91,10 @@ Este padrão geral mostra como integrar ferramentas. Para exemplos específicos
|
|||||||
with MCPServerAdapter(server_params) as mcp_tools:
|
with MCPServerAdapter(server_params) as mcp_tools:
|
||||||
print(f"Available tools: {[tool.name for tool in mcp_tools]}")
|
print(f"Available tools: {[tool.name for tool in mcp_tools]}")
|
||||||
|
|
||||||
my_agent = Agent(
|
meu_agente = Agent(
|
||||||
role="MCP Tool User",
|
role="Usuário de Ferramentas MCP",
|
||||||
goal="Utilize tools from an MCP server.",
|
goal="Utilizar ferramentas de um servidor MCP.",
|
||||||
backstory="I can connect to MCP servers and use their tools.",
|
backstory="Posso conectar a servidores MCP e usar suas ferramentas.",
|
||||||
tools=mcp_tools["tool_name"], # Passe as ferramentas filtradas para o seu agente
|
tools=mcp_tools["tool_name"], # Passe as ferramentas filtradas para o seu agente
|
||||||
reasoning=True,
|
reasoning=True,
|
||||||
verbose=True
|
verbose=True
|
||||||
|
|||||||
@@ -37,24 +37,24 @@ try:
|
|||||||
print(f"Available tools from SSE MCP server: {[tool.name for tool in tools]}")
|
print(f"Available tools from SSE MCP server: {[tool.name for tool in tools]}")
|
||||||
|
|
||||||
# Example: Using a tool from the SSE MCP server
|
# Example: Using a tool from the SSE MCP server
|
||||||
sse_agent = Agent(
|
agente_sse = Agent(
|
||||||
role="Remote Service User",
|
role="Usuário de Serviço Remoto",
|
||||||
goal="Utilize a tool provided by a remote SSE MCP server.",
|
goal="Utilizar uma ferramenta fornecida por um servidor MCP remoto via SSE.",
|
||||||
backstory="An AI agent that connects to external services via SSE.",
|
backstory="Um agente de IA que conecta a serviços externos via SSE.",
|
||||||
tools=tools,
|
tools=tools,
|
||||||
reasoning=True,
|
reasoning=True,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
sse_task = Task(
|
sse_task = Task(
|
||||||
description="Fetch real-time stock updates for 'AAPL' using an SSE tool.",
|
description="Buscar atualizações em tempo real das ações 'AAPL' usando uma ferramenta SSE.",
|
||||||
expected_output="The latest stock price for AAPL.",
|
expected_output="O preço mais recente da ação AAPL.",
|
||||||
agent=sse_agent,
|
agent=agente_sse,
|
||||||
markdown=True
|
markdown=True
|
||||||
)
|
)
|
||||||
|
|
||||||
sse_crew = Crew(
|
sse_crew = Crew(
|
||||||
agents=[sse_agent],
|
agents=[agente_sse],
|
||||||
tasks=[sse_task],
|
tasks=[sse_task],
|
||||||
verbose=True,
|
verbose=True,
|
||||||
process=Process.sequential
|
process=Process.sequential
|
||||||
@@ -101,16 +101,16 @@ try:
|
|||||||
print(f"Available tools (manual SSE): {[tool.name for tool in tools]}")
|
print(f"Available tools (manual SSE): {[tool.name for tool in tools]}")
|
||||||
|
|
||||||
manual_sse_agent = Agent(
|
manual_sse_agent = Agent(
|
||||||
role="Remote Data Analyst",
|
role="Analista Remoto de Dados",
|
||||||
goal="Analyze data fetched from a remote SSE MCP server using manual connection management.",
|
goal="Analisar dados obtidos de um servidor MCP remoto SSE usando gerenciamento manual de conexão.",
|
||||||
backstory="An AI skilled in handling SSE connections explicitly.",
|
backstory="Um agente de IA especializado em gerenciar conexões SSE explicitamente.",
|
||||||
tools=tools,
|
tools=tools,
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
analysis_task = Task(
|
analysis_task = Task(
|
||||||
description="Fetch and analyze the latest user activity trends from the SSE server.",
|
description="Buscar e analisar as tendências mais recentes de atividade de usuários do servidor SSE.",
|
||||||
expected_output="A summary report of user activity trends.",
|
expected_output="Um relatório resumido das tendências de atividade dos usuários.",
|
||||||
agent=manual_sse_agent
|
agent=manual_sse_agent
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -38,24 +38,24 @@ with MCPServerAdapter(server_params) as tools:
|
|||||||
print(f"Available tools from Stdio MCP server: {[tool.name for tool in tools]}")
|
print(f"Available tools from Stdio MCP server: {[tool.name for tool in tools]}")
|
||||||
|
|
||||||
# Exemplo: Usando as ferramentas do servidor MCP Stdio em um Agente CrewAI
|
# Exemplo: Usando as ferramentas do servidor MCP Stdio em um Agente CrewAI
|
||||||
research_agent = Agent(
|
pesquisador_local = Agent(
|
||||||
role="Local Data Processor",
|
role="Processador Local de Dados",
|
||||||
goal="Process data using a local Stdio-based tool.",
|
goal="Processar dados usando uma ferramenta local baseada em Stdio.",
|
||||||
backstory="An AI that leverages local scripts via MCP for specialized tasks.",
|
backstory="Uma IA que utiliza scripts locais via MCP para tarefas especializadas.",
|
||||||
tools=tools,
|
tools=tools,
|
||||||
reasoning=True,
|
reasoning=True,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
processing_task = Task(
|
processing_task = Task(
|
||||||
description="Process the input data file 'data.txt' and summarize its contents.",
|
description="Processar o arquivo de dados de entrada 'data.txt' e resumir seu conteúdo.",
|
||||||
expected_output="A summary of the processed data.",
|
expected_output="Um resumo dos dados processados.",
|
||||||
agent=research_agent,
|
agent=pesquisador_local,
|
||||||
markdown=True
|
markdown=True
|
||||||
)
|
)
|
||||||
|
|
||||||
data_crew = Crew(
|
data_crew = Crew(
|
||||||
agents=[research_agent],
|
agents=[pesquisador_local],
|
||||||
tasks=[processing_task],
|
tasks=[processing_task],
|
||||||
verbose=True,
|
verbose=True,
|
||||||
process=Process.sequential
|
process=Process.sequential
|
||||||
@@ -95,16 +95,16 @@ try:
|
|||||||
|
|
||||||
# Exemplo: Usando as ferramentas com sua configuração de Agent, Task, Crew
|
# Exemplo: Usando as ferramentas com sua configuração de Agent, Task, Crew
|
||||||
manual_agent = Agent(
|
manual_agent = Agent(
|
||||||
role="Local Task Executor",
|
role="Executor Local de Tarefas",
|
||||||
goal="Execute a specific local task using a manually managed Stdio tool.",
|
goal="Executar uma tarefa local específica usando uma ferramenta Stdio gerenciada manualmente.",
|
||||||
backstory="An AI proficient in controlling local processes via MCP.",
|
backstory="Uma IA proficiente em controlar processos locais via MCP.",
|
||||||
tools=tools,
|
tools=tools,
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
manual_task = Task(
|
manual_task = Task(
|
||||||
description="Execute the 'perform_analysis' command via the Stdio tool.",
|
description="Executar o comando 'perform_analysis' via ferramenta Stdio.",
|
||||||
expected_output="Results of the analysis.",
|
expected_output="Resultados da análise.",
|
||||||
agent=manual_agent
|
agent=manual_agent
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -35,22 +35,22 @@ try:
|
|||||||
with MCPServerAdapter(server_params) as tools:
|
with MCPServerAdapter(server_params) as tools:
|
||||||
print(f"Available tools from Streamable HTTP MCP server: {[tool.name for tool in tools]}")
|
print(f"Available tools from Streamable HTTP MCP server: {[tool.name for tool in tools]}")
|
||||||
|
|
||||||
http_agent = Agent(
|
agente_http = Agent(
|
||||||
role="HTTP Service Integrator",
|
role="Integrador de Serviços HTTP",
|
||||||
goal="Utilize tools from a remote MCP server via Streamable HTTP.",
|
goal="Utilizar ferramentas de um servidor MCP remoto via Streamable HTTP.",
|
||||||
backstory="An AI agent adept at interacting with complex web services.",
|
backstory="Um agente de IA especializado em interagir com serviços web complexos.",
|
||||||
tools=tools,
|
tools=tools,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
http_task = Task(
|
http_task = Task(
|
||||||
description="Perform a complex data query using a tool from the Streamable HTTP server.",
|
description="Realizar uma consulta de dados complexa usando uma ferramenta do servidor Streamable HTTP.",
|
||||||
expected_output="The result of the complex data query.",
|
expected_output="O resultado da consulta de dados complexa.",
|
||||||
agent=http_agent,
|
agent=agente_http,
|
||||||
)
|
)
|
||||||
|
|
||||||
http_crew = Crew(
|
http_crew = Crew(
|
||||||
agents=[http_agent],
|
agents=[agente_http],
|
||||||
tasks=[http_task],
|
tasks=[http_task],
|
||||||
verbose=True,
|
verbose=True,
|
||||||
process=Process.sequential
|
process=Process.sequential
|
||||||
@@ -91,16 +91,16 @@ try:
|
|||||||
print(f"Available tools (manual Streamable HTTP): {[tool.name for tool in tools]}")
|
print(f"Available tools (manual Streamable HTTP): {[tool.name for tool in tools]}")
|
||||||
|
|
||||||
manual_http_agent = Agent(
|
manual_http_agent = Agent(
|
||||||
role="Advanced Web Service User",
|
role="Usuário Avançado de Serviços Web",
|
||||||
goal="Interact with an MCP server using manually managed Streamable HTTP connections.",
|
goal="Interagir com um servidor MCP usando conexões HTTP Streamable gerenciadas manualmente.",
|
||||||
backstory="An AI specialist in fine-tuning HTTP-based service integrations.",
|
backstory="Um especialista em IA em ajustar integrações baseadas em HTTP.",
|
||||||
tools=tools,
|
tools=tools,
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
data_processing_task = Task(
|
data_processing_task = Task(
|
||||||
description="Submit data for processing and retrieve results via Streamable HTTP.",
|
description="Enviar dados para processamento e recuperar resultados via Streamable HTTP.",
|
||||||
expected_output="Processed data or confirmation.",
|
expected_output="Dados processados ou confirmação.",
|
||||||
agent=manual_http_agent
|
agent=manual_http_agent
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -78,47 +78,40 @@ CrewAIInstrumentor().instrument(skip_dep_check=True, tracer_provider=tracer_prov
|
|||||||
search_tool = SerperDevTool()
|
search_tool = SerperDevTool()
|
||||||
|
|
||||||
# Defina seus agentes com papéis e objetivos
|
# Defina seus agentes com papéis e objetivos
|
||||||
researcher = Agent(
|
pesquisador = Agent(
|
||||||
role="Senior Research Analyst",
|
role="Analista Sênior de Pesquisa",
|
||||||
goal="Uncover cutting-edge developments in AI and data science",
|
goal="Descobrir os avanços mais recentes em IA e ciência de dados",
|
||||||
backstory="""You work at a leading tech think tank.
|
backstory="""
|
||||||
Your expertise lies in identifying emerging trends.
|
Você trabalha em um importante think tank de tecnologia. Sua especialidade é identificar tendências emergentes. Você tem habilidade para dissecar dados complexos e apresentar insights acionáveis.
|
||||||
You have a knack for dissecting complex data and presenting actionable insights.""",
|
""",
|
||||||
verbose=True,
|
verbose=True,
|
||||||
allow_delegation=False,
|
allow_delegation=False,
|
||||||
# You can pass an optional llm attribute specifying what model you wanna use.
|
|
||||||
# llm=ChatOpenAI(model_name="gpt-3.5", temperature=0.7),
|
|
||||||
tools=[search_tool],
|
tools=[search_tool],
|
||||||
)
|
)
|
||||||
writer = Agent(
|
writer = Agent(
|
||||||
role="Tech Content Strategist",
|
role="Estrategista de Conteúdo Técnico",
|
||||||
goal="Craft compelling content on tech advancements",
|
goal="Criar conteúdo envolvente sobre avanços tecnológicos",
|
||||||
backstory="""You are a renowned Content Strategist, known for your insightful and engaging articles.
|
backstory="Você é um Estrategista de Conteúdo renomado, conhecido por seus artigos perspicazes e envolventes. Você transforma conceitos complexos em narrativas atraentes.",
|
||||||
You transform complex concepts into compelling narratives.""",
|
|
||||||
verbose=True,
|
verbose=True,
|
||||||
allow_delegation=True,
|
allow_delegation=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Crie tarefas para seus agentes
|
# Crie tarefas para seus agentes
|
||||||
task1 = Task(
|
task1 = Task(
|
||||||
description="""Conduct a comprehensive analysis of the latest advancements in AI in 2024.
|
description="Realize uma análise abrangente dos avanços mais recentes em IA em 2024. Identifique tendências-chave, tecnologias inovadoras e impactos potenciais na indústria.",
|
||||||
Identify key trends, breakthrough technologies, and potential industry impacts.""",
|
expected_output="Relatório analítico completo em tópicos",
|
||||||
expected_output="Full analysis report in bullet points",
|
agent=pesquisador,
|
||||||
agent=researcher,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
task2 = Task(
|
task2 = Task(
|
||||||
description="""Using the insights provided, develop an engaging blog
|
description="Utilizando os insights fornecidos, desenvolva um blog envolvente destacando os avanços mais significativos em IA. O post deve ser informativo e acessível, voltado para um público técnico. Dê um tom interessante, evite palavras complexas para não soar como IA.",
|
||||||
post that highlights the most significant AI advancements.
|
expected_output="Post de blog completo com pelo menos 4 parágrafos",
|
||||||
Your post should be informative yet accessible, catering to a tech-savvy audience.
|
|
||||||
Make it sound cool, avoid complex words so it doesn't sound like AI.""",
|
|
||||||
expected_output="Full blog post of at least 4 paragraphs",
|
|
||||||
agent=writer,
|
agent=writer,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Instancie seu crew com um processo sequencial
|
# Instancie seu crew com um processo sequencial
|
||||||
crew = Crew(
|
crew = Crew(
|
||||||
agents=[researcher, writer], tasks=[task1, task2], verbose=1, process=Process.sequential
|
agents=[pesquisador, writer], tasks=[task1, task2], verbose=1, process=Process.sequential
|
||||||
)
|
)
|
||||||
|
|
||||||
# Coloque seu crew para trabalhar!
|
# Coloque seu crew para trabalhar!
|
||||||
|
|||||||
@@ -76,20 +76,20 @@ from crewai_tools import (
|
|||||||
|
|
||||||
web_rag_tool = WebsiteSearchTool()
|
web_rag_tool = WebsiteSearchTool()
|
||||||
|
|
||||||
writer = Agent(
|
escritor = Agent(
|
||||||
role="Writer",
|
role="Escritor",
|
||||||
goal="Você torna a matemática envolvente e compreensível para crianças pequenas através de poesias",
|
goal="Você torna a matemática envolvente e compreensível para crianças pequenas através de poesias",
|
||||||
backstory="Você é especialista em escrever haicais mas não sabe nada de matemática.",
|
backstory="Você é especialista em escrever haicais mas não sabe nada de matemática.",
|
||||||
tools=[web_rag_tool],
|
tools=[web_rag_tool],
|
||||||
)
|
)
|
||||||
|
|
||||||
task = Task(description=("O que é {multiplicação}?"),
|
tarefa = Task(description=("O que é {multiplicação}?"),
|
||||||
expected_output=("Componha um haicai que inclua a resposta."),
|
expected_output=("Componha um haicai que inclua a resposta."),
|
||||||
agent=writer)
|
agent=escritor)
|
||||||
|
|
||||||
crew = Crew(
|
equipe = Crew(
|
||||||
agents=[writer],
|
agents=[escritor],
|
||||||
tasks=[task],
|
tasks=[tarefa],
|
||||||
share_crew=False
|
share_crew=False
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ Essa integração permite o registro de hiperparâmetros, o monitoramento de reg
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
from langtrace_python_sdk import langtrace
|
from langtrace_python_sdk import langtrace
|
||||||
langtrace.init(api_key='<LANGTRACE_API_KEY>')
|
langtrace.init(api_key='<SUA_CHAVE_LANGTRACE>')
|
||||||
|
|
||||||
# Agora importe os módulos do CrewAI
|
# Agora importe os módulos do CrewAI
|
||||||
from crewai import Agent, Task, Crew
|
from crewai import Agent, Task, Crew
|
||||||
|
|||||||
@@ -73,26 +73,24 @@ instrument_crewai(logger)
|
|||||||
### 4. Crie e execute sua aplicação CrewAI normalmente
|
### 4. Crie e execute sua aplicação CrewAI normalmente
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
pesquisador = Agent(
|
||||||
# Crie seu agente
|
role='Pesquisador Sênior',
|
||||||
researcher = Agent(
|
goal='Descobrir os avanços mais recentes em IA',
|
||||||
role='Senior Research Analyst',
|
backstory="Você é um pesquisador especialista em um think tank de tecnologia...",
|
||||||
goal='Uncover cutting-edge developments in AI',
|
|
||||||
backstory="You are an expert researcher at a tech think tank...",
|
|
||||||
verbose=True,
|
verbose=True,
|
||||||
llm=llm
|
llm=llm
|
||||||
)
|
)
|
||||||
|
|
||||||
# Defina a tarefa
|
# Defina a tarefa
|
||||||
research_task = Task(
|
research_task = Task(
|
||||||
description="Research the latest AI advancements...",
|
description="Pesquise os avanços mais recentes em IA...",
|
||||||
expected_output="",
|
expected_output="",
|
||||||
agent=researcher
|
agent=pesquisador
|
||||||
)
|
)
|
||||||
|
|
||||||
# Configure e execute a crew
|
# Configure e execute a crew
|
||||||
crew = Crew(
|
crew = Crew(
|
||||||
agents=[researcher],
|
agents=[pesquisador],
|
||||||
tasks=[research_task],
|
tasks=[research_task],
|
||||||
verbose=True
|
verbose=True
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -70,22 +70,19 @@ O tracing fornece uma forma de registrar os inputs, outputs e metadados associad
|
|||||||
|
|
||||||
class TripAgents:
|
class TripAgents:
|
||||||
def city_selection_agent(self):
|
def city_selection_agent(self):
|
||||||
return Agent(
|
especialista_cidades = Agent(
|
||||||
role="City Selection Expert",
|
role="Especialista em Seleção de Cidades",
|
||||||
goal="Select the best city based on weather, season, and prices",
|
goal="Selecionar a melhor cidade com base no clima, estação e preços",
|
||||||
backstory="An expert in analyzing travel data to pick ideal destinations",
|
backstory="Especialista em analisar dados de viagem para escolher destinos ideais",
|
||||||
tools=[
|
tools=[search_tool],
|
||||||
search_tool,
|
|
||||||
],
|
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
def local_expert(self):
|
def local_expert(self):
|
||||||
return Agent(
|
especialista_local = Agent(
|
||||||
role="Local Expert at this city",
|
role="Especialista Local nesta cidade",
|
||||||
goal="Provide the BEST insights about the selected city",
|
goal="Fornecer as MELHORES informações sobre a cidade selecionada",
|
||||||
backstory="""A knowledgeable local guide with extensive information
|
backstory="Um guia local experiente com amplo conhecimento sobre a cidade, suas atrações e costumes",
|
||||||
about the city, it's attractions and customs""",
|
|
||||||
tools=[search_tool],
|
tools=[search_tool],
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
@@ -96,53 +93,36 @@ O tracing fornece uma forma de registrar os inputs, outputs e metadados associad
|
|||||||
return Task(
|
return Task(
|
||||||
description=dedent(
|
description=dedent(
|
||||||
f"""
|
f"""
|
||||||
Analyze and select the best city for the trip based
|
Analise e selecione a melhor cidade para a viagem com base em critérios específicos como padrões climáticos, eventos sazonais e custos de viagem. Esta tarefa envolve comparar várias cidades, considerando fatores como condições climáticas atuais, eventos culturais ou sazonais e despesas gerais de viagem.
|
||||||
on specific criteria such as weather patterns, seasonal
|
Sua resposta final deve ser um relatório detalhado sobre a cidade escolhida e tudo o que você descobriu sobre ela, incluindo custos reais de voo, previsão do tempo e atrações.
|
||||||
events, and travel costs. This task involves comparing
|
|
||||||
multiple cities, considering factors like current weather
|
|
||||||
conditions, upcoming cultural or seasonal events, and
|
|
||||||
overall travel expenses.
|
|
||||||
Your final answer must be a detailed
|
|
||||||
report on the chosen city, and everything you found out
|
|
||||||
about it, including the actual flight costs, weather
|
|
||||||
forecast and attractions.
|
|
||||||
|
|
||||||
Traveling from: {origin}
|
Saindo de: {origin}
|
||||||
City Options: {cities}
|
Opções de cidades: {cities}
|
||||||
Trip Date: {range}
|
Data da viagem: {range}
|
||||||
Traveler Interests: {interests}
|
Interesses do viajante: {interests}
|
||||||
"""
|
"""
|
||||||
),
|
),
|
||||||
agent=agent,
|
agent=agent,
|
||||||
expected_output="Detailed report on the chosen city including flight costs, weather forecast, and attractions",
|
expected_output="Relatório detalhado sobre a cidade escolhida incluindo custos de voo, previsão do tempo e atrações",
|
||||||
)
|
)
|
||||||
|
|
||||||
def gather_task(self, agent, origin, interests, range):
|
def gather_task(self, agent, origin, interests, range):
|
||||||
return Task(
|
return Task(
|
||||||
description=dedent(
|
description=dedent(
|
||||||
f"""
|
f"""
|
||||||
As a local expert on this city you must compile an
|
Como especialista local nesta cidade, você deve compilar um guia aprofundado para alguém que está viajando para lá e quer ter a MELHOR viagem possível!
|
||||||
in-depth guide for someone traveling there and wanting
|
Reúna informações sobre principais atrações, costumes locais, eventos especiais e recomendações de atividades diárias.
|
||||||
to have THE BEST trip ever!
|
Encontre os melhores lugares para ir, aqueles que só um local conhece.
|
||||||
Gather information about key attractions, local customs,
|
Este guia deve fornecer uma visão abrangente do que a cidade tem a oferecer, incluindo joias escondidas, pontos culturais, marcos imperdíveis, previsão do tempo e custos gerais.
|
||||||
special events, and daily activity recommendations.
|
A resposta final deve ser um guia completo da cidade, rico em insights culturais e dicas práticas, adaptado para aprimorar a experiência de viagem.
|
||||||
Find the best spots to go to, the kind of place only a
|
|
||||||
local would know.
|
|
||||||
This guide should provide a thorough overview of what
|
|
||||||
the city has to offer, including hidden gems, cultural
|
|
||||||
hotspots, must-visit landmarks, weather forecasts, and
|
|
||||||
high level costs.
|
|
||||||
The final answer must be a comprehensive city guide,
|
|
||||||
rich in cultural insights and practical tips,
|
|
||||||
tailored to enhance the travel experience.
|
|
||||||
|
|
||||||
Trip Date: {range}
|
Data da viagem: {range}
|
||||||
Traveling from: {origin}
|
Saindo de: {origin}
|
||||||
Traveler Interests: {interests}
|
Interesses do viajante: {interests}
|
||||||
"""
|
"""
|
||||||
),
|
),
|
||||||
agent=agent,
|
agent=agent,
|
||||||
expected_output="Comprehensive city guide including hidden gems, cultural hotspots, and practical travel tips",
|
expected_output="Guia completo da cidade incluindo joias escondidas, pontos culturais e dicas práticas",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -189,7 +169,7 @@ O tracing fornece uma forma de registrar os inputs, outputs e metadados associad
|
|||||||
trip_crew = TripCrew("California", "Tokyo", "Dec 12 - Dec 20", "sports")
|
trip_crew = TripCrew("California", "Tokyo", "Dec 12 - Dec 20", "sports")
|
||||||
result = trip_crew.run()
|
result = trip_crew.run()
|
||||||
|
|
||||||
print(result)
|
print("Resultado da equipe:", result)
|
||||||
```
|
```
|
||||||
Consulte a [Documentação de Tracing do MLflow](https://mlflow.org/docs/latest/llms/tracing/index.html) para mais configurações e casos de uso.
|
Consulte a [Documentação de Tracing do MLflow](https://mlflow.org/docs/latest/llms/tracing/index.html) para mais configurações e casos de uso.
|
||||||
</Step>
|
</Step>
|
||||||
|
|||||||
@@ -69,10 +69,10 @@ Essa configuração permite acompanhar hiperparâmetros e monitorar problemas de
|
|||||||
|
|
||||||
openlit.init(disable_metrics=True)
|
openlit.init(disable_metrics=True)
|
||||||
# Definir seus agentes
|
# Definir seus agentes
|
||||||
researcher = Agent(
|
pesquisador = Agent(
|
||||||
role="Researcher",
|
role="Pesquisador",
|
||||||
goal="Conduct thorough research and analysis on AI and AI agents",
|
goal="Realizar pesquisas e análises aprofundadas sobre IA e agentes de IA",
|
||||||
backstory="You're an expert researcher, specialized in technology, software engineering, AI, and startups. You work as a freelancer and are currently researching for a new client.",
|
backstory="Você é um pesquisador especialista em tecnologia, engenharia de software, IA e startups. Trabalha como freelancer e está atualmente pesquisando para um novo cliente.",
|
||||||
allow_delegation=False,
|
allow_delegation=False,
|
||||||
llm='command-r'
|
llm='command-r'
|
||||||
)
|
)
|
||||||
@@ -80,24 +80,24 @@ Essa configuração permite acompanhar hiperparâmetros e monitorar problemas de
|
|||||||
|
|
||||||
# Definir sua task
|
# Definir sua task
|
||||||
task = Task(
|
task = Task(
|
||||||
description="Generate a list of 5 interesting ideas for an article, then write one captivating paragraph for each idea that showcases the potential of a full article on this topic. Return the list of ideas with their paragraphs and your notes.",
|
description="Gere uma lista com 5 ideias interessantes para um artigo e escreva um parágrafo cativante para cada ideia, mostrando o potencial de um artigo completo sobre o tema. Retorne a lista de ideias com seus parágrafos e suas anotações.",
|
||||||
expected_output="5 bullet points, each with a paragraph and accompanying notes.",
|
expected_output="5 tópicos, cada um com um parágrafo e notas complementares.",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Definir o agente gerente
|
# Definir o agente gerente
|
||||||
manager = Agent(
|
gerente = Agent(
|
||||||
role="Project Manager",
|
role="Gerente de Projeto",
|
||||||
goal="Efficiently manage the crew and ensure high-quality task completion",
|
goal="Gerenciar eficientemente a equipe e garantir a conclusão de tarefas de alta qualidade",
|
||||||
backstory="You're an experienced project manager, skilled in overseeing complex projects and guiding teams to success. Your role is to coordinate the efforts of the crew members, ensuring that each task is completed on time and to the highest standard.",
|
backstory="Você é um gerente de projetos experiente, habilidoso em supervisionar projetos complexos e guiar equipes para o sucesso. Sua função é coordenar os esforços dos membros da equipe, garantindo que cada tarefa seja concluída no prazo e com o mais alto padrão.",
|
||||||
allow_delegation=True,
|
allow_delegation=True,
|
||||||
llm='command-r'
|
llm='command-r'
|
||||||
)
|
)
|
||||||
|
|
||||||
# Instanciar sua crew com um manager personalizado
|
# Instanciar sua crew com um manager personalizado
|
||||||
crew = Crew(
|
crew = Crew(
|
||||||
agents=[researcher],
|
agents=[pesquisador],
|
||||||
tasks=[task],
|
tasks=[task],
|
||||||
manager_agent=manager,
|
manager_agent=gerente,
|
||||||
process=Process.hierarchical,
|
process=Process.hierarchical,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -132,18 +132,18 @@ Essa configuração permite acompanhar hiperparâmetros e monitorar problemas de
|
|||||||
|
|
||||||
# Criar um agente com execução de código habilitada
|
# Criar um agente com execução de código habilitada
|
||||||
coding_agent = Agent(
|
coding_agent = Agent(
|
||||||
role="Python Data Analyst",
|
role="Analista de Dados Python",
|
||||||
goal="Analyze data and provide insights using Python",
|
goal="Analisar dados e fornecer insights usando Python",
|
||||||
backstory="You are an experienced data analyst with strong Python skills.",
|
backstory="Você é um analista de dados experiente com fortes habilidades em Python.",
|
||||||
allow_code_execution=True,
|
allow_code_execution=True,
|
||||||
llm="command-r"
|
llm="command-r"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Criar uma task que exige execução de código
|
# Criar uma task que exige execução de código
|
||||||
data_analysis_task = Task(
|
data_analysis_task = Task(
|
||||||
description="Analyze the given dataset and calculate the average age of participants. Ages: {ages}",
|
description="Analise o conjunto de dados fornecido e calcule a idade média dos participantes. Idades: {ages}",
|
||||||
agent=coding_agent,
|
agent=coding_agent,
|
||||||
expected_output="5 bullet points, each with a paragraph and accompanying notes.",
|
expected_output="5 tópicos, cada um com um parágrafo e notas complementares.",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Criar uma crew e adicionar a task
|
# Criar uma crew e adicionar a task
|
||||||
|
|||||||
@@ -58,43 +58,43 @@ Neste guia, utilizaremos o exemplo de início rápido da CrewAI.
|
|||||||
from crewai import Agent, Crew, Task, Process
|
from crewai import Agent, Crew, Task, Process
|
||||||
|
|
||||||
|
|
||||||
class YourCrewName:
|
class NomeDaEquipe:
|
||||||
def agent_one(self) -> Agent:
|
def agente_um(self) -> Agent:
|
||||||
return Agent(
|
return Agent(
|
||||||
role="Data Analyst",
|
role="Analista de Dados",
|
||||||
goal="Analyze data trends in the market",
|
goal="Analisar tendências de dados no mercado",
|
||||||
backstory="An experienced data analyst with a background in economics",
|
backstory="Analista de dados experiente com formação em economia",
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
def agent_two(self) -> Agent:
|
def agente_dois(self) -> Agent:
|
||||||
return Agent(
|
return Agent(
|
||||||
role="Market Researcher",
|
role="Pesquisador de Mercado",
|
||||||
goal="Gather information on market dynamics",
|
goal="Coletar informações sobre a dinâmica do mercado",
|
||||||
backstory="A diligent researcher with a keen eye for detail",
|
backstory="Pesquisador dedicado com olhar atento para detalhes",
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
def task_one(self) -> Task:
|
def tarefa_um(self) -> Task:
|
||||||
return Task(
|
return Task(
|
||||||
name="Collect Data Task",
|
name="Tarefa de Coleta de Dados",
|
||||||
description="Collect recent market data and identify trends.",
|
description="Coletar dados recentes do mercado e identificar tendências.",
|
||||||
expected_output="A report summarizing key trends in the market.",
|
expected_output="Um relatório resumindo as principais tendências do mercado.",
|
||||||
agent=self.agent_one(),
|
agent=self.agente_um(),
|
||||||
)
|
)
|
||||||
|
|
||||||
def task_two(self) -> Task:
|
def tarefa_dois(self) -> Task:
|
||||||
return Task(
|
return Task(
|
||||||
name="Market Research Task",
|
name="Tarefa de Pesquisa de Mercado",
|
||||||
description="Research factors affecting market dynamics.",
|
description="Pesquisar fatores que afetam a dinâmica do mercado.",
|
||||||
expected_output="An analysis of factors influencing the market.",
|
expected_output="Uma análise dos fatores que influenciam o mercado.",
|
||||||
agent=self.agent_two(),
|
agent=self.agente_dois(),
|
||||||
)
|
)
|
||||||
|
|
||||||
def crew(self) -> Crew:
|
def equipe(self) -> Crew:
|
||||||
return Crew(
|
return Crew(
|
||||||
agents=[self.agent_one(), self.agent_two()],
|
agents=[self.agente_um(), self.agente_dois()],
|
||||||
tasks=[self.task_one(), self.task_two()],
|
tasks=[self.tarefa_um(), self.tarefa_dois()],
|
||||||
process=Process.sequential,
|
process=Process.sequential,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
@@ -108,7 +108,7 @@ Neste guia, utilizaremos o exemplo de início rápido da CrewAI.
|
|||||||
|
|
||||||
track_crewai(project_name="crewai-integration-demo")
|
track_crewai(project_name="crewai-integration-demo")
|
||||||
|
|
||||||
my_crew = YourCrewName().crew()
|
my_crew = NomeDaEquipe().equipe()
|
||||||
result = my_crew.kickoff()
|
result = my_crew.kickoff()
|
||||||
|
|
||||||
print(result)
|
print(result)
|
||||||
|
|||||||
@@ -64,17 +64,17 @@ patronus_eval_tool = PatronusEvalTool()
|
|||||||
|
|
||||||
# Define an agent that uses the tool
|
# Define an agent that uses the tool
|
||||||
coding_agent = Agent(
|
coding_agent = Agent(
|
||||||
role="Coding Agent",
|
role="Agente de Programação",
|
||||||
goal="Generate high quality code and verify that the output is code",
|
goal="Gerar código de alta qualidade e verificar se a saída é código",
|
||||||
backstory="An experienced coder who can generate high quality python code.",
|
backstory="Um programador experiente que pode gerar código Python de alta qualidade.",
|
||||||
tools=[patronus_eval_tool],
|
tools=[patronus_eval_tool],
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Example task to generate and evaluate code
|
# Example task to generate and evaluate code
|
||||||
generate_code_task = Task(
|
generate_code_task = Task(
|
||||||
description="Create a simple program to generate the first N numbers in the Fibonacci sequence. Select the most appropriate evaluator and criteria for evaluating your output.",
|
description="Crie um programa simples para gerar os N primeiros números da sequência de Fibonacci. Selecione o avaliador e os critérios mais apropriados para avaliar sua saída.",
|
||||||
expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
|
expected_output="Programa que gera os N primeiros números da sequência de Fibonacci.",
|
||||||
agent=coding_agent,
|
agent=coding_agent,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -98,17 +98,17 @@ patronus_eval_tool = PatronusPredefinedCriteriaEvalTool(
|
|||||||
|
|
||||||
# Define an agent that uses the tool
|
# Define an agent that uses the tool
|
||||||
coding_agent = Agent(
|
coding_agent = Agent(
|
||||||
role="Coding Agent",
|
role="Agente de Programação",
|
||||||
goal="Generate high quality code",
|
goal="Gerar código de alta qualidade",
|
||||||
backstory="An experienced coder who can generate high quality python code.",
|
backstory="Um programador experiente que pode gerar código Python de alta qualidade.",
|
||||||
tools=[patronus_eval_tool],
|
tools=[patronus_eval_tool],
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Example task to generate code
|
# Example task to generate code
|
||||||
generate_code_task = Task(
|
generate_code_task = Task(
|
||||||
description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
|
description="Crie um programa simples para gerar os N primeiros números da sequência de Fibonacci.",
|
||||||
expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
|
expected_output="Programa que gera os N primeiros números da sequência de Fibonacci.",
|
||||||
agent=coding_agent,
|
agent=coding_agent,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -149,17 +149,17 @@ patronus_eval_tool = PatronusLocalEvaluatorTool(
|
|||||||
|
|
||||||
# Define an agent that uses the tool
|
# Define an agent that uses the tool
|
||||||
coding_agent = Agent(
|
coding_agent = Agent(
|
||||||
role="Coding Agent",
|
role="Agente de Programação",
|
||||||
goal="Generate high quality code",
|
goal="Gerar código de alta qualidade",
|
||||||
backstory="An experienced coder who can generate high quality python code.",
|
backstory="Um programador experiente que pode gerar código Python de alta qualidade.",
|
||||||
tools=[patronus_eval_tool],
|
tools=[patronus_eval_tool],
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Example task to generate code
|
# Example task to generate code
|
||||||
generate_code_task = Task(
|
generate_code_task = Task(
|
||||||
description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
|
description="Crie um programa simples para gerar os N primeiros números da sequência de Fibonacci.",
|
||||||
expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
|
expected_output="Programa que gera os N primeiros números da sequência de Fibonacci.",
|
||||||
agent=coding_agent,
|
agent=coding_agent,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -50,48 +50,48 @@ O Weave captura automaticamente rastreamentos (traces) de suas aplicações Crew
|
|||||||
llm = LLM(model="gpt-4o", temperature=0)
|
llm = LLM(model="gpt-4o", temperature=0)
|
||||||
|
|
||||||
# Crie os agentes
|
# Crie os agentes
|
||||||
researcher = Agent(
|
pesquisador = Agent(
|
||||||
role='Research Analyst',
|
role='Analista de Pesquisa',
|
||||||
goal='Find and analyze the best investment opportunities',
|
goal='Encontrar e analisar as melhores oportunidades de investimento',
|
||||||
backstory='Expert in financial analysis and market research',
|
backstory='Especialista em análise financeira e pesquisa de mercado',
|
||||||
llm=llm,
|
llm=llm,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
allow_delegation=False,
|
allow_delegation=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
writer = Agent(
|
redator = Agent(
|
||||||
role='Report Writer',
|
role='Redator de Relatórios',
|
||||||
goal='Write clear and concise investment reports',
|
goal='Escrever relatórios de investimento claros e concisos',
|
||||||
backstory='Experienced in creating detailed financial reports',
|
backstory='Experiente na criação de relatórios financeiros detalhados',
|
||||||
llm=llm,
|
llm=llm,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
allow_delegation=False,
|
allow_delegation=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Crie as tarefas
|
# Crie as tarefas
|
||||||
research_task = Task(
|
pesquisa = Task(
|
||||||
description='Deep research on the {topic}',
|
description='Pesquisa aprofundada sobre o {tema}',
|
||||||
expected_output='Comprehensive market data including key players, market size, and growth trends.',
|
expected_output='Dados de mercado abrangentes incluindo principais players, tamanho de mercado e tendências de crescimento.',
|
||||||
agent=researcher
|
agent=pesquisador
|
||||||
)
|
)
|
||||||
|
|
||||||
writing_task = Task(
|
redacao = Task(
|
||||||
description='Write a detailed report based on the research',
|
description='Escreva um relatório detalhado com base na pesquisa',
|
||||||
expected_output='The report should be easy to read and understand. Use bullet points where applicable.',
|
expected_output='O relatório deve ser fácil de ler e entender. Use tópicos quando aplicável.',
|
||||||
agent=writer
|
agent=redator
|
||||||
)
|
)
|
||||||
|
|
||||||
# Crie o crew
|
# Crie o crew
|
||||||
crew = Crew(
|
equipe = Crew(
|
||||||
agents=[researcher, writer],
|
agents=[pesquisador, redator],
|
||||||
tasks=[research_task, writing_task],
|
tasks=[pesquisa, redacao],
|
||||||
verbose=True,
|
verbose=True,
|
||||||
process=Process.sequential,
|
process=Process.sequential,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Execute o crew
|
# Execute o crew
|
||||||
result = crew.kickoff(inputs={"topic": "AI in material science"})
|
resultado = equipe.kickoff(inputs={"tema": "IA em ciência dos materiais"})
|
||||||
print(result)
|
print(resultado)
|
||||||
```
|
```
|
||||||
</Step>
|
</Step>
|
||||||
<Step title="Visualize rastreamentos no Weave">
|
<Step title="Visualize rastreamentos no Weave">
|
||||||
|
|||||||
@@ -39,23 +39,19 @@ Siga os passos abaixo para começar a tripular! 🚣♂️
|
|||||||
# src/latest_ai_development/config/agents.yaml
|
# src/latest_ai_development/config/agents.yaml
|
||||||
researcher:
|
researcher:
|
||||||
role: >
|
role: >
|
||||||
{topic} Senior Data Researcher
|
Pesquisador Sênior de Dados em {topic}
|
||||||
goal: >
|
goal: >
|
||||||
Uncover cutting-edge developments in {topic}
|
Descobrir os avanços mais recentes em {topic}
|
||||||
backstory: >
|
backstory: >
|
||||||
You're a seasoned researcher with a knack for uncovering the latest
|
Você é um pesquisador experiente com talento para descobrir os últimos avanços em {topic}. Conhecido por sua habilidade em encontrar as informações mais relevantes e apresentá-las de forma clara e concisa.
|
||||||
developments in {topic}. Known for your ability to find the most relevant
|
|
||||||
information and present it in a clear and concise manner.
|
|
||||||
|
|
||||||
reporting_analyst:
|
reporting_analyst:
|
||||||
role: >
|
role: >
|
||||||
{topic} Reporting Analyst
|
Analista de Relatórios em {topic}
|
||||||
goal: >
|
goal: >
|
||||||
Create detailed reports based on {topic} data analysis and research findings
|
Criar relatórios detalhados com base na análise de dados e descobertas de pesquisa em {topic}
|
||||||
backstory: >
|
backstory: >
|
||||||
You're a meticulous analyst with a keen eye for detail. You're known for
|
Você é um analista meticuloso com um olhar atento aos detalhes. É conhecido por sua capacidade de transformar dados complexos em relatórios claros e concisos, facilitando o entendimento e a tomada de decisão por parte dos outros.
|
||||||
your ability to turn complex data into clear and concise reports, making
|
|
||||||
it easy for others to understand and act on the information you provide.
|
|
||||||
```
|
```
|
||||||
</Step>
|
</Step>
|
||||||
<Step title="Modifique seu arquivo `tasks.yaml`">
|
<Step title="Modifique seu arquivo `tasks.yaml`">
|
||||||
@@ -63,20 +59,19 @@ Siga os passos abaixo para começar a tripular! 🚣♂️
|
|||||||
# src/latest_ai_development/config/tasks.yaml
|
# src/latest_ai_development/config/tasks.yaml
|
||||||
research_task:
|
research_task:
|
||||||
description: >
|
description: >
|
||||||
Conduct a thorough research about {topic}
|
Realize uma pesquisa aprofundada sobre {topic}.
|
||||||
Make sure you find any interesting and relevant information given
|
Certifique-se de encontrar informações interessantes e relevantes considerando que o ano atual é 2025.
|
||||||
the current year is 2025.
|
|
||||||
expected_output: >
|
expected_output: >
|
||||||
A list with 10 bullet points of the most relevant information about {topic}
|
Uma lista com 10 tópicos dos dados mais relevantes sobre {topic}
|
||||||
agent: researcher
|
agent: researcher
|
||||||
|
|
||||||
reporting_task:
|
reporting_task:
|
||||||
description: >
|
description: >
|
||||||
Review the context you got and expand each topic into a full section for a report.
|
Revise o contexto obtido e expanda cada tópico em uma seção completa para um relatório.
|
||||||
Make sure the report is detailed and contains any and all relevant information.
|
Certifique-se de que o relatório seja detalhado e contenha todas as informações relevantes.
|
||||||
expected_output: >
|
expected_output: >
|
||||||
A fully fledge reports with the mains topics, each with a full section of information.
|
Um relatório completo com os principais tópicos, cada um com uma seção detalhada de informações.
|
||||||
Formatted as markdown without '```'
|
Formate como markdown sem usar '```'
|
||||||
agent: reporting_analyst
|
agent: reporting_analyst
|
||||||
output_file: report.md
|
output_file: report.md
|
||||||
```
|
```
|
||||||
@@ -122,15 +117,15 @@ Siga os passos abaixo para começar a tripular! 🚣♂️
|
|||||||
def reporting_task(self) -> Task:
|
def reporting_task(self) -> Task:
|
||||||
return Task(
|
return Task(
|
||||||
config=self.tasks_config['reporting_task'], # type: ignore[index]
|
config=self.tasks_config['reporting_task'], # type: ignore[index]
|
||||||
output_file='output/report.md' # This is the file that will be contain the final report.
|
output_file='output/report.md' # Este é o arquivo que conterá o relatório final.
|
||||||
)
|
)
|
||||||
|
|
||||||
@crew
|
@crew
|
||||||
def crew(self) -> Crew:
|
def crew(self) -> Crew:
|
||||||
"""Creates the LatestAiDevelopment crew"""
|
"""Creates the LatestAiDevelopment crew"""
|
||||||
return Crew(
|
return Crew(
|
||||||
agents=self.agents, # Automatically created by the @agent decorator
|
agents=self.agents, # Criado automaticamente pelo decorador @agent
|
||||||
tasks=self.tasks, # Automatically created by the @task decorator
|
tasks=self.tasks, # Criado automaticamente pelo decorador @task
|
||||||
process=Process.sequential,
|
process=Process.sequential,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
@@ -229,7 +224,7 @@ Siga os passos abaixo para começar a tripular! 🚣♂️
|
|||||||
|
|
||||||
<CodeGroup>
|
<CodeGroup>
|
||||||
```markdown output/report.md
|
```markdown output/report.md
|
||||||
# Comprehensive Report on the Rise and Impact of AI Agents in 2025
|
# Relatório Abrangente sobre a Ascensão e o Impacto dos Agentes de IA em 2025
|
||||||
|
|
||||||
## 1. Introduction to AI Agents
|
## 1. Introduction to AI Agents
|
||||||
In 2025, Artificial Intelligence (AI) agents are at the forefront of innovation across various industries. As intelligent systems that can perform tasks typically requiring human cognition, AI agents are paving the way for significant advancements in operational efficiency, decision-making, and overall productivity within sectors like Human Resources (HR) and Finance. This report aims to detail the rise of AI agents, their frameworks, applications, and potential implications on the workforce.
|
In 2025, Artificial Intelligence (AI) agents are at the forefront of innovation across various industries. As intelligent systems that can perform tasks typically requiring human cognition, AI agents are paving the way for significant advancements in operational efficiency, decision-making, and overall productivity within sectors like Human Resources (HR) and Finance. This report aims to detail the rise of AI agents, their frameworks, applications, and potential implications on the workforce.
|
||||||
|
|||||||
@@ -35,78 +35,18 @@ from crewai_tools import LinkupSearchTool
|
|||||||
from crewai import Agent
|
from crewai import Agent
|
||||||
import os
|
import os
|
||||||
|
|
||||||
# Initialize the tool with your API key
|
# Inicialize a ferramenta com sua chave de API
|
||||||
linkup_tool = LinkupSearchTool(api_key=os.getenv("LINKUP_API_KEY"))
|
linkup_ferramenta = LinkupSearchTool(api_key=os.getenv("LINKUP_API_KEY"))
|
||||||
|
|
||||||
# Define an agent that uses the tool
|
# Defina um agente que usa a ferramenta
|
||||||
@agent
|
@agent
|
||||||
def researcher(self) -> Agent:
|
def pesquisador(self) -> Agent:
|
||||||
'''
|
'''
|
||||||
This agent uses the LinkupSearchTool to retrieve contextual information
|
Este agente usa o LinkupSearchTool para recuperar informações contextuais
|
||||||
from the Linkup API.
|
da API do Linkup.
|
||||||
'''
|
'''
|
||||||
return Agent(
|
return Agent(
|
||||||
config=self.agents_config["researcher"],
|
config=self.agentes_config["pesquisador"],
|
||||||
tools=[linkup_tool]
|
tools=[linkup_ferramenta]
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Parâmetros
|
|
||||||
|
|
||||||
O `LinkupSearchTool` aceita os seguintes parâmetros:
|
|
||||||
|
|
||||||
### Parâmetros do Construtor
|
|
||||||
- **api_key**: Obrigatório. Sua chave de API do Linkup.
|
|
||||||
|
|
||||||
### Parâmetros de Execução
|
|
||||||
- **query**: Obrigatório. O termo ou frase de busca.
|
|
||||||
- **depth**: Opcional. A profundidade da busca. O padrão é "standard".
|
|
||||||
- **output_type**: Opcional. O tipo de saída. O padrão é "searchResults".
|
|
||||||
|
|
||||||
## Uso Avançado
|
|
||||||
|
|
||||||
Você pode personalizar os parâmetros de busca para resultados mais específicos:
|
|
||||||
|
|
||||||
```python Code
|
|
||||||
# Perform a search with custom parameters
|
|
||||||
results = linkup_tool.run(
|
|
||||||
query="Women Nobel Prize Physics",
|
|
||||||
depth="deep",
|
|
||||||
output_type="searchResults"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Formato de Retorno
|
|
||||||
|
|
||||||
A ferramenta retorna resultados no seguinte formato:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"success": true,
|
|
||||||
"results": [
|
|
||||||
{
|
|
||||||
"name": "Result Title",
|
|
||||||
"url": "https://example.com/result",
|
|
||||||
"content": "Content of the result..."
|
|
||||||
},
|
|
||||||
// Additional results...
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Se ocorrer um erro, a resposta será:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"success": false,
|
|
||||||
"error": "Error message"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Tratamento de Erros
|
|
||||||
|
|
||||||
A ferramenta lida com erros de API de forma amigável e fornece feedback estruturado. Se a requisição à API falhar, a ferramenta retornará um dicionário com `success: false` e uma mensagem de erro.
|
|
||||||
|
|
||||||
## Conclusão
|
|
||||||
|
|
||||||
O `LinkupSearchTool` oferece uma forma integrada de incorporar as capacidades de busca de informações contextuais do Linkup aos seus agentes CrewAI. Ao utilizar esta ferramenta, os agentes podem acessar informações relevantes e atualizadas para aprimorar sua tomada de decisão e execução de tarefas.
|
|
||||||
@@ -27,13 +27,13 @@ dependencies = [
|
|||||||
"openpyxl>=3.1.5",
|
"openpyxl>=3.1.5",
|
||||||
"pyvis>=0.3.2",
|
"pyvis>=0.3.2",
|
||||||
# Authentication and Security
|
# Authentication and Security
|
||||||
"auth0-python>=4.7.1",
|
|
||||||
"python-dotenv>=1.0.0",
|
"python-dotenv>=1.0.0",
|
||||||
|
"pyjwt>=2.9.0",
|
||||||
# Configuration and Utils
|
# Configuration and Utils
|
||||||
"click>=8.1.7",
|
"click>=8.1.7",
|
||||||
"appdirs>=1.4.4",
|
"appdirs>=1.4.4",
|
||||||
"jsonref>=1.1.0",
|
"jsonref>=1.1.0",
|
||||||
"json-repair>=0.25.2",
|
"json-repair==0.25.2",
|
||||||
"uv>=0.4.25",
|
"uv>=0.4.25",
|
||||||
"tomli-w>=1.1.0",
|
"tomli-w>=1.1.0",
|
||||||
"tomli>=2.0.2",
|
"tomli>=2.0.2",
|
||||||
@@ -47,11 +47,11 @@ Documentation = "https://docs.crewai.com"
|
|||||||
Repository = "https://github.com/crewAIInc/crewAI"
|
Repository = "https://github.com/crewAIInc/crewAI"
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
tools = ["crewai-tools~=0.48.0"]
|
tools = ["crewai-tools~=0.55.0"]
|
||||||
embeddings = [
|
embeddings = [
|
||||||
"tiktoken~=0.8.0"
|
"tiktoken~=0.8.0"
|
||||||
]
|
]
|
||||||
agentops = ["agentops>=0.3.0"]
|
agentops = ["agentops==0.3.18"]
|
||||||
pdfplumber = [
|
pdfplumber = [
|
||||||
"pdfplumber>=0.11.4",
|
"pdfplumber>=0.11.4",
|
||||||
]
|
]
|
||||||
@@ -83,6 +83,8 @@ dev-dependencies = [
|
|||||||
"pytest-recording>=0.13.2",
|
"pytest-recording>=0.13.2",
|
||||||
"pytest-randomly>=3.16.0",
|
"pytest-randomly>=3.16.0",
|
||||||
"pytest-timeout>=2.3.1",
|
"pytest-timeout>=2.3.1",
|
||||||
|
"pytest-xdist>=3.6.1",
|
||||||
|
"pytest-split>=0.9.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
@@ -123,3 +125,15 @@ path = "src/crewai/__init__.py"
|
|||||||
[build-system]
|
[build-system]
|
||||||
requires = ["hatchling"]
|
requires = ["hatchling"]
|
||||||
build-backend = "hatchling.build"
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[tool.hatch.build.targets.wheel]
|
||||||
|
exclude = [
|
||||||
|
"docs/**",
|
||||||
|
"docs/",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.hatch.build.targets.sdist]
|
||||||
|
exclude = [
|
||||||
|
"docs/**",
|
||||||
|
"docs/",
|
||||||
|
]
|
||||||
|
|||||||
@@ -28,19 +28,19 @@ _telemetry_submitted = False
|
|||||||
def _track_install():
|
def _track_install():
|
||||||
"""Track package installation/first-use via Scarf analytics."""
|
"""Track package installation/first-use via Scarf analytics."""
|
||||||
global _telemetry_submitted
|
global _telemetry_submitted
|
||||||
|
|
||||||
if _telemetry_submitted or Telemetry._is_telemetry_disabled():
|
if _telemetry_submitted or Telemetry._is_telemetry_disabled():
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
pixel_url = "https://api.scarf.sh/v2/packages/CrewAI/crewai/docs/00f2dad1-8334-4a39-934e-003b2e1146db"
|
pixel_url = "https://api.scarf.sh/v2/packages/CrewAI/crewai/docs/00f2dad1-8334-4a39-934e-003b2e1146db"
|
||||||
|
|
||||||
req = urllib.request.Request(pixel_url)
|
req = urllib.request.Request(pixel_url)
|
||||||
req.add_header('User-Agent', f'CrewAI-Python/{__version__}')
|
req.add_header('User-Agent', f'CrewAI-Python/{__version__}')
|
||||||
|
|
||||||
with urllib.request.urlopen(req, timeout=2): # nosec B310
|
with urllib.request.urlopen(req, timeout=2): # nosec B310
|
||||||
_telemetry_submitted = True
|
_telemetry_submitted = True
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -54,7 +54,7 @@ def _track_install_async():
|
|||||||
|
|
||||||
_track_install_async()
|
_track_install_async()
|
||||||
|
|
||||||
__version__ = "0.134.0"
|
__version__ = "0.148.0"
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"Agent",
|
"Agent",
|
||||||
"Crew",
|
"Crew",
|
||||||
|
|||||||
@@ -210,7 +210,6 @@ class Agent(BaseAgent):
|
|||||||
sources=self.knowledge_sources,
|
sources=self.knowledge_sources,
|
||||||
embedder=self.embedder,
|
embedder=self.embedder,
|
||||||
collection_name=self.role,
|
collection_name=self.role,
|
||||||
storage=self.knowledge_storage or None,
|
|
||||||
)
|
)
|
||||||
self.knowledge.add_sources()
|
self.knowledge.add_sources()
|
||||||
except (TypeError, ValueError) as e:
|
except (TypeError, ValueError) as e:
|
||||||
@@ -341,7 +340,8 @@ class Agent(BaseAgent):
|
|||||||
self.knowledge_config.model_dump() if self.knowledge_config else {}
|
self.knowledge_config.model_dump() if self.knowledge_config else {}
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.knowledge:
|
|
||||||
|
if self.knowledge or (self.crew and self.crew.knowledge):
|
||||||
crewai_event_bus.emit(
|
crewai_event_bus.emit(
|
||||||
self,
|
self,
|
||||||
event=KnowledgeRetrievalStartedEvent(
|
event=KnowledgeRetrievalStartedEvent(
|
||||||
@@ -353,25 +353,28 @@ class Agent(BaseAgent):
|
|||||||
task_prompt
|
task_prompt
|
||||||
)
|
)
|
||||||
if self.knowledge_search_query:
|
if self.knowledge_search_query:
|
||||||
agent_knowledge_snippets = self.knowledge.query(
|
# Querying agent specific knowledge
|
||||||
[self.knowledge_search_query], **knowledge_config
|
if self.knowledge:
|
||||||
)
|
agent_knowledge_snippets = self.knowledge.query(
|
||||||
if agent_knowledge_snippets:
|
|
||||||
self.agent_knowledge_context = extract_knowledge_context(
|
|
||||||
agent_knowledge_snippets
|
|
||||||
)
|
|
||||||
if self.agent_knowledge_context:
|
|
||||||
task_prompt += self.agent_knowledge_context
|
|
||||||
if self.crew:
|
|
||||||
knowledge_snippets = self.crew.query_knowledge(
|
|
||||||
[self.knowledge_search_query], **knowledge_config
|
[self.knowledge_search_query], **knowledge_config
|
||||||
)
|
)
|
||||||
if knowledge_snippets:
|
if agent_knowledge_snippets:
|
||||||
self.crew_knowledge_context = extract_knowledge_context(
|
self.agent_knowledge_context = extract_knowledge_context(
|
||||||
knowledge_snippets
|
agent_knowledge_snippets
|
||||||
)
|
)
|
||||||
if self.crew_knowledge_context:
|
if self.agent_knowledge_context:
|
||||||
task_prompt += self.crew_knowledge_context
|
task_prompt += self.agent_knowledge_context
|
||||||
|
|
||||||
|
# Querying crew specific knowledge
|
||||||
|
knowledge_snippets = self.crew.query_knowledge(
|
||||||
|
[self.knowledge_search_query], **knowledge_config
|
||||||
|
)
|
||||||
|
if knowledge_snippets:
|
||||||
|
self.crew_knowledge_context = extract_knowledge_context(
|
||||||
|
knowledge_snippets
|
||||||
|
)
|
||||||
|
if self.crew_knowledge_context:
|
||||||
|
task_prompt += self.crew_knowledge_context
|
||||||
|
|
||||||
crewai_event_bus.emit(
|
crewai_event_bus.emit(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -2,3 +2,7 @@ ALGORITHMS = ["RS256"]
|
|||||||
AUTH0_DOMAIN = "crewai.us.auth0.com"
|
AUTH0_DOMAIN = "crewai.us.auth0.com"
|
||||||
AUTH0_CLIENT_ID = "DEVC5Fw6NlRoSzmDCcOhVq85EfLBjKa8"
|
AUTH0_CLIENT_ID = "DEVC5Fw6NlRoSzmDCcOhVq85EfLBjKa8"
|
||||||
AUTH0_AUDIENCE = "https://crewai.us.auth0.com/api/v2/"
|
AUTH0_AUDIENCE = "https://crewai.us.auth0.com/api/v2/"
|
||||||
|
|
||||||
|
WORKOS_DOMAIN = "login.crewai.com"
|
||||||
|
WORKOS_CLI_CONNECT_APP_ID = "client_01JYT06R59SP0NXYGD994NFXXX"
|
||||||
|
WORKOS_ENVIRONMENT_ID = "client_01JNJQWBJ4SPFN3SWJM5T7BDG8"
|
||||||
|
|||||||
@@ -5,37 +5,72 @@ from typing import Any, Dict
|
|||||||
import requests
|
import requests
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
|
|
||||||
from .constants import AUTH0_AUDIENCE, AUTH0_CLIENT_ID, AUTH0_DOMAIN
|
from .constants import (
|
||||||
from .utils import TokenManager, validate_token
|
AUTH0_AUDIENCE,
|
||||||
|
AUTH0_CLIENT_ID,
|
||||||
|
AUTH0_DOMAIN,
|
||||||
|
WORKOS_DOMAIN,
|
||||||
|
WORKOS_CLI_CONNECT_APP_ID,
|
||||||
|
WORKOS_ENVIRONMENT_ID,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .utils import TokenManager, validate_jwt_token
|
||||||
|
from urllib.parse import quote
|
||||||
|
from crewai.cli.plus_api import PlusAPI
|
||||||
|
from crewai.cli.config import Settings
|
||||||
|
|
||||||
console = Console()
|
console = Console()
|
||||||
|
|
||||||
|
|
||||||
class AuthenticationCommand:
|
class AuthenticationCommand:
|
||||||
DEVICE_CODE_URL = f"https://{AUTH0_DOMAIN}/oauth/device/code"
|
AUTH0_DEVICE_CODE_URL = f"https://{AUTH0_DOMAIN}/oauth/device/code"
|
||||||
TOKEN_URL = f"https://{AUTH0_DOMAIN}/oauth/token"
|
AUTH0_TOKEN_URL = f"https://{AUTH0_DOMAIN}/oauth/token"
|
||||||
|
|
||||||
|
WORKOS_DEVICE_CODE_URL = f"https://{WORKOS_DOMAIN}/oauth2/device_authorization"
|
||||||
|
WORKOS_TOKEN_URL = f"https://{WORKOS_DOMAIN}/oauth2/token"
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.token_manager = TokenManager()
|
self.token_manager = TokenManager()
|
||||||
|
# TODO: WORKOS - This variable is temporary until migration to WorkOS is complete.
|
||||||
|
self.user_provider = "workos"
|
||||||
|
|
||||||
def signup(self) -> None:
|
def login(self) -> None:
|
||||||
"""Sign up to CrewAI+"""
|
"""Sign up to CrewAI+"""
|
||||||
console.print("Signing Up to CrewAI+ \n", style="bold blue")
|
|
||||||
device_code_data = self._get_device_code()
|
device_code_url = self.WORKOS_DEVICE_CODE_URL
|
||||||
|
token_url = self.WORKOS_TOKEN_URL
|
||||||
|
client_id = WORKOS_CLI_CONNECT_APP_ID
|
||||||
|
audience = None
|
||||||
|
|
||||||
|
console.print("Signing in to CrewAI Enterprise...\n", style="bold blue")
|
||||||
|
|
||||||
|
# TODO: WORKOS - Next line and conditional are temporary until migration to WorkOS is complete.
|
||||||
|
user_provider = self._determine_user_provider()
|
||||||
|
if user_provider == "auth0":
|
||||||
|
device_code_url = self.AUTH0_DEVICE_CODE_URL
|
||||||
|
token_url = self.AUTH0_TOKEN_URL
|
||||||
|
client_id = AUTH0_CLIENT_ID
|
||||||
|
audience = AUTH0_AUDIENCE
|
||||||
|
self.user_provider = "auth0"
|
||||||
|
# End of temporary code.
|
||||||
|
|
||||||
|
device_code_data = self._get_device_code(client_id, device_code_url, audience)
|
||||||
self._display_auth_instructions(device_code_data)
|
self._display_auth_instructions(device_code_data)
|
||||||
|
|
||||||
return self._poll_for_token(device_code_data)
|
return self._poll_for_token(device_code_data, client_id, token_url)
|
||||||
|
|
||||||
def _get_device_code(self) -> Dict[str, Any]:
|
def _get_device_code(
|
||||||
|
self, client_id: str, device_code_url: str, audience: str | None = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
"""Get the device code to authenticate the user."""
|
"""Get the device code to authenticate the user."""
|
||||||
|
|
||||||
device_code_payload = {
|
device_code_payload = {
|
||||||
"client_id": AUTH0_CLIENT_ID,
|
"client_id": client_id,
|
||||||
"scope": "openid",
|
"scope": "openid",
|
||||||
"audience": AUTH0_AUDIENCE,
|
"audience": audience,
|
||||||
}
|
}
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
url=self.DEVICE_CODE_URL, data=device_code_payload, timeout=20
|
url=device_code_url, data=device_code_payload, timeout=20
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response.json()
|
return response.json()
|
||||||
@@ -46,38 +81,33 @@ class AuthenticationCommand:
|
|||||||
console.print("2. Enter the following code: ", device_code_data["user_code"])
|
console.print("2. Enter the following code: ", device_code_data["user_code"])
|
||||||
webbrowser.open(device_code_data["verification_uri_complete"])
|
webbrowser.open(device_code_data["verification_uri_complete"])
|
||||||
|
|
||||||
def _poll_for_token(self, device_code_data: Dict[str, Any]) -> None:
|
def _poll_for_token(
|
||||||
"""Poll the server for the token."""
|
self, device_code_data: Dict[str, Any], client_id: str, token_poll_url: str
|
||||||
|
) -> None:
|
||||||
|
"""Polls the server for the token until it is received, or max attempts are reached."""
|
||||||
|
|
||||||
token_payload = {
|
token_payload = {
|
||||||
"grant_type": "urn:ietf:params:oauth:grant-type:device_code",
|
"grant_type": "urn:ietf:params:oauth:grant-type:device_code",
|
||||||
"device_code": device_code_data["device_code"],
|
"device_code": device_code_data["device_code"],
|
||||||
"client_id": AUTH0_CLIENT_ID,
|
"client_id": client_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.print("\nWaiting for authentication... ", style="bold blue", end="")
|
||||||
|
|
||||||
attempts = 0
|
attempts = 0
|
||||||
while True and attempts < 5:
|
while True and attempts < 10:
|
||||||
response = requests.post(self.TOKEN_URL, data=token_payload, timeout=30)
|
response = requests.post(token_poll_url, data=token_payload, timeout=30)
|
||||||
token_data = response.json()
|
token_data = response.json()
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
validate_token(token_data["id_token"])
|
self._validate_and_save_token(token_data)
|
||||||
expires_in = 360000 # Token expiration time in seconds
|
|
||||||
self.token_manager.save_tokens(token_data["access_token"], expires_in)
|
|
||||||
|
|
||||||
try:
|
console.print(
|
||||||
from crewai.cli.tools.main import ToolCommand
|
"Success!",
|
||||||
ToolCommand().login()
|
style="bold green",
|
||||||
except Exception:
|
)
|
||||||
console.print(
|
|
||||||
"\n[bold yellow]Warning:[/bold yellow] Authentication with the Tool Repository failed.",
|
self._login_to_tool_repository()
|
||||||
style="yellow",
|
|
||||||
)
|
|
||||||
console.print(
|
|
||||||
"Other features will work normally, but you may experience limitations "
|
|
||||||
"with downloading and publishing tools."
|
|
||||||
"\nRun [bold]crewai login[/bold] to try logging in again.\n",
|
|
||||||
style="yellow",
|
|
||||||
)
|
|
||||||
|
|
||||||
console.print(
|
console.print(
|
||||||
"\n[bold green]Welcome to CrewAI Enterprise![/bold green]\n"
|
"\n[bold green]Welcome to CrewAI Enterprise![/bold green]\n"
|
||||||
@@ -93,3 +123,88 @@ class AuthenticationCommand:
|
|||||||
console.print(
|
console.print(
|
||||||
"Timeout: Failed to get the token. Please try again.", style="bold red"
|
"Timeout: Failed to get the token. Please try again.", style="bold red"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _validate_and_save_token(self, token_data: Dict[str, Any]) -> None:
|
||||||
|
"""Validates the JWT token and saves the token to the token manager."""
|
||||||
|
|
||||||
|
jwt_token = token_data["access_token"]
|
||||||
|
jwt_token_data = {
|
||||||
|
"jwt_token": jwt_token,
|
||||||
|
"jwks_url": f"https://{WORKOS_DOMAIN}/oauth2/jwks",
|
||||||
|
"issuer": f"https://{WORKOS_DOMAIN}",
|
||||||
|
"audience": WORKOS_ENVIRONMENT_ID,
|
||||||
|
}
|
||||||
|
|
||||||
|
# TODO: WORKOS - The following conditional is temporary until migration to WorkOS is complete.
|
||||||
|
if self.user_provider == "auth0":
|
||||||
|
jwt_token_data["jwks_url"] = f"https://{AUTH0_DOMAIN}/.well-known/jwks.json"
|
||||||
|
jwt_token_data["issuer"] = f"https://{AUTH0_DOMAIN}/"
|
||||||
|
jwt_token_data["audience"] = AUTH0_AUDIENCE
|
||||||
|
|
||||||
|
decoded_token = validate_jwt_token(**jwt_token_data)
|
||||||
|
|
||||||
|
expires_at = decoded_token.get("exp", 0)
|
||||||
|
self.token_manager.save_tokens(jwt_token, expires_at)
|
||||||
|
|
||||||
|
def _login_to_tool_repository(self) -> None:
|
||||||
|
"""Login to the tool repository."""
|
||||||
|
|
||||||
|
from crewai.cli.tools.main import ToolCommand
|
||||||
|
|
||||||
|
try:
|
||||||
|
console.print(
|
||||||
|
"Now logging you in to the Tool Repository... ",
|
||||||
|
style="bold blue",
|
||||||
|
end="",
|
||||||
|
)
|
||||||
|
|
||||||
|
ToolCommand().login()
|
||||||
|
|
||||||
|
console.print(
|
||||||
|
"Success!\n",
|
||||||
|
style="bold green",
|
||||||
|
)
|
||||||
|
|
||||||
|
settings = Settings()
|
||||||
|
console.print(
|
||||||
|
f"You are authenticated to the tool repository as [bold cyan]'{settings.org_name}'[/bold cyan] ({settings.org_uuid})",
|
||||||
|
style="green",
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
console.print(
|
||||||
|
"\n[bold yellow]Warning:[/bold yellow] Authentication with the Tool Repository failed.",
|
||||||
|
style="yellow",
|
||||||
|
)
|
||||||
|
console.print(
|
||||||
|
"Other features will work normally, but you may experience limitations "
|
||||||
|
"with downloading and publishing tools."
|
||||||
|
"\nRun [bold]crewai login[/bold] to try logging in again.\n",
|
||||||
|
style="yellow",
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO: WORKOS - This method is temporary until migration to WorkOS is complete.
|
||||||
|
def _determine_user_provider(self) -> str:
|
||||||
|
"""Determine which provider to use for authentication."""
|
||||||
|
|
||||||
|
console.print(
|
||||||
|
"Enter your CrewAI Enterprise account email: ", style="bold blue", end=""
|
||||||
|
)
|
||||||
|
email = input()
|
||||||
|
email_encoded = quote(email)
|
||||||
|
|
||||||
|
# It's not correct to call this method directly, but it's temporary until migration is complete.
|
||||||
|
response = PlusAPI("")._make_request(
|
||||||
|
"GET", f"/crewai_plus/api/v1/me/provider?email={email_encoded}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
if response.json().get("provider") == "auth0":
|
||||||
|
return "auth0"
|
||||||
|
else:
|
||||||
|
return "workos"
|
||||||
|
else:
|
||||||
|
console.print(
|
||||||
|
"Error: Failed to authenticate with crewai enterprise. Ensure that you are using the latest crewai version and please try again. If the problem persists, contact support@crewai.com.",
|
||||||
|
style="red",
|
||||||
|
)
|
||||||
|
raise SystemExit
|
||||||
|
|||||||
@@ -1,32 +1,72 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
import jwt
|
||||||
from auth0.authentication.token_verifier import (
|
from jwt import PyJWKClient
|
||||||
AsymmetricSignatureVerifier,
|
|
||||||
TokenVerifier,
|
|
||||||
)
|
|
||||||
from cryptography.fernet import Fernet
|
from cryptography.fernet import Fernet
|
||||||
|
|
||||||
from .constants import AUTH0_CLIENT_ID, AUTH0_DOMAIN
|
|
||||||
|
|
||||||
|
def validate_jwt_token(
|
||||||
def validate_token(id_token: str) -> None:
|
jwt_token: str, jwks_url: str, issuer: str, audience: str
|
||||||
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
Verify the token and its precedence
|
Verify the token's signature and claims using PyJWT.
|
||||||
|
:param jwt_token: The JWT (JWS) string to validate.
|
||||||
:param id_token:
|
:param jwks_url: The URL of the JWKS endpoint.
|
||||||
|
:param issuer: The expected issuer of the token.
|
||||||
|
:param audience: The expected audience of the token.
|
||||||
|
:return: The decoded token.
|
||||||
|
:raises Exception: If the token is invalid for any reason (e.g., signature mismatch,
|
||||||
|
expired, incorrect issuer/audience, JWKS fetching error,
|
||||||
|
missing required claims).
|
||||||
"""
|
"""
|
||||||
jwks_url = f"https://{AUTH0_DOMAIN}/.well-known/jwks.json"
|
|
||||||
issuer = f"https://{AUTH0_DOMAIN}/"
|
decoded_token = None
|
||||||
signature_verifier = AsymmetricSignatureVerifier(jwks_url)
|
|
||||||
token_verifier = TokenVerifier(
|
try:
|
||||||
signature_verifier=signature_verifier, issuer=issuer, audience=AUTH0_CLIENT_ID
|
jwk_client = PyJWKClient(jwks_url)
|
||||||
)
|
signing_key = jwk_client.get_signing_key_from_jwt(jwt_token)
|
||||||
token_verifier.verify(id_token)
|
|
||||||
|
_unverified_decoded_token = jwt.decode(
|
||||||
|
jwt_token, options={"verify_signature": False}
|
||||||
|
)
|
||||||
|
decoded_token = jwt.decode(
|
||||||
|
jwt_token,
|
||||||
|
signing_key.key,
|
||||||
|
algorithms=["RS256"],
|
||||||
|
audience=audience,
|
||||||
|
issuer=issuer,
|
||||||
|
options={
|
||||||
|
"verify_signature": True,
|
||||||
|
"verify_exp": True,
|
||||||
|
"verify_nbf": True,
|
||||||
|
"verify_iat": True,
|
||||||
|
"require": ["exp", "iat", "iss", "aud", "sub"],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return decoded_token
|
||||||
|
|
||||||
|
except jwt.ExpiredSignatureError:
|
||||||
|
raise Exception("Token has expired.")
|
||||||
|
except jwt.InvalidAudienceError:
|
||||||
|
actual_audience = _unverified_decoded_token.get("aud", "[no audience found]")
|
||||||
|
raise Exception(
|
||||||
|
f"Invalid token audience. Got: '{actual_audience}'. Expected: '{audience}'"
|
||||||
|
)
|
||||||
|
except jwt.InvalidIssuerError:
|
||||||
|
actual_issuer = _unverified_decoded_token.get("iss", "[no issuer found]")
|
||||||
|
raise Exception(
|
||||||
|
f"Invalid token issuer. Got: '{actual_issuer}'. Expected: '{issuer}'"
|
||||||
|
)
|
||||||
|
except jwt.MissingRequiredClaimError as e:
|
||||||
|
raise Exception(f"Token is missing required claims: {str(e)}")
|
||||||
|
except jwt.exceptions.PyJWKClientError as e:
|
||||||
|
raise Exception(f"JWKS or key processing error: {str(e)}")
|
||||||
|
except jwt.InvalidTokenError as e:
|
||||||
|
raise Exception(f"Invalid token: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
class TokenManager:
|
class TokenManager:
|
||||||
@@ -56,14 +96,14 @@ class TokenManager:
|
|||||||
self.save_secure_file(key_filename, new_key)
|
self.save_secure_file(key_filename, new_key)
|
||||||
return new_key
|
return new_key
|
||||||
|
|
||||||
def save_tokens(self, access_token: str, expires_in: int) -> None:
|
def save_tokens(self, access_token: str, expires_at: int) -> None:
|
||||||
"""
|
"""
|
||||||
Save the access token and its expiration time.
|
Save the access token and its expiration time.
|
||||||
|
|
||||||
:param access_token: The access token to save.
|
:param access_token: The access token to save.
|
||||||
:param expires_in: The expiration time of the access token in seconds.
|
:param expires_at: The UNIX timestamp of the expiration time.
|
||||||
"""
|
"""
|
||||||
expiration_time = datetime.now() + timedelta(seconds=expires_in)
|
expiration_time = datetime.fromtimestamp(expires_at)
|
||||||
data = {
|
data = {
|
||||||
"access_token": access_token,
|
"access_token": access_token,
|
||||||
"expiration": expiration_time.isoformat(),
|
"expiration": expiration_time.isoformat(),
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from importlib.metadata import version as get_version
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
from crewai.cli.config import Settings
|
||||||
from crewai.cli.add_crew_to_flow import add_crew_to_flow
|
from crewai.cli.add_crew_to_flow import add_crew_to_flow
|
||||||
from crewai.cli.create_crew import create_crew
|
from crewai.cli.create_crew import create_crew
|
||||||
from crewai.cli.create_flow import create_flow
|
from crewai.cli.create_flow import create_flow
|
||||||
@@ -138,8 +138,12 @@ def log_tasks_outputs() -> None:
|
|||||||
@click.option("-s", "--short", is_flag=True, help="Reset SHORT TERM memory")
|
@click.option("-s", "--short", is_flag=True, help="Reset SHORT TERM memory")
|
||||||
@click.option("-e", "--entities", is_flag=True, help="Reset ENTITIES memory")
|
@click.option("-e", "--entities", is_flag=True, help="Reset ENTITIES memory")
|
||||||
@click.option("-kn", "--knowledge", is_flag=True, help="Reset KNOWLEDGE storage")
|
@click.option("-kn", "--knowledge", is_flag=True, help="Reset KNOWLEDGE storage")
|
||||||
@click.option("-akn", "--agent-knowledge", is_flag=True, help="Reset AGENT KNOWLEDGE storage")
|
@click.option(
|
||||||
@click.option("-k","--kickoff-outputs",is_flag=True,help="Reset LATEST KICKOFF TASK OUTPUTS")
|
"-akn", "--agent-knowledge", is_flag=True, help="Reset AGENT KNOWLEDGE storage"
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"-k", "--kickoff-outputs", is_flag=True, help="Reset LATEST KICKOFF TASK OUTPUTS"
|
||||||
|
)
|
||||||
@click.option("-a", "--all", is_flag=True, help="Reset ALL memories")
|
@click.option("-a", "--all", is_flag=True, help="Reset ALL memories")
|
||||||
def reset_memories(
|
def reset_memories(
|
||||||
long: bool,
|
long: bool,
|
||||||
@@ -154,13 +158,23 @@ def reset_memories(
|
|||||||
Reset the crew memories (long, short, entity, latest_crew_kickoff_ouputs, knowledge, agent_knowledge). This will delete all the data saved.
|
Reset the crew memories (long, short, entity, latest_crew_kickoff_ouputs, knowledge, agent_knowledge). This will delete all the data saved.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
memory_types = [long, short, entities, knowledge, agent_knowledge, kickoff_outputs, all]
|
memory_types = [
|
||||||
|
long,
|
||||||
|
short,
|
||||||
|
entities,
|
||||||
|
knowledge,
|
||||||
|
agent_knowledge,
|
||||||
|
kickoff_outputs,
|
||||||
|
all,
|
||||||
|
]
|
||||||
if not any(memory_types):
|
if not any(memory_types):
|
||||||
click.echo(
|
click.echo(
|
||||||
"Please specify at least one memory type to reset using the appropriate flags."
|
"Please specify at least one memory type to reset using the appropriate flags."
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
reset_memories_command(long, short, entities, knowledge, agent_knowledge, kickoff_outputs, all)
|
reset_memories_command(
|
||||||
|
long, short, entities, knowledge, agent_knowledge, kickoff_outputs, all
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
click.echo(f"An error occurred while resetting memories: {e}", err=True)
|
click.echo(f"An error occurred while resetting memories: {e}", err=True)
|
||||||
|
|
||||||
@@ -210,16 +224,11 @@ def update():
|
|||||||
update_crew()
|
update_crew()
|
||||||
|
|
||||||
|
|
||||||
@crewai.command()
|
|
||||||
def signup():
|
|
||||||
"""Sign Up/Login to CrewAI+."""
|
|
||||||
AuthenticationCommand().signup()
|
|
||||||
|
|
||||||
|
|
||||||
@crewai.command()
|
@crewai.command()
|
||||||
def login():
|
def login():
|
||||||
"""Sign Up/Login to CrewAI+."""
|
"""Sign Up/Login to CrewAI Enterprise."""
|
||||||
AuthenticationCommand().signup()
|
Settings().clear()
|
||||||
|
AuthenticationCommand().login()
|
||||||
|
|
||||||
|
|
||||||
# DEPLOY CREWAI+ COMMANDS
|
# DEPLOY CREWAI+ COMMANDS
|
||||||
|
|||||||
@@ -37,6 +37,10 @@ class Settings(BaseModel):
|
|||||||
merged_data = {**file_data, **data}
|
merged_data = {**file_data, **data}
|
||||||
super().__init__(config_path=config_path, **merged_data)
|
super().__init__(config_path=config_path, **merged_data)
|
||||||
|
|
||||||
|
def clear(self) -> None:
|
||||||
|
"""Clear all settings"""
|
||||||
|
self.config_path.unlink(missing_ok=True)
|
||||||
|
|
||||||
def dump(self) -> None:
|
def dump(self) -> None:
|
||||||
"""Save current settings to settings.json"""
|
"""Save current settings to settings.json"""
|
||||||
if self.config_path.is_file():
|
if self.config_path.is_file():
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
|
|||||||
authors = [{ name = "Your Name", email = "you@example.com" }]
|
authors = [{ name = "Your Name", email = "you@example.com" }]
|
||||||
requires-python = ">=3.10,<3.14"
|
requires-python = ">=3.10,<3.14"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"crewai[tools]>=0.134.0,<1.0.0"
|
"crewai[tools]>=0.148.0,<1.0.0"
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
|
|||||||
authors = [{ name = "Your Name", email = "you@example.com" }]
|
authors = [{ name = "Your Name", email = "you@example.com" }]
|
||||||
requires-python = ">=3.10,<3.14"
|
requires-python = ">=3.10,<3.14"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"crewai[tools]>=0.134.0,<1.0.0",
|
"crewai[tools]>=0.148.0,<1.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ description = "Power up your crews with {{folder_name}}"
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10,<3.14"
|
requires-python = ">=3.10,<3.14"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"crewai[tools]>=0.134.0"
|
"crewai[tools]>=0.148.0"
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.crewai]
|
[tool.crewai]
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
|
|||||||
|
|
||||||
console.print(f"Successfully installed {handle}", style="bold green")
|
console.print(f"Successfully installed {handle}", style="bold green")
|
||||||
|
|
||||||
def login(self):
|
def login(self) -> None:
|
||||||
login_response = self.plus_api_client.login_to_tool_repository()
|
login_response = self.plus_api_client.login_to_tool_repository()
|
||||||
|
|
||||||
if login_response.status_code != 200:
|
if login_response.status_code != 200:
|
||||||
@@ -175,18 +175,10 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
|
|||||||
settings.tool_repository_password = login_response_json["credential"][
|
settings.tool_repository_password = login_response_json["credential"][
|
||||||
"password"
|
"password"
|
||||||
]
|
]
|
||||||
settings.org_uuid = login_response_json["current_organization"][
|
settings.org_uuid = login_response_json["current_organization"]["uuid"]
|
||||||
"uuid"
|
settings.org_name = login_response_json["current_organization"]["name"]
|
||||||
]
|
|
||||||
settings.org_name = login_response_json["current_organization"][
|
|
||||||
"name"
|
|
||||||
]
|
|
||||||
settings.dump()
|
settings.dump()
|
||||||
|
|
||||||
console.print(
|
|
||||||
f"Successfully authenticated to the tool repository as {settings.org_name} ({settings.org_uuid}).", style="bold green"
|
|
||||||
)
|
|
||||||
|
|
||||||
def _add_package(self, tool_details: dict[str, Any]):
|
def _add_package(self, tool_details: dict[str, Any]):
|
||||||
is_from_pypi = tool_details.get("source", None) == "pypi"
|
is_from_pypi = tool_details.get("source", None) == "pypi"
|
||||||
tool_handle = tool_details["handle"]
|
tool_handle = tool_details["handle"]
|
||||||
@@ -243,9 +235,15 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
|
|||||||
|
|
||||||
return env
|
return env
|
||||||
|
|
||||||
def _print_current_organization(self):
|
def _print_current_organization(self) -> None:
|
||||||
settings = Settings()
|
settings = Settings()
|
||||||
if settings.org_uuid:
|
if settings.org_uuid:
|
||||||
console.print(f"Current organization: {settings.org_name} ({settings.org_uuid})", style="bold blue")
|
console.print(
|
||||||
|
f"Current organization: {settings.org_name} ({settings.org_uuid})",
|
||||||
|
style="bold blue",
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
console.print("No organization currently set. We recommend setting one before using: `crewai org switch <org_id>` command.", style="yellow")
|
console.print(
|
||||||
|
"No organization currently set. We recommend setting one before using: `crewai org switch <org_id>` command.",
|
||||||
|
style="yellow",
|
||||||
|
)
|
||||||
|
|||||||
@@ -18,6 +18,11 @@ from typing import (
|
|||||||
cast,
|
cast,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from opentelemetry import baggage
|
||||||
|
from opentelemetry.context import attach, detach
|
||||||
|
|
||||||
|
from crewai.utilities.crew.models import CrewContext
|
||||||
|
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
UUID4,
|
UUID4,
|
||||||
BaseModel,
|
BaseModel,
|
||||||
@@ -616,6 +621,11 @@ class Crew(FlowTrackable, BaseModel):
|
|||||||
self,
|
self,
|
||||||
inputs: Optional[Dict[str, Any]] = None,
|
inputs: Optional[Dict[str, Any]] = None,
|
||||||
) -> CrewOutput:
|
) -> CrewOutput:
|
||||||
|
ctx = baggage.set_baggage(
|
||||||
|
"crew_context", CrewContext(id=str(self.id), key=self.key)
|
||||||
|
)
|
||||||
|
token = attach(ctx)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for before_callback in self.before_kickoff_callbacks:
|
for before_callback in self.before_kickoff_callbacks:
|
||||||
if inputs is None:
|
if inputs is None:
|
||||||
@@ -676,6 +686,8 @@ class Crew(FlowTrackable, BaseModel):
|
|||||||
CrewKickoffFailedEvent(error=str(e), crew_name=self.name or "crew"),
|
CrewKickoffFailedEvent(error=str(e), crew_name=self.name or "crew"),
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
finally:
|
||||||
|
detach(token)
|
||||||
|
|
||||||
def kickoff_for_each(self, inputs: List[Dict[str, Any]]) -> List[CrewOutput]:
|
def kickoff_for_each(self, inputs: List[Dict[str, Any]]) -> List[CrewOutput]:
|
||||||
"""Executes the Crew's workflow for each input in the list and aggregates results."""
|
"""Executes the Crew's workflow for each input in the list and aggregates results."""
|
||||||
@@ -1319,6 +1331,7 @@ class Crew(FlowTrackable, BaseModel):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
test_crew = self.copy()
|
test_crew = self.copy()
|
||||||
|
|
||||||
evaluator = CrewEvaluator(test_crew, llm_instance)
|
evaluator = CrewEvaluator(test_crew, llm_instance)
|
||||||
|
|
||||||
for i in range(1, n_iterations + 1):
|
for i in range(1, n_iterations + 1):
|
||||||
|
|||||||
40
src/crewai/experimental/__init__.py
Normal file
40
src/crewai/experimental/__init__.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
from crewai.experimental.evaluation import (
|
||||||
|
BaseEvaluator,
|
||||||
|
EvaluationScore,
|
||||||
|
MetricCategory,
|
||||||
|
AgentEvaluationResult,
|
||||||
|
SemanticQualityEvaluator,
|
||||||
|
GoalAlignmentEvaluator,
|
||||||
|
ReasoningEfficiencyEvaluator,
|
||||||
|
ToolSelectionEvaluator,
|
||||||
|
ParameterExtractionEvaluator,
|
||||||
|
ToolInvocationEvaluator,
|
||||||
|
EvaluationTraceCallback,
|
||||||
|
create_evaluation_callbacks,
|
||||||
|
AgentEvaluator,
|
||||||
|
create_default_evaluator,
|
||||||
|
ExperimentRunner,
|
||||||
|
ExperimentResults,
|
||||||
|
ExperimentResult,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"BaseEvaluator",
|
||||||
|
"EvaluationScore",
|
||||||
|
"MetricCategory",
|
||||||
|
"AgentEvaluationResult",
|
||||||
|
"SemanticQualityEvaluator",
|
||||||
|
"GoalAlignmentEvaluator",
|
||||||
|
"ReasoningEfficiencyEvaluator",
|
||||||
|
"ToolSelectionEvaluator",
|
||||||
|
"ParameterExtractionEvaluator",
|
||||||
|
"ToolInvocationEvaluator",
|
||||||
|
"EvaluationTraceCallback",
|
||||||
|
"create_evaluation_callbacks",
|
||||||
|
"AgentEvaluator",
|
||||||
|
"create_default_evaluator",
|
||||||
|
"ExperimentRunner",
|
||||||
|
"ExperimentResults",
|
||||||
|
"ExperimentResult"
|
||||||
|
]
|
||||||
51
src/crewai/experimental/evaluation/__init__.py
Normal file
51
src/crewai/experimental/evaluation/__init__.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
from crewai.experimental.evaluation.base_evaluator import (
|
||||||
|
BaseEvaluator,
|
||||||
|
EvaluationScore,
|
||||||
|
MetricCategory,
|
||||||
|
AgentEvaluationResult
|
||||||
|
)
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.metrics import (
|
||||||
|
SemanticQualityEvaluator,
|
||||||
|
GoalAlignmentEvaluator,
|
||||||
|
ReasoningEfficiencyEvaluator,
|
||||||
|
ToolSelectionEvaluator,
|
||||||
|
ParameterExtractionEvaluator,
|
||||||
|
ToolInvocationEvaluator
|
||||||
|
)
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.evaluation_listener import (
|
||||||
|
EvaluationTraceCallback,
|
||||||
|
create_evaluation_callbacks
|
||||||
|
)
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.agent_evaluator import (
|
||||||
|
AgentEvaluator,
|
||||||
|
create_default_evaluator
|
||||||
|
)
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.experiment import (
|
||||||
|
ExperimentRunner,
|
||||||
|
ExperimentResults,
|
||||||
|
ExperimentResult
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"BaseEvaluator",
|
||||||
|
"EvaluationScore",
|
||||||
|
"MetricCategory",
|
||||||
|
"AgentEvaluationResult",
|
||||||
|
"SemanticQualityEvaluator",
|
||||||
|
"GoalAlignmentEvaluator",
|
||||||
|
"ReasoningEfficiencyEvaluator",
|
||||||
|
"ToolSelectionEvaluator",
|
||||||
|
"ParameterExtractionEvaluator",
|
||||||
|
"ToolInvocationEvaluator",
|
||||||
|
"EvaluationTraceCallback",
|
||||||
|
"create_evaluation_callbacks",
|
||||||
|
"AgentEvaluator",
|
||||||
|
"create_default_evaluator",
|
||||||
|
"ExperimentRunner",
|
||||||
|
"ExperimentResults",
|
||||||
|
"ExperimentResult"
|
||||||
|
]
|
||||||
245
src/crewai/experimental/evaluation/agent_evaluator.py
Normal file
245
src/crewai/experimental/evaluation/agent_evaluator.py
Normal file
@@ -0,0 +1,245 @@
|
|||||||
|
import threading
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.base_evaluator import AgentEvaluationResult, AggregationStrategy
|
||||||
|
from crewai.agent import Agent
|
||||||
|
from crewai.task import Task
|
||||||
|
from crewai.experimental.evaluation.evaluation_display import EvaluationDisplayFormatter
|
||||||
|
from crewai.utilities.events.agent_events import AgentEvaluationStartedEvent, AgentEvaluationCompletedEvent, AgentEvaluationFailedEvent
|
||||||
|
from crewai.experimental.evaluation import BaseEvaluator, create_evaluation_callbacks
|
||||||
|
from collections.abc import Sequence
|
||||||
|
from crewai.utilities.events.crewai_event_bus import crewai_event_bus
|
||||||
|
from crewai.utilities.events.utils.console_formatter import ConsoleFormatter
|
||||||
|
from crewai.utilities.events.task_events import TaskCompletedEvent
|
||||||
|
from crewai.utilities.events.agent_events import LiteAgentExecutionCompletedEvent
|
||||||
|
from crewai.experimental.evaluation.base_evaluator import AgentAggregatedEvaluationResult, EvaluationScore, MetricCategory
|
||||||
|
|
||||||
|
class ExecutionState:
|
||||||
|
def __init__(self):
|
||||||
|
self.traces = {}
|
||||||
|
self.current_agent_id: str | None = None
|
||||||
|
self.current_task_id: str | None = None
|
||||||
|
self.iteration = 1
|
||||||
|
self.iterations_results = {}
|
||||||
|
self.agent_evaluators = {}
|
||||||
|
|
||||||
|
class AgentEvaluator:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
agents: list[Agent],
|
||||||
|
evaluators: Sequence[BaseEvaluator] | None = None,
|
||||||
|
):
|
||||||
|
self.agents: list[Agent] = agents
|
||||||
|
self.evaluators: Sequence[BaseEvaluator] | None = evaluators
|
||||||
|
|
||||||
|
self.callback = create_evaluation_callbacks()
|
||||||
|
self.console_formatter = ConsoleFormatter()
|
||||||
|
self.display_formatter = EvaluationDisplayFormatter()
|
||||||
|
|
||||||
|
self._thread_local: threading.local = threading.local()
|
||||||
|
|
||||||
|
for agent in self.agents:
|
||||||
|
self._execution_state.agent_evaluators[str(agent.id)] = self.evaluators
|
||||||
|
|
||||||
|
self._subscribe_to_events()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _execution_state(self) -> ExecutionState:
|
||||||
|
if not hasattr(self._thread_local, 'execution_state'):
|
||||||
|
self._thread_local.execution_state = ExecutionState()
|
||||||
|
return self._thread_local.execution_state
|
||||||
|
|
||||||
|
def _subscribe_to_events(self) -> None:
|
||||||
|
from typing import cast
|
||||||
|
crewai_event_bus.register_handler(TaskCompletedEvent, cast(Any, self._handle_task_completed))
|
||||||
|
crewai_event_bus.register_handler(LiteAgentExecutionCompletedEvent, cast(Any, self._handle_lite_agent_completed))
|
||||||
|
|
||||||
|
def _handle_task_completed(self, source: Any, event: TaskCompletedEvent) -> None:
|
||||||
|
assert event.task is not None
|
||||||
|
agent = event.task.agent
|
||||||
|
if agent and str(getattr(agent, 'id', 'unknown')) in self._execution_state.agent_evaluators:
|
||||||
|
self.emit_evaluation_started_event(agent_role=agent.role, agent_id=str(agent.id), task_id=str(event.task.id))
|
||||||
|
|
||||||
|
state = ExecutionState()
|
||||||
|
state.current_agent_id = str(agent.id)
|
||||||
|
state.current_task_id = str(event.task.id)
|
||||||
|
|
||||||
|
assert state.current_agent_id is not None and state.current_task_id is not None
|
||||||
|
trace = self.callback.get_trace(state.current_agent_id, state.current_task_id)
|
||||||
|
|
||||||
|
if not trace:
|
||||||
|
return
|
||||||
|
|
||||||
|
result = self.evaluate(
|
||||||
|
agent=agent,
|
||||||
|
task=event.task,
|
||||||
|
execution_trace=trace,
|
||||||
|
final_output=event.output,
|
||||||
|
state=state
|
||||||
|
)
|
||||||
|
|
||||||
|
current_iteration = self._execution_state.iteration
|
||||||
|
if current_iteration not in self._execution_state.iterations_results:
|
||||||
|
self._execution_state.iterations_results[current_iteration] = {}
|
||||||
|
|
||||||
|
if agent.role not in self._execution_state.iterations_results[current_iteration]:
|
||||||
|
self._execution_state.iterations_results[current_iteration][agent.role] = []
|
||||||
|
|
||||||
|
self._execution_state.iterations_results[current_iteration][agent.role].append(result)
|
||||||
|
|
||||||
|
def _handle_lite_agent_completed(self, source: object, event: LiteAgentExecutionCompletedEvent) -> None:
|
||||||
|
agent_info = event.agent_info
|
||||||
|
agent_id = str(agent_info["id"])
|
||||||
|
|
||||||
|
if agent_id in self._execution_state.agent_evaluators:
|
||||||
|
state = ExecutionState()
|
||||||
|
state.current_agent_id = agent_id
|
||||||
|
state.current_task_id = "lite_task"
|
||||||
|
|
||||||
|
target_agent = None
|
||||||
|
for agent in self.agents:
|
||||||
|
if str(agent.id) == agent_id:
|
||||||
|
target_agent = agent
|
||||||
|
break
|
||||||
|
|
||||||
|
if not target_agent:
|
||||||
|
return
|
||||||
|
|
||||||
|
assert state.current_agent_id is not None and state.current_task_id is not None
|
||||||
|
trace = self.callback.get_trace(state.current_agent_id, state.current_task_id)
|
||||||
|
|
||||||
|
if not trace:
|
||||||
|
return
|
||||||
|
|
||||||
|
result = self.evaluate(
|
||||||
|
agent=target_agent,
|
||||||
|
execution_trace=trace,
|
||||||
|
final_output=event.output,
|
||||||
|
state=state
|
||||||
|
)
|
||||||
|
|
||||||
|
current_iteration = self._execution_state.iteration
|
||||||
|
if current_iteration not in self._execution_state.iterations_results:
|
||||||
|
self._execution_state.iterations_results[current_iteration] = {}
|
||||||
|
|
||||||
|
agent_role = target_agent.role
|
||||||
|
if agent_role not in self._execution_state.iterations_results[current_iteration]:
|
||||||
|
self._execution_state.iterations_results[current_iteration][agent_role] = []
|
||||||
|
|
||||||
|
self._execution_state.iterations_results[current_iteration][agent_role].append(result)
|
||||||
|
|
||||||
|
def set_iteration(self, iteration: int) -> None:
|
||||||
|
self._execution_state.iteration = iteration
|
||||||
|
|
||||||
|
def reset_iterations_results(self) -> None:
|
||||||
|
self._execution_state.iterations_results = {}
|
||||||
|
|
||||||
|
def get_evaluation_results(self) -> dict[str, list[AgentEvaluationResult]]:
|
||||||
|
if self._execution_state.iterations_results and self._execution_state.iteration in self._execution_state.iterations_results:
|
||||||
|
return self._execution_state.iterations_results[self._execution_state.iteration]
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def display_results_with_iterations(self) -> None:
|
||||||
|
self.display_formatter.display_summary_results(self._execution_state.iterations_results)
|
||||||
|
|
||||||
|
def get_agent_evaluation(self, strategy: AggregationStrategy = AggregationStrategy.SIMPLE_AVERAGE, include_evaluation_feedback: bool = True) -> dict[str, AgentAggregatedEvaluationResult]:
|
||||||
|
agent_results = {}
|
||||||
|
with crewai_event_bus.scoped_handlers():
|
||||||
|
task_results = self.get_evaluation_results()
|
||||||
|
for agent_role, results in task_results.items():
|
||||||
|
if not results:
|
||||||
|
continue
|
||||||
|
|
||||||
|
agent_id = results[0].agent_id
|
||||||
|
|
||||||
|
aggregated_result = self.display_formatter._aggregate_agent_results(
|
||||||
|
agent_id=agent_id,
|
||||||
|
agent_role=agent_role,
|
||||||
|
results=results,
|
||||||
|
strategy=strategy
|
||||||
|
)
|
||||||
|
|
||||||
|
agent_results[agent_role] = aggregated_result
|
||||||
|
|
||||||
|
|
||||||
|
if self._execution_state.iterations_results and self._execution_state.iteration == max(self._execution_state.iterations_results.keys(), default=0):
|
||||||
|
self.display_results_with_iterations()
|
||||||
|
|
||||||
|
if include_evaluation_feedback:
|
||||||
|
self.display_evaluation_with_feedback()
|
||||||
|
|
||||||
|
return agent_results
|
||||||
|
|
||||||
|
def display_evaluation_with_feedback(self) -> None:
|
||||||
|
self.display_formatter.display_evaluation_with_feedback(self._execution_state.iterations_results)
|
||||||
|
|
||||||
|
def evaluate(
|
||||||
|
self,
|
||||||
|
agent: Agent,
|
||||||
|
execution_trace: dict[str, Any],
|
||||||
|
final_output: Any,
|
||||||
|
state: ExecutionState,
|
||||||
|
task: Task | None = None,
|
||||||
|
) -> AgentEvaluationResult:
|
||||||
|
result = AgentEvaluationResult(
|
||||||
|
agent_id=state.current_agent_id or str(agent.id),
|
||||||
|
task_id=state.current_task_id or (str(task.id) if task else "unknown_task")
|
||||||
|
)
|
||||||
|
|
||||||
|
assert self.evaluators is not None
|
||||||
|
task_id = str(task.id) if task else None
|
||||||
|
for evaluator in self.evaluators:
|
||||||
|
try:
|
||||||
|
self.emit_evaluation_started_event(agent_role=agent.role, agent_id=str(agent.id), task_id=task_id)
|
||||||
|
score = evaluator.evaluate(
|
||||||
|
agent=agent,
|
||||||
|
task=task,
|
||||||
|
execution_trace=execution_trace,
|
||||||
|
final_output=final_output
|
||||||
|
)
|
||||||
|
result.metrics[evaluator.metric_category] = score
|
||||||
|
self.emit_evaluation_completed_event(agent_role=agent.role, agent_id=str(agent.id), task_id=task_id, metric_category=evaluator.metric_category, score=score)
|
||||||
|
except Exception as e:
|
||||||
|
self.emit_evaluation_failed_event(agent_role=agent.role, agent_id=str(agent.id), task_id=task_id, error=str(e))
|
||||||
|
self.console_formatter.print(f"Error in {evaluator.metric_category.value} evaluator: {str(e)}")
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
def emit_evaluation_started_event(self, agent_role: str, agent_id: str, task_id: str | None = None):
|
||||||
|
crewai_event_bus.emit(
|
||||||
|
self,
|
||||||
|
AgentEvaluationStartedEvent(agent_role=agent_role, agent_id=agent_id, task_id=task_id, iteration=self._execution_state.iteration)
|
||||||
|
)
|
||||||
|
|
||||||
|
def emit_evaluation_completed_event(self, agent_role: str, agent_id: str, task_id: str | None = None, metric_category: MetricCategory | None = None, score: EvaluationScore | None = None):
|
||||||
|
crewai_event_bus.emit(
|
||||||
|
self,
|
||||||
|
AgentEvaluationCompletedEvent(agent_role=agent_role, agent_id=agent_id, task_id=task_id, iteration=self._execution_state.iteration, metric_category=metric_category, score=score)
|
||||||
|
)
|
||||||
|
|
||||||
|
def emit_evaluation_failed_event(self, agent_role: str, agent_id: str, error: str, task_id: str | None = None):
|
||||||
|
crewai_event_bus.emit(
|
||||||
|
self,
|
||||||
|
AgentEvaluationFailedEvent(agent_role=agent_role, agent_id=agent_id, task_id=task_id, iteration=self._execution_state.iteration, error=error)
|
||||||
|
)
|
||||||
|
|
||||||
|
def create_default_evaluator(agents: list[Agent], llm: None = None):
    """Build an ``AgentEvaluator`` wired with the six default metric evaluators.

    Args:
        agents: Agents the returned evaluator is created for.
        llm: Value forwarded unchanged as ``llm=`` to every evaluator.

    Returns:
        An ``AgentEvaluator`` holding one instance of each default evaluator,
        in the order listed below.
    """
    # Imported inside the function, as in the original implementation.
    from crewai.experimental.evaluation import (
        GoalAlignmentEvaluator,
        SemanticQualityEvaluator,
        ToolSelectionEvaluator,
        ParameterExtractionEvaluator,
        ToolInvocationEvaluator,
        ReasoningEfficiencyEvaluator
    )

    default_evaluator_classes = (
        GoalAlignmentEvaluator,
        SemanticQualityEvaluator,
        ToolSelectionEvaluator,
        ParameterExtractionEvaluator,
        ToolInvocationEvaluator,
        ReasoningEfficiencyEvaluator,
    )
    evaluators = [evaluator_cls(llm=llm) for evaluator_cls in default_evaluator_classes]

    return AgentEvaluator(evaluators=evaluators, agents=agents)
|
||||||
125
src/crewai/experimental/evaluation/base_evaluator.py
Normal file
125
src/crewai/experimental/evaluation/base_evaluator.py
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
import abc
|
||||||
|
import enum
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from crewai.agent import Agent
|
||||||
|
from crewai.task import Task
|
||||||
|
from crewai.llm import BaseLLM
|
||||||
|
from crewai.utilities.llm_utils import create_llm
|
||||||
|
|
||||||
|
class MetricCategory(enum.Enum):
    """Categories of metrics an evaluator can score an agent on."""

    GOAL_ALIGNMENT = "goal_alignment"
    SEMANTIC_QUALITY = "semantic_quality"
    REASONING_EFFICIENCY = "reasoning_efficiency"
    TOOL_SELECTION = "tool_selection"
    PARAMETER_EXTRACTION = "parameter_extraction"
    TOOL_INVOCATION = "tool_invocation"

    def title(self):
        """Return the member's value as a title-cased label with spaces for underscores."""
        spaced_value = self.value.replace('_', ' ')
        return spaced_value.title()
|
||||||
|
|
||||||
|
|
||||||
|
class EvaluationScore(BaseModel):
    """Result of one metric evaluation: a score, its feedback, and the raw evaluator response."""

    # Constrained to the closed range [0, 10]; None marks "not applicable".
    score: float | None = Field(
        default=5.0,
        ge=0.0,
        le=10.0,
        description="Numeric score from 0-10 where 0 is worst and 10 is best, None if not applicable",
    )
    feedback: str = Field(
        default="",
        description="Detailed feedback explaining the evaluation score",
    )
    raw_response: str | None = Field(
        default=None,
        description="Raw response from the evaluator (e.g., LLM)",
    )

    def __str__(self) -> str:
        """Return ``Score: <value>/10 - <feedback>``, or ``Score: N/A - <feedback>`` without a score."""
        shown_score = "N/A" if self.score is None else f"{self.score:.1f}/10"
        return f"Score: {shown_score} - {self.feedback}"
|
||||||
|
|
||||||
|
|
||||||
|
class BaseEvaluator(abc.ABC):
    """Abstract base for metric evaluators.

    Subclasses declare the metric category they score and implement
    ``evaluate`` to produce an ``EvaluationScore``.
    """

    def __init__(self, llm: BaseLLM | None = None):
        """Store the LLM obtained by passing ``llm`` through ``create_llm``."""
        resolved_llm = create_llm(llm)
        self.llm: BaseLLM | None = resolved_llm

    @property
    @abc.abstractmethod
    def metric_category(self) -> MetricCategory:
        """Metric category this evaluator produces scores for."""

    @abc.abstractmethod
    def evaluate(
        self,
        agent: Agent,
        execution_trace: Dict[str, Any],
        final_output: Any,
        task: Task | None = None,
    ) -> EvaluationScore:
        """Score one agent execution.

        Args:
            agent: The agent being evaluated.
            execution_trace: Trace data collected for the execution.
            final_output: The output the agent produced.
            task: The executed task, if any.

        Returns:
            The evaluation score for this evaluator's metric category.
        """
|
||||||
|
|
||||||
|
|
||||||
|
class AgentEvaluationResult(BaseModel):
    """Evaluation scores for one agent on one task, keyed by metric category."""

    agent_id: str = Field(description="ID of the evaluated agent")
    task_id: str = Field(description="ID of the task that was executed")
    # Each instance gets its own empty dict when no metrics are supplied.
    metrics: Dict[MetricCategory, EvaluationScore] = Field(
        description="Evaluation scores for each metric category",
        default_factory=dict,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class AggregationStrategy(Enum):
    """Strategies for combining an agent's scores across several tasks."""

    # Equal weight to all tasks
    SIMPLE_AVERAGE = "simple_average"
    # Weight by task complexity
    WEIGHTED_BY_COMPLEXITY = "weighted_by_complexity"
    # Use best scores across tasks
    BEST_PERFORMANCE = "best_performance"
    # Use worst scores across tasks
    WORST_PERFORMANCE = "worst_performance"
|
||||||
|
|
||||||
|
|
||||||
|
class AgentAggregatedEvaluationResult(BaseModel):
    """Scores for one agent aggregated over several task evaluations."""

    agent_id: str = Field(
        default="",
        description="ID of the agent"
    )
    agent_role: str = Field(
        default="",
        description="Role of the agent"
    )
    task_count: int = Field(
        default=0,
        description="Number of tasks included in this aggregation"
    )
    aggregation_strategy: AggregationStrategy = Field(
        default=AggregationStrategy.SIMPLE_AVERAGE,
        description="Strategy used for aggregation"
    )
    metrics: Dict[MetricCategory, EvaluationScore] = Field(
        default_factory=dict,
        description="Aggregated metrics across all tasks"
    )
    task_results: List[str] = Field(
        default_factory=list,
        description="IDs of tasks included in this aggregation"
    )
    overall_score: Optional[float] = Field(
        default=None,
        description="Overall score for this agent"
    )

    def __str__(self) -> str:
        """Return a multi-line, human-readable report of the aggregated metrics.

        A metric whose score is ``None`` is rendered as ``N/A`` (matching
        ``EvaluationScore.__str__``) instead of the literal ``None/10``.
        """
        parts = [
            f"Agent Evaluation: {self.agent_role}\n",
            f"Strategy: {self.aggregation_strategy.value}\n",
            f"Tasks evaluated: {self.task_count}\n",
        ]

        for category, score in self.metrics.items():
            shown_score = "N/A" if score.score is None else f"{score.score}/10"
            parts.append(f"\n\n- {category.value.upper()}: {shown_score}\n")

            if score.feedback:
                # Continuation lines of multi-line feedback are indented under the metric.
                detailed_feedback = "\n ".join(score.feedback.split('\n'))
                parts.append(f" {detailed_feedback}\n")

        return "".join(parts)
|
||||||
333
src/crewai/experimental/evaluation/evaluation_display.py
Normal file
333
src/crewai/experimental/evaluation/evaluation_display.py
Normal file
@@ -0,0 +1,333 @@
|
|||||||
|
from collections import defaultdict
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
from rich.table import Table
|
||||||
|
from rich.box import HEAVY_EDGE, ROUNDED
|
||||||
|
from collections.abc import Sequence
|
||||||
|
from crewai.experimental.evaluation.base_evaluator import AgentAggregatedEvaluationResult, AggregationStrategy, AgentEvaluationResult, MetricCategory
|
||||||
|
from crewai.experimental.evaluation import EvaluationScore
|
||||||
|
from crewai.utilities.events.utils.console_formatter import ConsoleFormatter
|
||||||
|
from crewai.utilities.llm_utils import create_llm
|
||||||
|
|
||||||
|
class EvaluationDisplayFormatter:
    """Render agent evaluation results as rich tables on the console."""

    def __init__(self):
        self.console_formatter = ConsoleFormatter()

    @staticmethod
    def _score_color(score: float) -> str:
        """Return the rich colour name used to display ``score``."""
        if score >= 8.0:
            return "green"
        if score >= 6.0:
            return "cyan"
        if score >= 4.0:
            return "yellow"
        return "red"

    @staticmethod
    def _truncate(text: str, limit: int = 500) -> str:
        """Return ``text`` cut to ``limit`` characters, adding "..." only if it was cut."""
        if len(text) > limit:
            return text[:limit] + "..."
        return text

    def display_evaluation_with_feedback(self, iterations_results: Dict[int, Dict[str, List[Any]]]):
        """Print one table per agent and iteration with scores and feedback.

        Args:
            iterations_results: Mapping of iteration number to a mapping of
                agent role to that agent's evaluation results.
        """
        if not iterations_results:
            self.console_formatter.print("[yellow]No evaluation results to display[/yellow]")
            return

        all_agent_roles: set[str] = set()
        for iter_results in iterations_results.values():
            all_agent_roles.update(iter_results.keys())

        for agent_role in sorted(all_agent_roles):
            self.console_formatter.print(f"\n[bold cyan]Agent: {agent_role}[/bold cyan]")

            for iter_num, results in sorted(iterations_results.items()):
                if agent_role not in results or not results[agent_role]:
                    continue

                agent_results = results[agent_role]
                agent_id = agent_results[0].agent_id

                aggregated_result = self._aggregate_agent_results(
                    agent_id=agent_id,
                    agent_role=agent_role,
                    results=agent_results,
                )

                self.console_formatter.print(f"\n[bold]Iteration {iter_num}[/bold]")

                table = Table(box=ROUNDED)
                table.add_column("Metric", style="cyan")
                table.add_column("Score (1-10)", justify="center")
                table.add_column("Feedback", style="green")

                for metric, evaluation_score in aggregated_result.metrics.items():
                    score = evaluation_score.score

                    if isinstance(score, (int, float)):
                        color = self._score_color(score)
                        score_text = f"[{color}]{score:.1f}[/{color}]"
                    else:
                        score_text = "[dim]N/A[/dim]"

                    table.add_section()
                    table.add_row(
                        metric.title(),
                        score_text,
                        evaluation_score.feedback or ""
                    )

                if aggregated_result.overall_score is not None:
                    overall_score = aggregated_result.overall_score
                    overall_color = self._score_color(overall_score)

                    table.add_section()
                    table.add_row(
                        "Overall Score",
                        f"[{overall_color}]{overall_score:.1f}[/]",
                        "Overall agent evaluation score"
                    )

                self.console_formatter.print(table)

    def display_summary_results(self, iterations_results: Dict[int, Dict[str, List[AgentEvaluationResult]]]):
        """Print one summary table of per-run and average scores for every agent.

        Args:
            iterations_results: Mapping of iteration number to a mapping of
                agent role to that agent's per-task evaluation results.
        """
        if not iterations_results:
            self.console_formatter.print("[yellow]No evaluation results to display[/yellow]")
            return

        self.console_formatter.print("\n")

        table = Table(title="Agent Performance Scores \n (1-10 Higher is better)", box=HEAVY_EDGE)
        table.add_column("Agent/Metric", style="cyan")

        iteration_numbers = sorted(iterations_results.keys())
        for iter_num in iteration_numbers:
            table.add_column(f"Run {iter_num}", justify="center")

        table.add_column("Avg. Total", justify="center")

        all_agent_roles: set[str] = set()
        for results in iterations_results.values():
            all_agent_roles.update(results.keys())

        for agent_role in sorted(all_agent_roles):
            agent_scores_by_iteration = {}
            agent_metrics_by_iteration = {}

            for iter_num, results in sorted(iterations_results.items()):
                if agent_role not in results or not results[agent_role]:
                    continue

                agent_results = results[agent_role]
                agent_id = agent_results[0].agent_id

                aggregated_result = self._aggregate_agent_results(
                    agent_id=agent_id,
                    agent_role=agent_role,
                    results=agent_results,
                    strategy=AggregationStrategy.SIMPLE_AVERAGE
                )

                valid_scores = [score.score for score in aggregated_result.metrics.values()
                                if score.score is not None]
                if valid_scores:
                    agent_scores_by_iteration[iter_num] = sum(valid_scores) / len(valid_scores)

                agent_metrics_by_iteration[iter_num] = aggregated_result.metrics

            # Agents without a single numeric score get no rows at all.
            if not agent_scores_by_iteration:
                continue

            avg_across_iterations = sum(agent_scores_by_iteration.values()) / len(agent_scores_by_iteration)

            row = [f"[bold]{agent_role}[/bold]"]

            for iter_num in iteration_numbers:
                if iter_num in agent_scores_by_iteration:
                    score = agent_scores_by_iteration[iter_num]
                    row.append(f"[bold {self._score_color(score)}]{score:.1f}[/]")
                else:
                    row.append("-")

            row.append(f"[bold {self._score_color(avg_across_iterations)}]{avg_across_iterations:.1f}[/]")

            table.add_row(*row)

            all_metrics: set[Any] = set()
            for metrics in agent_metrics_by_iteration.values():
                all_metrics.update(metrics.keys())

            for metric in sorted(all_metrics, key=lambda x: x.value):
                metric_scores = []

                row = [f" - {metric.title()}"]

                for iter_num in iteration_numbers:
                    if (iter_num in agent_metrics_by_iteration and
                            metric in agent_metrics_by_iteration[iter_num]):
                        metric_score = agent_metrics_by_iteration[iter_num][metric].score
                        if metric_score is not None:
                            metric_scores.append(metric_score)
                            row.append(f"[{self._score_color(metric_score)}]{metric_score:.1f}[/]")
                        else:
                            row.append("[dim]N/A[/dim]")
                    else:
                        row.append("-")

                if metric_scores:
                    avg = sum(metric_scores) / len(metric_scores)
                    row.append(f"[{self._score_color(avg)}]{avg:.1f}[/]")
                else:
                    row.append("-")

                table.add_row(*row)

            # Empty spacer row: one cell per run column plus the label and average columns.
            table.add_row(*[""] * (len(iteration_numbers) + 2))

        self.console_formatter.print(table)
        self.console_formatter.print("\n")

    def _aggregate_agent_results(
        self,
        agent_id: str,
        agent_role: str,
        results: Sequence[AgentEvaluationResult],
        strategy: AggregationStrategy = AggregationStrategy.SIMPLE_AVERAGE,
    ) -> AgentAggregatedEvaluationResult:
        """Average an agent's per-task scores per metric and merge their feedback.

        Args:
            agent_id: Identifier stored on the aggregated result.
            agent_role: Role stored on the aggregated result and used in summaries.
            results: Per-task evaluation results to aggregate.
            strategy: Strategy recorded on the result and used to steer the
                feedback summary; scores are always averaged.

        Returns:
            The aggregated result. Its overall score is the mean of the
            per-metric averages, or ``None`` when no metric has a numeric score.
        """
        metrics_by_category: dict[MetricCategory, list[EvaluationScore]] = defaultdict(list)

        for result in results:
            for metric_name, evaluation_score in result.metrics.items():
                metrics_by_category[metric_name].append(evaluation_score)

        aggregated_metrics: dict[MetricCategory, EvaluationScore] = {}
        for category, scores in metrics_by_category.items():
            valid_scores = [s.score for s in scores if s.score is not None]
            avg_score = sum(valid_scores) / len(valid_scores) if valid_scores else None

            # Keep each feedback together with its own score so the two lists
            # handed to the summarizer stay aligned.
            scored_feedback = [s for s in scores if s.feedback]

            # EvaluationScore.feedback is a plain ``str``; use "" (not None)
            # when there is no feedback so model validation does not fail.
            feedback_summary = ""
            if len(scored_feedback) > 1:
                feedback_summary = self._summarize_feedbacks(
                    agent_role=agent_role,
                    metric=category.title(),
                    feedbacks=[s.feedback for s in scored_feedback],
                    scores=[s.score for s in scored_feedback],
                    strategy=strategy
                )
            elif scored_feedback:
                feedback_summary = scored_feedback[0].feedback

            aggregated_metrics[category] = EvaluationScore(
                score=avg_score,
                feedback=feedback_summary
            )

        overall_score = None
        overall_inputs = [m.score for m in aggregated_metrics.values() if m.score is not None]
        if overall_inputs:
            overall_score = sum(overall_inputs) / len(overall_inputs)

        return AgentAggregatedEvaluationResult(
            agent_id=agent_id,
            agent_role=agent_role,
            metrics=aggregated_metrics,
            overall_score=overall_score,
            task_count=len(results),
            aggregation_strategy=strategy
        )

    def _summarize_feedbacks(
        self,
        agent_role: str,
        metric: str,
        feedbacks: List[str],
        scores: List[float | None],
        strategy: AggregationStrategy
    ) -> str:
        """Condense several feedback texts for one metric into a single text.

        Up to two short feedbacks (under 200 characters each) are simply
        joined. Otherwise an LLM is asked for a summary; if that fails for any
        reason, a bullet list of the (truncated) feedbacks is returned.

        Args:
            agent_role: Role of the agent, included in the prompt.
            metric: Display name of the metric, included in the prompt.
            feedbacks: Feedback texts to summarize.
            scores: Scores paired positionally with ``feedbacks``.
            strategy: Aggregation strategy, used to pick the prompt guidance.

        Returns:
            The combined or summarized feedback.
        """
        if len(feedbacks) <= 2 and all(len(fb) < 200 for fb in feedbacks):
            return "\n\n".join([f"Feedback {i+1}: {fb}" for i, fb in enumerate(feedbacks)])

        try:
            llm = create_llm()
            if llm is None:
                # Raised inside the try so the plain-text fallback below is used.
                raise ValueError("No LLM available to summarize feedback")

            formatted_feedbacks = []
            for i, (feedback, score) in enumerate(zip(feedbacks, scores)):
                score_text = f"{score:.1f}" if score is not None else "N/A"
                formatted_feedbacks.append(
                    f"Feedback #{i+1} (Score: {score_text}):\n{self._truncate(feedback)}"
                )

            all_feedbacks = "\n\n" + "\n\n---\n\n".join(formatted_feedbacks)

            guidance_by_strategy = {
                AggregationStrategy.BEST_PERFORMANCE:
                    "Focus on the highest-scoring aspects and strengths demonstrated.",
                AggregationStrategy.WORST_PERFORMANCE:
                    "Focus on areas that need improvement and common issues across tasks.",
            }
            strategy_guidance = guidance_by_strategy.get(
                strategy,
                "Provide a balanced analysis of strengths and weaknesses across all tasks.",
            )

            system_content = (
                "You are an expert evaluator creating a comprehensive summary of agent performance feedback.\n"
                "Your job is to synthesize multiple feedback points about the same metric across different tasks.\n"
                "\n"
                "Create a concise, insightful summary that captures the key patterns and themes from all feedback.\n"
                f"{strategy_guidance}\n"
                "\n"
                "Your summary should be:\n"
                "1. Specific and concrete (not vague or general)\n"
                "2. Focused on actionable insights\n"
                "3. Highlighting patterns across tasks\n"
                "4. 150-250 words in length\n"
                "\n"
                "The summary should be directly usable as final feedback for the agent's performance on this metric."
            )
            user_content = (
                "I need a synthesized summary of the following feedback for:\n"
                "\n"
                f"Agent Role: {agent_role}\n"
                f"Metric: {metric.title()}\n"
                "\n"
                f"{all_feedbacks}\n"
            )
            prompt = [
                {"role": "system", "content": system_content},
                {"role": "user", "content": user_content},
            ]

            return llm.call(prompt)

        except Exception:
            # Best-effort display path: any failure falls back to a plain list.
            return "Synthesized from multiple tasks: " + "\n\n".join(
                [f"- {self._truncate(fb)}" for fb in feedbacks]
            )
|
||||||
234
src/crewai/experimental/evaluation/evaluation_listener.py
Normal file
234
src/crewai/experimental/evaluation/evaluation_listener.py
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
from crewai.agent import Agent
|
||||||
|
from crewai.task import Task
|
||||||
|
from crewai.utilities.events.base_event_listener import BaseEventListener
|
||||||
|
from crewai.utilities.events.crewai_event_bus import CrewAIEventsBus
|
||||||
|
from crewai.utilities.events.agent_events import (
|
||||||
|
AgentExecutionStartedEvent,
|
||||||
|
AgentExecutionCompletedEvent,
|
||||||
|
LiteAgentExecutionStartedEvent,
|
||||||
|
LiteAgentExecutionCompletedEvent
|
||||||
|
)
|
||||||
|
from crewai.utilities.events.tool_usage_events import (
|
||||||
|
ToolUsageFinishedEvent,
|
||||||
|
ToolUsageErrorEvent,
|
||||||
|
ToolExecutionErrorEvent,
|
||||||
|
ToolSelectionErrorEvent,
|
||||||
|
ToolValidateInputErrorEvent
|
||||||
|
)
|
||||||
|
from crewai.utilities.events.llm_events import (
|
||||||
|
LLMCallStartedEvent,
|
||||||
|
LLMCallCompletedEvent
|
||||||
|
)
|
||||||
|
|
||||||
|
class EvaluationTraceCallback(BaseEventListener):
    """Event listener that records execution traces for later evaluation.

    For each agent/task pair it stores the tool uses, LLM calls, start and end
    times and the final output in ``self.traces`` under the key
    ``"<agent_id>_<task_id>"``. ``__new__`` hands out one shared instance, so
    every construction returns the same object.
    """

    _instance = None

    def __new__(cls):
        instance = cls._instance
        if instance is None:
            instance = super().__new__(cls)
            instance._initialized = False
            cls._instance = instance
        return instance

    def __init__(self):
        # The shared instance is set up only the first time it is constructed.
        if getattr(self, "_initialized", False):
            return
        super().__init__()
        self.traces = {}
        self.current_agent_id = None
        self.current_task_id = None
        self._initialized = True

    def setup_listeners(self, event_bus: CrewAIEventsBus):
        """Register handlers on ``event_bus`` that forward events to the ``on_*`` methods."""

        def record_tool_failure(event, error_type: str):
            self.on_tool_use(event.tool_name, event.tool_args, event.error,
                             success=False, error_type=error_type)

        @event_bus.on(AgentExecutionStartedEvent)
        def on_agent_started(source, event: AgentExecutionStartedEvent):
            self.on_agent_start(event.agent, event.task)

        @event_bus.on(LiteAgentExecutionStartedEvent)
        def on_lite_agent_started(source, event: LiteAgentExecutionStartedEvent):
            self.on_lite_agent_start(event.agent_info)

        @event_bus.on(AgentExecutionCompletedEvent)
        def on_agent_completed(source, event: AgentExecutionCompletedEvent):
            self.on_agent_finish(event.agent, event.task, event.output)

        @event_bus.on(LiteAgentExecutionCompletedEvent)
        def on_lite_agent_completed(source, event: LiteAgentExecutionCompletedEvent):
            self.on_lite_agent_finish(event.output)

        @event_bus.on(ToolUsageFinishedEvent)
        def on_tool_completed(source, event: ToolUsageFinishedEvent):
            self.on_tool_use(event.tool_name, event.tool_args, event.output, success=True)

        @event_bus.on(ToolUsageErrorEvent)
        def on_tool_usage_error(source, event: ToolUsageErrorEvent):
            record_tool_failure(event, "usage_error")

        @event_bus.on(ToolExecutionErrorEvent)
        def on_tool_execution_error(source, event: ToolExecutionErrorEvent):
            record_tool_failure(event, "execution_error")

        @event_bus.on(ToolSelectionErrorEvent)
        def on_tool_selection_error(source, event: ToolSelectionErrorEvent):
            record_tool_failure(event, "selection_error")

        @event_bus.on(ToolValidateInputErrorEvent)
        def on_tool_validate_input_error(source, event: ToolValidateInputErrorEvent):
            record_tool_failure(event, "validation_error")

        @event_bus.on(LLMCallStartedEvent)
        def on_llm_call_started(source, event: LLMCallStartedEvent):
            self.on_llm_call_start(event.messages, event.tools)

        @event_bus.on(LLMCallCompletedEvent)
        def on_llm_call_completed(source, event: LLMCallCompletedEvent):
            self.on_llm_call_end(event.messages, event.response)

    def on_lite_agent_start(self, agent_info: dict[str, Any]):
        """Start a trace for a lite agent; its task id is always ``"lite_task"``."""
        self.current_agent_id = agent_info['id']
        self.current_task_id = "lite_task"

        self._init_trace(
            trace_key=f"{self.current_agent_id}_{self.current_task_id}",
            agent_id=self.current_agent_id,
            task_id=self.current_task_id,
            tool_uses=[],
            llm_calls=[],
            start_time=datetime.now(),
            final_output=None
        )

    def _init_trace(self, trace_key: str, **kwargs: Any):
        """Store ``kwargs`` as the trace for ``trace_key``, replacing any existing one."""
        self.traces[trace_key] = kwargs

    def on_agent_start(self, agent: Agent, task: Task):
        """Mark ``agent``/``task`` as current and start a fresh trace for the pair."""
        self.current_agent_id = agent.id
        self.current_task_id = task.id

        self._init_trace(
            trace_key=f"{agent.id}_{task.id}",
            agent_id=agent.id,
            task_id=task.id,
            tool_uses=[],
            llm_calls=[],
            start_time=datetime.now(),
            final_output=None
        )

    def _finalize_trace(self, trace_key: str, output: Any):
        """Record output and end time on an existing trace, then clear the current ids."""
        if trace_key in self.traces:
            trace = self.traces[trace_key]
            trace["final_output"] = output
            trace["end_time"] = datetime.now()

        self._reset_current()

    def on_agent_finish(self, agent: Agent, task: Task, output: Any):
        """Close the trace of ``agent``/``task`` with its final output."""
        self._finalize_trace(f"{agent.id}_{task.id}", output)

    def _reset_current(self):
        """Forget which agent and task are currently executing."""
        self.current_agent_id = None
        self.current_task_id = None

    def on_lite_agent_finish(self, output: Any):
        """Close the current lite agent's trace with its final output."""
        self._finalize_trace(f"{self.current_agent_id}_lite_task", output)

    def _current_trace_key(self) -> Optional[str]:
        """Return the trace key of the current agent/task, or None if either is unset."""
        if not self.current_agent_id or not self.current_task_id:
            return None
        return f"{self.current_agent_id}_{self.current_task_id}"

    def on_tool_use(self, tool_name: str, tool_args: dict[str, Any] | str, result: Any,
                    success: bool = True, error_type: str | None = None):
        """Append a tool-use record to the current trace, if there is one."""
        trace_key = self._current_trace_key()
        if trace_key is None or trace_key not in self.traces:
            return

        tool_use = {
            "tool": tool_name,
            "args": tool_args,
            "result": result,
            "success": success,
            "timestamp": datetime.now()
        }

        # Error details are attached only to failures that carry an error type.
        if not success and error_type:
            tool_use["error"] = True
            tool_use["error_type"] = error_type

        self.traces[trace_key]["tool_uses"].append(tool_use)

    def on_llm_call_start(self, messages: str | Sequence[dict[str, Any]] | None, tools: Sequence[dict[str, Any]] | None = None):
        """Remember the LLM call that just started so its start time can be reused."""
        trace_key = self._current_trace_key()
        if trace_key is None or trace_key not in self.traces:
            return

        self.current_llm_call = {
            "messages": messages,
            "tools": tools,
            "start_time": datetime.now(),
            "response": None,
            "end_time": None
        }

    def on_llm_call_end(self, messages: str | list[dict[str, Any]] | None, response: Any):
        """Append a finished LLM call, with timing and token usage, to the current trace."""
        trace_key = self._current_trace_key()
        if trace_key is None or trace_key not in self.traces:
            return

        # Token usage defaults to 0 when the response exposes no usage data.
        total_tokens = getattr(getattr(response, "usage", None), "total_tokens", 0)

        current_time = datetime.now()
        pending_call = getattr(self, "current_llm_call", None)
        start_time = pending_call.get("start_time") if pending_call else None
        if not start_time:
            # No start was recorded for this call; fall back to the end time.
            start_time = current_time

        self.traces[trace_key]["llm_calls"].append({
            "messages": messages,
            "response": response,
            "start_time": start_time,
            "end_time": current_time,
            "total_tokens": total_tokens
        })

        if hasattr(self, "current_llm_call"):
            self.current_llm_call = {}

    def get_trace(self, agent_id: str, task_id: str) -> Optional[Dict[str, Any]]:
        """Return the trace stored for ``agent_id``/``task_id``, or None if there is none."""
        return self.traces.get(f"{agent_id}_{task_id}")
|
||||||
|
|
||||||
|
|
||||||
|
def create_evaluation_callbacks() -> EvaluationTraceCallback:
    """Return the ``EvaluationTraceCallback`` after attaching its handlers to the crewAI event bus."""
    from crewai.utilities.events.crewai_event_bus import crewai_event_bus

    trace_listener = EvaluationTraceCallback()
    trace_listener.setup_listeners(crewai_event_bus)
    return trace_listener
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
from crewai.experimental.evaluation.experiment.runner import ExperimentRunner
|
||||||
|
from crewai.experimental.evaluation.experiment.result import ExperimentResults, ExperimentResult
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"ExperimentRunner",
|
||||||
|
"ExperimentResults",
|
||||||
|
"ExperimentResult"
|
||||||
|
]
|
||||||
122
src/crewai/experimental/evaluation/experiment/result.py
Normal file
122
src/crewai/experimental/evaluation/experiment/result.py
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
class ExperimentResult(BaseModel):
    """Outcome of a single experiment test case."""

    # Name that identifies the test case; used to match runs against a baseline.
    identifier: str
    # Inputs the test case was run with.
    inputs: dict[str, Any]
    # Obtained score: a single integer or a mapping of names to numbers.
    score: int | dict[str, int | float]
    # Expected score, in the same shape as ``score``.
    expected_score: int | dict[str, int | float]
    # Whether the test case passed.
    passed: bool
    # Optional evaluation details; left out of the JSON export by ``ExperimentResults.to_json``.
    agent_evaluations: dict[str, Any] | None = None
|
||||||
|
|
||||||
|
class ExperimentResults:
    """A set of experiment results with JSON export and baseline comparison."""

    def __init__(self, results: list[ExperimentResult], metadata: dict[str, Any] | None = None):
        """Store the results and metadata and stamp the run with the current UTC time."""
        self.results = results
        self.metadata = metadata or {}
        self.timestamp = datetime.now(timezone.utc)

        # Imported here, as in the original implementation, rather than at module level.
        from crewai.experimental.evaluation.experiment.result_display import ExperimentResultsDisplay
        self.display = ExperimentResultsDisplay()

    def to_json(self, filepath: str | None = None) -> dict[str, Any]:
        """Return the run as a JSON-serialisable dict, optionally writing it to a file.

        Args:
            filepath: When given, the data is also written to this path.

        Returns:
            A dict with ``timestamp``, ``metadata`` and ``results``; each
            result omits its ``agent_evaluations``.
        """
        data = {
            "timestamp": self.timestamp.isoformat(),
            "metadata": self.metadata,
            "results": [r.model_dump(exclude={"agent_evaluations"}) for r in self.results]
        }

        if filepath:
            with open(filepath, 'w', encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            self.display.console.print(f"[green]Results saved to {filepath}[/green]")

        return data

    def compare_with_baseline(self, baseline_filepath: str, save_current: bool = True, print_summary: bool = False) -> dict[str, Any]:
        """Compare this run with the most recent run stored in a baseline file.

        The file may hold a single run (a dict with a ``timestamp``) or a list
        of runs. When no usable baseline exists the current run becomes the
        baseline (if ``save_current``) and nothing is compared.

        Args:
            baseline_filepath: Path of the JSON baseline file.
            save_current: Whether to append the current run to the file.
            print_summary: Whether to print a comparison summary.

        Returns:
            The comparison produced by ``_compare_with_run``, or
            ``{"is_baseline": True, "changes": {}}`` when there was no baseline.
        """
        baseline_runs = []

        if os.path.exists(baseline_filepath) and os.path.getsize(baseline_filepath) > 0:
            try:
                with open(baseline_filepath, 'r', encoding="utf-8") as f:
                    baseline_data = json.load(f)

                if isinstance(baseline_data, dict) and "timestamp" in baseline_data:
                    baseline_runs = [baseline_data]
                elif isinstance(baseline_data, list):
                    baseline_runs = baseline_data
            except (json.JSONDecodeError, UnicodeDecodeError, FileNotFoundError) as e:
                self.display.console.print(f"[yellow]Warning: Could not load baseline file: {str(e)}[/yellow]")

        if not baseline_runs:
            if save_current:
                current_data = self.to_json()
                with open(baseline_filepath, 'w', encoding="utf-8") as f:
                    json.dump([current_data], f, indent=2)
                self.display.console.print(f"[green]Saved current results as new baseline to {baseline_filepath}[/green]")
            return {"is_baseline": True, "changes": {}}

        # Newest first; runs without a timestamp sort last.
        baseline_runs.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
        latest_run = baseline_runs[0]

        comparison = self._compare_with_run(latest_run)

        if print_summary:
            # .get() keeps this consistent with the sort key and _compare_with_run:
            # a stored run without a timestamp must not raise KeyError here.
            self.display.comparison_summary(comparison, latest_run.get("timestamp", "unknown"))

        if save_current:
            current_data = self.to_json()
            baseline_runs.append(current_data)
            with open(baseline_filepath, 'w', encoding="utf-8") as f:
                json.dump(baseline_runs, f, indent=2)
            self.display.console.print(f"[green]Added current results to baseline file {baseline_filepath}[/green]")

        return comparison

    def _compare_with_run(self, baseline_run: dict[str, Any]) -> dict[str, Any]:
        """Classify every current test against the same test in ``baseline_run``.

        Tests are matched by identifier and compared on their ``passed`` flag.

        Returns:
            A dict with the identifier lists ``improved``, ``regressed``,
            ``unchanged``, ``new_tests`` and ``missing_tests``, plus
            ``total_compared`` and ``baseline_timestamp``.
        """
        baseline_results = baseline_run.get("results", [])

        # Baseline entries without an identifier cannot be matched and are ignored.
        baseline_lookup = {
            entry.get("identifier"): entry
            for entry in baseline_results
            if entry.get("identifier")
        }

        improved = []
        regressed = []
        unchanged = []
        new_tests = []

        for result in self.results:
            test_identifier = result.identifier
            if not test_identifier or test_identifier not in baseline_lookup:
                new_tests.append(test_identifier)
                continue

            baseline_passed = baseline_lookup[test_identifier].get("passed", False)
            if result.passed and not baseline_passed:
                improved.append(test_identifier)
            elif not result.passed and baseline_passed:
                regressed.append(test_identifier)
            else:
                unchanged.append(test_identifier)

        current_test_identifiers = {result.identifier for result in self.results}
        missing_tests = [
            entry.get("identifier")
            for entry in baseline_results
            if entry.get("identifier") and entry.get("identifier") not in current_test_identifiers
        ]

        return {
            "improved": improved,
            "regressed": regressed,
            "unchanged": unchanged,
            "new_tests": new_tests,
            "missing_tests": missing_tests,
            "total_compared": len(improved) + len(regressed) + len(unchanged),
            "baseline_timestamp": baseline_run.get("timestamp", "unknown")
        }
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
from typing import Dict, Any
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.table import Table
|
||||||
|
from rich.panel import Panel
|
||||||
|
from crewai.experimental.evaluation.experiment.result import ExperimentResults
|
||||||
|
|
||||||
|
class ExperimentResultsDisplay:
    """Print experiment summaries and baseline comparisons as rich tables."""

    def __init__(self):
        self.console = Console()

    def summary(self, experiment_results: ExperimentResults):
        """Print total, passed and failed counts plus the success rate."""
        total = len(experiment_results.results)
        passed = sum(1 for r in experiment_results.results if r.passed)

        table = Table(title="Experiment Summary")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")

        table.add_row("Total Test Cases", str(total))
        table.add_row("Passed", str(passed))
        table.add_row("Failed", str(total - passed))
        # Avoid dividing by zero when the experiment had no test cases.
        table.add_row("Success Rate", f"{(passed / total * 100):.1f}%" if total > 0 else "N/A")

        self.console.print(table)

    @staticmethod
    def _format_details(identifiers, limit: int = 3) -> str:
        """Join the first ``limit`` identifiers and note how many were left out.

        Identifiers are passed through ``str`` so a non-string entry (for
        example ``None`` for a test without an identifier) cannot break the join.
        """
        details = ", ".join(str(identifier) for identifier in identifiers[:limit])
        hidden = len(identifiers) - limit
        if hidden > 0:
            details += f" and {hidden} more"
        return details

    def comparison_summary(self, comparison: Dict[str, Any], baseline_timestamp: str):
        """Print a table summarising a comparison against a baseline run.

        Args:
            comparison: Dict whose "improved", "regressed", "unchanged",
                "new_tests" and "missing_tests" keys hold identifier lists;
                a missing key is treated as an empty list.
            baseline_timestamp: Timestamp shown in the panel header.
        """
        self.console.print(Panel(f"[bold]Comparison with baseline run from {baseline_timestamp}[/bold]",
                                 expand=False))

        table = Table(title="Results Comparison")
        table.add_column("Metric", style="cyan")
        table.add_column("Count", style="white")
        table.add_column("Details", style="dim")

        improved = comparison.get("improved", [])
        table.add_row("✅ Improved", str(len(improved)), self._format_details(improved))

        regressed = comparison.get("regressed", [])
        if regressed:
            # Only a non-empty regression row is highlighted.
            table.add_row("❌ Regressed", str(len(regressed)), self._format_details(regressed), style="red")
        else:
            table.add_row("❌ Regressed", "0", "")

        unchanged = comparison.get("unchanged", [])
        table.add_row("⏺ Unchanged", str(len(unchanged)), "")

        # New and missing rows are shown only when there is something to report.
        new_tests = comparison.get("new_tests", [])
        if new_tests:
            table.add_row("➕ New Tests", str(len(new_tests)), self._format_details(new_tests))

        missing_tests = comparison.get("missing_tests", [])
        if missing_tests:
            table.add_row("➖ Missing Tests", str(len(missing_tests)), self._format_details(missing_tests))

        self.console.print(table)
|
||||||
125
src/crewai/experimental/evaluation/experiment/runner.py
Normal file
125
src/crewai/experimental/evaluation/experiment/runner.py
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
from collections import defaultdict
|
||||||
|
from hashlib import md5
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from crewai import Crew, Agent
|
||||||
|
from crewai.experimental.evaluation import AgentEvaluator, create_default_evaluator
|
||||||
|
from crewai.experimental.evaluation.experiment.result_display import ExperimentResultsDisplay
|
||||||
|
from crewai.experimental.evaluation.experiment.result import ExperimentResults, ExperimentResult
|
||||||
|
from crewai.experimental.evaluation.evaluation_display import AgentAggregatedEvaluationResult
|
||||||
|
|
||||||
|
class ExperimentRunner:
    """Run a dataset of test cases against a crew or agents and score each one."""

    def __init__(self, dataset: list[dict[str, Any]]):
        """Store the dataset (``None`` becomes an empty list) and set up the display."""
        self.dataset = dataset or []
        self.evaluator: AgentEvaluator | None = None
        self.display = ExperimentResultsDisplay()

    def run(self, crew: Crew | None = None, agents: list[Agent] | None = None, print_summary: bool = False) -> ExperimentResults:
        """Execute every test case and collect the results.

        Args:
            crew: Crew to kick off for each test case. Its agents are used
                when ``agents`` is not given.
            agents: Agents to evaluate; kicked off directly when no crew is given.
            print_summary: Print a summary table once all cases have run.

        Returns:
            The results of all test cases, in dataset order.

        Raises:
            ValueError: If neither a crew nor agents were supplied.
        """
        if crew and not agents:
            agents = crew.agents

        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if agents is None:
            raise ValueError("ExperimentRunner.run() requires either a crew or a list of agents.")

        self.evaluator = create_default_evaluator(agents=agents)

        results = []

        for test_case in self.dataset:
            # Each test case is scored from a clean slate.
            self.evaluator.reset_iterations_results()
            result = self._run_test_case(test_case=test_case, crew=crew, agents=agents)
            results.append(result)

        experiment_results = ExperimentResults(results)

        if print_summary:
            self.display.summary(experiment_results)

        return experiment_results

    def _run_test_case(self, test_case: dict[str, Any], agents: list[Agent], crew: Crew | None = None) -> ExperimentResult:
        """Run one test case and turn its outcome into an ``ExperimentResult``.

        ``test_case`` must contain "inputs" and "expected_score". When it has
        no "identifier", an MD5 digest of its string form is used instead.
        Any exception raised while running or scoring is printed and reported
        as a failed result with score 0.
        """
        inputs = test_case["inputs"]
        expected_score = test_case["expected_score"]
        identifier = test_case.get("identifier") or md5(str(test_case).encode(), usedforsecurity=False).hexdigest()

        try:
            self.display.console.print(f"[dim]Running crew with input: {str(inputs)[:50]}...[/dim]")
            self.display.console.print("\n")
            if crew:
                crew.kickoff(inputs=inputs)
            else:
                for agent in agents:
                    agent.kickoff(**inputs)

            assert self.evaluator is not None
            agent_evaluations = self.evaluator.get_agent_evaluation()

            actual_score = self._extract_scores(agent_evaluations)

            passed = self._assert_scores(expected_score, actual_score)
            return ExperimentResult(
                identifier=identifier,
                inputs=inputs,
                score=actual_score,
                expected_score=expected_score,
                passed=passed,
                agent_evaluations=agent_evaluations
            )

        except Exception as e:
            # Best-effort boundary: one failing case must not abort the experiment.
            self.display.console.print(f"[red]Error running test case: {str(e)}[/red]")
            return ExperimentResult(
                identifier=identifier,
                inputs=inputs,
                score=0,
                expected_score=expected_score,
                passed=False
            )

    def _extract_scores(self, agent_evaluations: dict[str, AgentAggregatedEvaluationResult]) -> float | dict[str, float]:
        """Average each metric's score across all agents.

        Metrics whose score is ``None`` are skipped. Returns a bare number
        when exactly one metric has scores, otherwise a dict keyed by the
        metric's ``value`` (empty when nothing was scored).
        """
        all_scores: dict[str, list[float]] = defaultdict(list)
        for evaluation in agent_evaluations.values():
            for metric_name, score in evaluation.metrics.items():
                if score.score is not None:
                    all_scores[metric_name.value].append(score.score)

        avg_scores = {m: sum(s)/len(s) for m, s in all_scores.items()}

        if len(avg_scores) == 1:
            return list(avg_scores.values())[0]

        return avg_scores

    def _assert_scores(self, expected: float | dict[str, float],
                       actual: float | dict[str, float]) -> bool:
        """
        Compare expected and actual scores, and return whether the test case passed.

        The rules for comparison are as follows:
        - If both expected and actual scores are single numbers, the actual score must be >= expected.
        - If expected is a single number and actual is a dict, compare against the average of actual values
          (an empty actual dict fails).
        - If expected is a dict and actual is a single number, actual must be >= all expected values.
        - If both are dicts, only keys present in both are compared and each must have actual >= expected.
          An empty expected dict passes; no keys in common fails.
        - Any other combination of types fails.
        """

        if isinstance(expected, (int, float)) and isinstance(actual, (int, float)):
            return actual >= expected

        if isinstance(expected, dict) and isinstance(actual, (int, float)):
            return all(actual >= exp_score for exp_score in expected.values())

        if isinstance(expected, (int, float)) and isinstance(actual, dict):
            if not actual:
                return False
            avg_score = sum(actual.values()) / len(actual)
            return avg_score >= expected

        if isinstance(expected, dict) and isinstance(actual, dict):
            if not expected:
                return True
            matching_keys = set(expected.keys()) & set(actual.keys())
            if not matching_keys:
                return False

            # All matching keys must have actual >= expected
            return all(actual[key] >= expected[key] for key in matching_keys)

        return False
|
||||||
30
src/crewai/experimental/evaluation/json_parser.py
Normal file
30
src/crewai/experimental/evaluation/json_parser.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
"""Robust JSON parsing utilities for evaluation responses."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
def extract_json_from_llm_response(text: str) -> dict[str, Any]:
    """Parse JSON out of an LLM response that may wrap it in other text.

    Attempts, in order:
    1. The whole text as JSON.
    2. The contents of markdown code fences and inline code spans.
    3. The first JSON object that starts at any ``{`` in the text, which
       covers a bare object surrounded by prose.

    Args:
        text: Raw response text.

    Returns:
        The first value that parses. Steps 1 and 2 return whatever JSON value
        they find; step 3 only accepts objects.

    Raises:
        ValueError: If no attempt yields valid JSON.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    json_patterns = [
        # Standard markdown code blocks with json
        r'```json\s*([\s\S]*?)\s*```',
        # Code blocks without language specifier
        r'```\s*([\s\S]*?)\s*```',
        # Inline code with JSON
        r'`([{\\[].*[}\]])`',
    ]

    for pattern in json_patterns:
        matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL)
        for match in matches:
            try:
                return json.loads(match.strip())
            except json.JSONDecodeError:
                continue

    # Fallback: a bare object embedded in prose. raw_decode parses one value
    # starting at the given index and ignores whatever follows it.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)

    raise ValueError("No valid JSON found in the response")
|
||||||
26
src/crewai/experimental/evaluation/metrics/__init__.py
Normal file
26
src/crewai/experimental/evaluation/metrics/__init__.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from crewai.experimental.evaluation.metrics.reasoning_metrics import (
|
||||||
|
ReasoningEfficiencyEvaluator
|
||||||
|
)
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.metrics.tools_metrics import (
|
||||||
|
ToolSelectionEvaluator,
|
||||||
|
ParameterExtractionEvaluator,
|
||||||
|
ToolInvocationEvaluator
|
||||||
|
)
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.metrics.goal_metrics import (
|
||||||
|
GoalAlignmentEvaluator
|
||||||
|
)
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.metrics.semantic_quality_metrics import (
|
||||||
|
SemanticQualityEvaluator
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"ReasoningEfficiencyEvaluator",
|
||||||
|
"ToolSelectionEvaluator",
|
||||||
|
"ParameterExtractionEvaluator",
|
||||||
|
"ToolInvocationEvaluator",
|
||||||
|
"GoalAlignmentEvaluator",
|
||||||
|
"SemanticQualityEvaluator"
|
||||||
|
]
|
||||||
69
src/crewai/experimental/evaluation/metrics/goal_metrics.py
Normal file
69
src/crewai/experimental/evaluation/metrics/goal_metrics.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from crewai.agent import Agent
|
||||||
|
from crewai.task import Task
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.base_evaluator import BaseEvaluator, EvaluationScore, MetricCategory
|
||||||
|
from crewai.experimental.evaluation.json_parser import extract_json_from_llm_response
|
||||||
|
|
||||||
|
class GoalAlignmentEvaluator(BaseEvaluator):
    """Ask the evaluator LLM how well an agent's output matches its task goal."""

    @property
    def metric_category(self) -> MetricCategory:
        """Return the metric category this evaluator reports under."""
        return MetricCategory.GOAL_ALIGNMENT

    def evaluate(
        self,
        agent: Agent,
        execution_trace: Dict[str, Any],
        final_output: Any,
        task: Task | None = None,
    ) -> EvaluationScore:
        """Score goal alignment from the agent's role, goal and final output.

        Args:
            agent: Agent under evaluation; its ``role`` and ``goal`` are sent to the LLM.
            execution_trace: Not read by this evaluator.
            final_output: The agent's output, interpolated into the prompt as-is.
            task: Optional task whose description and expected output are added to the prompt.

        Returns:
            The parsed score and feedback (score defaults to 0 and feedback to
            the raw response when the JSON lacks them), or a score of ``None``
            when the response cannot be parsed.
        """
        task_context = (
            f"Task description: {task.description}\nExpected output: {task.expected_output}\n"
            if task is not None
            else ""
        )

        system_message = """You are an expert evaluator assessing how well an AI agent's output aligns with its assigned task goal.

Score the agent's goal alignment on a scale from 0-10 where:
- 0: Complete misalignment, agent did not understand or attempt the task goal
- 5: Partial alignment, agent attempted the task but missed key requirements
- 10: Perfect alignment, agent fully satisfied all task requirements

Consider:
1. Did the agent correctly interpret the task goal?
2. Did the final output directly address the requirements?
3. Did the agent focus on relevant aspects of the task?
4. Did the agent provide all requested information or deliverables?

Return your evaluation as JSON with fields 'score' (number) and 'feedback' (string).
"""
        user_message = f"""
Agent role: {agent.role}
Agent goal: {agent.goal}
{task_context}

Agent's final output:
{final_output}

Evaluate how well the agent's output aligns with the assigned task goal.
"""
        prompt = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ]

        assert self.llm is not None
        response = self.llm.call(prompt)

        try:
            evaluation_data: dict[str, Any] = extract_json_from_llm_response(response)
            assert evaluation_data is not None
            parsed_score = evaluation_data.get("score", 0)
            parsed_feedback = evaluation_data.get("feedback", response)
            return EvaluationScore(
                score=parsed_score,
                feedback=parsed_feedback,
                raw_response=response,
            )
        except Exception:
            # Any parsing problem is reported as an unscored evaluation.
            failure_note = f"Failed to parse evaluation. Raw response: {response}"
            return EvaluationScore(
                score=None,
                feedback=failure_note,
                raw_response=response,
            )
|
||||||
361
src/crewai/experimental/evaluation/metrics/reasoning_metrics.py
Normal file
361
src/crewai/experimental/evaluation/metrics/reasoning_metrics.py
Normal file
@@ -0,0 +1,361 @@
|
|||||||
|
"""Agent reasoning efficiency evaluators.
|
||||||
|
|
||||||
|
This module provides evaluator implementations for:
|
||||||
|
- Reasoning efficiency
|
||||||
|
- Loop detection
|
||||||
|
- Thinking-to-action ratio
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Dict, List, Tuple
|
||||||
|
import numpy as np
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
from crewai.agent import Agent
|
||||||
|
from crewai.task import Task
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.base_evaluator import BaseEvaluator, EvaluationScore, MetricCategory
|
||||||
|
from crewai.experimental.evaluation.json_parser import extract_json_from_llm_response
|
||||||
|
from crewai.tasks.task_output import TaskOutput
|
||||||
|
|
||||||
|
class ReasoningPatternType(Enum):
    """Labels for the dominant reasoning pattern seen across an agent's LLM calls."""

    EFFICIENT = "efficient"  # Good reasoning flow
    LOOP = "loop"  # Agent is stuck in a loop
    VERBOSE = "verbose"  # Agent is unnecessarily verbose
    INDECISIVE = "indecisive"  # Agent struggles to make decisions
    SCATTERED = "scattered"  # Agent jumps between topics without focus
|
||||||
|
|
||||||
|
|
||||||
|
class ReasoningEfficiencyEvaluator(BaseEvaluator):
|
||||||
|
@property
def metric_category(self) -> MetricCategory:
    """Return the metric category this evaluator reports under."""
    return MetricCategory.REASONING_EFFICIENCY
|
||||||
|
|
||||||
|
def evaluate(
    self,
    agent: Agent,
    execution_trace: Dict[str, Any],
    final_output: TaskOutput | str,
    task: Task | None = None,
) -> EvaluationScore:
    """Score the efficiency of an agent's reasoning across its LLM calls.

    Call counts, token usage, gaps between calls, loop detection and a
    pattern label are computed from ``execution_trace["llm_calls"]`` and sent
    to the evaluator LLM together with sampled responses and the final output.

    Args:
        agent: Agent under evaluation; its ``role`` is included in the prompt.
        execution_trace: Trace dict; only its "llm_calls" list is read.
        final_output: The agent's output. A ``TaskOutput`` contributes its
            ``raw`` value, and the first 500 characters are sent.
        task: Optional task whose description and expected output are added to the prompt.

    Returns:
        A score with itemised feedback, or a score of ``None`` when there are
        fewer than two LLM calls or the response cannot be parsed.
    """
    task_context = ""
    if task is not None:
        task_context = f"Task description: {task.description}\nExpected output: {task.expected_output}\n"

    llm_calls = execution_trace.get("llm_calls", [])

    if not llm_calls or len(llm_calls) < 2:
        return EvaluationScore(
            score=None,
            feedback="Insufficient LLM calls to evaluate reasoning efficiency."
        )

    total_calls = len(llm_calls)
    total_tokens = sum(call.get("total_tokens", 0) for call in llm_calls)
    avg_tokens_per_call = total_tokens / total_calls if total_calls > 0 else 0

    # Gap between the end of one call and the start of the next.
    time_intervals = []
    has_reliable_timing = True
    for previous_call, next_call in zip(llm_calls, llm_calls[1:]):
        start_time = previous_call.get("end_time")
        end_time = next_call.get("start_time")
        if start_time and end_time and start_time != end_time:
            try:
                interval = end_time - start_time
                # datetime differences expose total_seconds(); a numeric
                # difference is used directly rather than recorded as 0.
                if hasattr(interval, 'total_seconds'):
                    time_intervals.append(interval.total_seconds())
                else:
                    time_intervals.append(float(interval))
            except Exception:
                has_reliable_timing = False
        else:
            has_reliable_timing = False

    loop_detected, loop_details = self._detect_loops(llm_calls)
    pattern_analysis = self._analyze_reasoning_patterns(llm_calls)

    efficiency_metrics = {
        "total_llm_calls": total_calls,
        "total_tokens": total_tokens,
        "avg_tokens_per_call": avg_tokens_per_call,
        "reasoning_pattern": pattern_analysis["primary_pattern"].value,
        "loops_detected": loop_detected,
    }

    if has_reliable_timing and time_intervals:
        efficiency_metrics["avg_time_between_calls"] = np.mean(time_intervals)

    loop_info = f"Detected {len(loop_details)} potential reasoning loops." if loop_detected else "No significant reasoning loops detected."

    # The timing line appears only when every gap could be measured.
    if "avg_time_between_calls" in efficiency_metrics:
        timing_info = "- Average time between calls: {:.2f} seconds".format(efficiency_metrics["avg_time_between_calls"])
    else:
        timing_info = ""

    call_samples = self._get_call_samples(llm_calls)

    final_output = final_output.raw if isinstance(final_output, TaskOutput) else final_output
    # Slicing below needs a string; coerce anything else instead of crashing.
    final_text = final_output if isinstance(final_output, str) else str(final_output)

    prompt = [
        {"role": "system", "content": """You are an expert evaluator assessing the reasoning efficiency of an AI agent's thought process.

Evaluate the agent's reasoning efficiency across these five key subcategories:

1. Focus (0-10): How well the agent stays on topic and avoids unnecessary tangents
2. Progression (0-10): How effectively the agent builds on previous thoughts rather than repeating or circling
3. Decision Quality (0-10): How decisively and appropriately the agent makes decisions
4. Conciseness (0-10): How efficiently the agent communicates without unnecessary verbosity
5. Loop Avoidance (0-10): How well the agent avoids getting stuck in repetitive thinking patterns

For each subcategory, provide a score from 0-10 where:
- 0: Completely inefficient
- 5: Moderately efficient
- 10: Highly efficient

The overall score should be a weighted average of these subcategories.

Return your evaluation as JSON with the following structure:
{
"overall_score": float,
"scores": {
"focus": float,
"progression": float,
"decision_quality": float,
"conciseness": float,
"loop_avoidance": float
},
"feedback": string (general feedback about overall reasoning efficiency),
"optimization_suggestions": string (concrete suggestions for improving reasoning efficiency),
"detected_patterns": string (describe any inefficient reasoning patterns you observe)
}"""},
        {"role": "user", "content": f"""
Agent role: {agent.role}
{task_context}

Reasoning efficiency metrics:
- Total LLM calls: {efficiency_metrics["total_llm_calls"]}
- Average tokens per call: {efficiency_metrics["avg_tokens_per_call"]:.1f}
- Primary reasoning pattern: {efficiency_metrics["reasoning_pattern"]}
- {loop_info}
{timing_info}

Sample of agent reasoning flow (chronological sequence):
{call_samples}

Agent's final output:
{final_text[:500]}... (truncated)

Evaluate the reasoning efficiency of this agent based on these interaction patterns.
Identify any inefficient reasoning patterns and provide specific suggestions for optimization.
"""}
    ]

    assert self.llm is not None
    response = self.llm.call(prompt)

    try:
        evaluation_data = extract_json_from_llm_response(response)

        # Sub-scores the LLM left out default to the mid-point of the scale.
        scores = evaluation_data.get("scores", {})
        focus = scores.get("focus", 5.0)
        progression = scores.get("progression", 5.0)
        decision_quality = scores.get("decision_quality", 5.0)
        conciseness = scores.get("conciseness", 5.0)
        loop_avoidance = scores.get("loop_avoidance", 5.0)

        overall_score = evaluation_data.get("overall_score", evaluation_data.get("score", 5.0))
        feedback = evaluation_data.get("feedback", "No detailed feedback provided.")
        optimization_suggestions = evaluation_data.get("optimization_suggestions", "No specific suggestions provided.")

        detailed_feedback = "Reasoning Efficiency Evaluation:\n"
        detailed_feedback += f"• Focus: {focus}/10 - Staying on topic without tangents\n"
        detailed_feedback += f"• Progression: {progression}/10 - Building on previous thinking\n"
        detailed_feedback += f"• Decision Quality: {decision_quality}/10 - Making appropriate decisions\n"
        detailed_feedback += f"• Conciseness: {conciseness}/10 - Communicating efficiently\n"
        detailed_feedback += f"• Loop Avoidance: {loop_avoidance}/10 - Avoiding repetitive patterns\n\n"

        detailed_feedback += f"Feedback:\n{feedback}\n\n"
        detailed_feedback += f"Optimization Suggestions:\n{optimization_suggestions}"

        return EvaluationScore(
            score=float(overall_score),
            feedback=detailed_feedback,
            raw_response=response
        )
    except Exception as e:
        logging.warning(f"Failed to parse reasoning efficiency evaluation: {e}")
        # str() keeps this handler from raising when the response is not a string.
        return EvaluationScore(
            score=None,
            feedback=f"Failed to parse reasoning efficiency evaluation. Raw response: {str(response)[:200]}...",
            raw_response=response
        )
|
||||||
|
|
||||||
|
def _detect_loops(self, llm_calls: List[Dict]) -> Tuple[bool, List[Dict]]:
|
||||||
|
loop_details = []
|
||||||
|
|
||||||
|
messages = []
|
||||||
|
for call in llm_calls:
|
||||||
|
content = call.get("response", "")
|
||||||
|
if isinstance(content, str):
|
||||||
|
messages.append(content)
|
||||||
|
elif isinstance(content, list) and len(content) > 0:
|
||||||
|
# Handle message list format
|
||||||
|
for msg in content:
|
||||||
|
if isinstance(msg, dict) and "content" in msg:
|
||||||
|
messages.append(msg["content"])
|
||||||
|
|
||||||
|
# Simple n-gram based similarity detection
|
||||||
|
# For a more robust implementation, consider using embedding-based similarity
|
||||||
|
for i in range(len(messages) - 2):
|
||||||
|
for j in range(i + 1, len(messages) - 1):
|
||||||
|
# Check for repeated patterns (simplistic approach)
|
||||||
|
# A more sophisticated approach would use semantic similarity
|
||||||
|
similarity = self._calculate_text_similarity(messages[i], messages[j])
|
||||||
|
if similarity > 0.7: # Arbitrary threshold
|
||||||
|
loop_details.append({
|
||||||
|
"first_occurrence": i,
|
||||||
|
"second_occurrence": j,
|
||||||
|
"similarity": similarity,
|
||||||
|
"snippet": messages[i][:100] + "..."
|
||||||
|
})
|
||||||
|
|
||||||
|
return len(loop_details) > 0, loop_details
|
||||||
|
|
||||||
|
def _calculate_text_similarity(self, text1: str, text2: str) -> float:
|
||||||
|
text1 = re.sub(r'\s+', ' ', text1.lower()).strip()
|
||||||
|
text2 = re.sub(r'\s+', ' ', text2.lower()).strip()
|
||||||
|
|
||||||
|
# Simple Jaccard similarity on word sets
|
||||||
|
words1 = set(text1.split())
|
||||||
|
words2 = set(text2.split())
|
||||||
|
|
||||||
|
intersection = len(words1.intersection(words2))
|
||||||
|
union = len(words1.union(words2))
|
||||||
|
|
||||||
|
return intersection / union if union > 0 else 0.0
|
||||||
|
|
||||||
|
def _analyze_reasoning_patterns(self, llm_calls: List[Dict]) -> Dict[str, Any]:
    """Label the dominant reasoning pattern from response lengths and durations.

    Args:
        llm_calls: LLM call records; "response", "start_time" and "end_time" are read.

    Returns:
        Dict with "primary_pattern" (a ``ReasoningPatternType``), a "details"
        sentence and the "metrics" the decision was based on.
    """
    call_lengths = []
    response_times = []

    for call in llm_calls:
        content = call.get("response", "")
        if isinstance(content, str):
            call_lengths.append(len(content))
        elif isinstance(content, list) and len(content) > 0:
            # Handle message list format
            total_length = 0
            for msg in content:
                if isinstance(msg, dict) and "content" in msg:
                    total_length += len(msg["content"])
            call_lengths.append(total_length)

        start_time = call.get("start_time")
        end_time = call.get("end_time")
        if start_time and end_time:
            try:
                elapsed = end_time - start_time
                # Store plain seconds: the loop-likelihood statistics cannot
                # be computed over timedelta objects.
                if hasattr(elapsed, "total_seconds"):
                    response_times.append(elapsed.total_seconds())
                else:
                    response_times.append(float(elapsed))
            except Exception:
                # Timing is best-effort; a call with unusable timestamps is left out.
                pass

    avg_length = np.mean(call_lengths) if call_lengths else 0
    std_length = np.std(call_lengths) if call_lengths else 0
    length_trend = self._calculate_trend(call_lengths)

    # Relative spread of response lengths; 0.0 when there is no length to
    # divide by, which previously raised ZeroDivisionError.
    relative_spread = std_length / avg_length if avg_length > 0 else 0.0

    primary_pattern = ReasoningPatternType.EFFICIENT
    details = "Agent demonstrates efficient reasoning patterns."

    loop_score = self._calculate_loop_likelihood(call_lengths, response_times)
    if loop_score > 0.7:
        primary_pattern = ReasoningPatternType.LOOP
        details = "Agent appears to be stuck in repetitive thinking patterns."
    elif avg_length > 1000 and relative_spread < 0.3:
        primary_pattern = ReasoningPatternType.VERBOSE
        details = "Agent is consistently verbose across interactions."
    elif len(llm_calls) > 10 and length_trend > 0.5:
        primary_pattern = ReasoningPatternType.INDECISIVE
        details = "Agent shows signs of indecisiveness with increasing message lengths."
    elif relative_spread > 0.8:
        primary_pattern = ReasoningPatternType.SCATTERED
        details = "Agent shows inconsistent reasoning flow with highly variable responses."

    return {
        "primary_pattern": primary_pattern,
        "details": details,
        "metrics": {
            "avg_length": avg_length,
            "std_length": std_length,
            "length_trend": length_trend,
            "loop_score": loop_score
        }
    }
|
||||||
|
|
||||||
|
def _calculate_trend(self, values: Sequence[float | int]) -> float:
|
||||||
|
if not values or len(values) < 2:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
try:
|
||||||
|
x = np.arange(len(values))
|
||||||
|
y = np.array(values)
|
||||||
|
|
||||||
|
# Simple linear regression
|
||||||
|
slope = np.polyfit(x, y, 1)[0]
|
||||||
|
|
||||||
|
# Normalize slope to -1 to 1 range
|
||||||
|
max_possible_slope = max(values) - min(values)
|
||||||
|
if max_possible_slope > 0:
|
||||||
|
normalized_slope = slope / max_possible_slope
|
||||||
|
return max(min(normalized_slope, 1.0), -1.0)
|
||||||
|
return 0.0
|
||||||
|
except Exception:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
def _calculate_loop_likelihood(self, call_lengths: Sequence[float], response_times: Sequence[float]) -> float:
|
||||||
|
if not call_lengths or len(call_lengths) < 3:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
indicators = []
|
||||||
|
|
||||||
|
if len(call_lengths) >= 4:
|
||||||
|
repeated_lengths = 0
|
||||||
|
for i in range(len(call_lengths) - 2):
|
||||||
|
ratio = call_lengths[i] / call_lengths[i + 2] if call_lengths[i + 2] > 0 else 0
|
||||||
|
if 0.85 <= ratio <= 1.15:
|
||||||
|
repeated_lengths += 1
|
||||||
|
|
||||||
|
length_repetition_score = repeated_lengths / (len(call_lengths) - 2)
|
||||||
|
indicators.append(length_repetition_score)
|
||||||
|
|
||||||
|
if response_times and len(response_times) >= 3:
|
||||||
|
try:
|
||||||
|
std_time = np.std(response_times)
|
||||||
|
mean_time = np.mean(response_times)
|
||||||
|
if mean_time > 0:
|
||||||
|
time_consistency = 1.0 - (std_time / mean_time)
|
||||||
|
indicators.append(max(0, time_consistency - 0.3) * 1.5)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return np.mean(indicators) if indicators else 0.0
|
||||||
|
|
||||||
|
def _get_call_samples(self, llm_calls: List[Dict]) -> str:
|
||||||
|
samples = []
|
||||||
|
|
||||||
|
if len(llm_calls) <= 6:
|
||||||
|
sample_indices = list(range(len(llm_calls)))
|
||||||
|
else:
|
||||||
|
sample_indices = [0, 1, len(llm_calls) // 2 - 1, len(llm_calls) // 2,
|
||||||
|
len(llm_calls) - 2, len(llm_calls) - 1]
|
||||||
|
|
||||||
|
for idx in sample_indices:
|
||||||
|
call = llm_calls[idx]
|
||||||
|
content = call.get("response", "")
|
||||||
|
|
||||||
|
if isinstance(content, str):
|
||||||
|
sample = content
|
||||||
|
elif isinstance(content, list) and len(content) > 0:
|
||||||
|
sample_parts = []
|
||||||
|
for msg in content:
|
||||||
|
if isinstance(msg, dict) and "content" in msg:
|
||||||
|
sample_parts.append(msg["content"])
|
||||||
|
sample = "\n".join(sample_parts)
|
||||||
|
else:
|
||||||
|
sample = str(content)
|
||||||
|
|
||||||
|
truncated = sample[:200] + "..." if len(sample) > 200 else sample
|
||||||
|
samples.append(f"Call {idx + 1}:\n{truncated}\n")
|
||||||
|
|
||||||
|
return "\n".join(samples)
|
||||||
@@ -0,0 +1,68 @@
|
|||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from crewai.agent import Agent
|
||||||
|
from crewai.task import Task
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.base_evaluator import BaseEvaluator, EvaluationScore, MetricCategory
|
||||||
|
from crewai.experimental.evaluation.json_parser import extract_json_from_llm_response
|
||||||
|
|
||||||
|
class SemanticQualityEvaluator(BaseEvaluator):
    """Evaluator that asks ``self.llm`` to grade the agent's final output.

    The model is prompted for a 0-10 score plus feedback, returned as JSON.
    """

    @property
    def metric_category(self) -> MetricCategory:
        """Return the metric category this evaluator reports under."""
        return MetricCategory.SEMANTIC_QUALITY

    def evaluate(
        self,
        agent: Agent,
        execution_trace: Dict[str, Any],
        final_output: Any,
        task: Task | None = None,
    ) -> EvaluationScore:
        """Grade ``final_output`` for clarity, coherence and sound reasoning.

        Args:
            agent: Agent under evaluation; its ``role`` is put in the prompt.
            execution_trace: Not read by this evaluator.
            final_output: The output to grade; interpolated into the prompt.
            task: Optional task whose ``description`` is added as context.

        Returns:
            An ``EvaluationScore`` carrying the parsed score and feedback.
            If the reply cannot be parsed, the score is ``None`` and the
            feedback embeds the raw reply.

        Raises:
            AssertionError: If ``self.llm`` is ``None``.
        """
        task_context = f"Task description: {task.description}" if task is not None else ""

        system_message = """You are an expert evaluator assessing the semantic quality of an AI agent's output.

Score the semantic quality on a scale from 0-10 where:
- 0: Completely incoherent, confusing, or logically flawed output
- 5: Moderately clear and logical output with some issues
- 10: Exceptionally clear, coherent, and logically sound output

Consider:
1. Is the output well-structured and organized?
2. Is the reasoning logical and well-supported?
3. Is the language clear, precise, and appropriate for the task?
4. Are claims supported by evidence when appropriate?
5. Is the output free from contradictions and logical fallacies?

Return your evaluation as JSON with fields 'score' (number) and 'feedback' (string).
"""
        user_message = f"""
Agent role: {agent.role}
{task_context}

Agent's final output:
{final_output}

Evaluate the semantic quality and reasoning of this output.
"""
        prompt = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ]

        assert self.llm is not None
        response = self.llm.call(prompt)

        try:
            evaluation_data: dict[str, Any] = extract_json_from_llm_response(response)
            assert evaluation_data is not None

            # A missing or null score is reported as None, not as a failure.
            raw_score = evaluation_data.get("score")
            parsed_score = None if raw_score is None else float(raw_score)

            return EvaluationScore(
                score=parsed_score,
                feedback=evaluation_data.get("feedback", response),
                raw_response=response,
            )
        except Exception:
            # Any extraction or conversion problem yields an unscored result
            # that still carries the raw reply for inspection.
            return EvaluationScore(
                score=None,
                feedback=f"Failed to parse evaluation. Raw response: {response}",
                raw_response=response,
            )
|
||||||
410
src/crewai/experimental/evaluation/metrics/tools_metrics.py
Normal file
410
src/crewai/experimental/evaluation/metrics/tools_metrics.py
Normal file
@@ -0,0 +1,410 @@
|
|||||||
|
import json
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.base_evaluator import BaseEvaluator, EvaluationScore, MetricCategory
|
||||||
|
from crewai.experimental.evaluation.json_parser import extract_json_from_llm_response
|
||||||
|
from crewai.agent import Agent
|
||||||
|
from crewai.task import Task
|
||||||
|
|
||||||
|
|
||||||
|
class ToolSelectionEvaluator(BaseEvaluator):
    """Evaluator that asks ``self.llm`` whether the agent picked fitting tools.

    Only the set of distinct tool names used is compared against the tools
    available to the agent; call counts, arguments and results are not sent.
    """

    @property
    def metric_category(self) -> MetricCategory:
        """Return the metric category this evaluator reports under."""
        return MetricCategory.TOOL_SELECTION

    def evaluate(
        self,
        agent: Agent,
        execution_trace: Dict[str, Any],
        final_output: str,
        task: Task | None = None,
    ) -> EvaluationScore:
        """Grade the agent's choice of tools for the task.

        Args:
            agent: Agent under evaluation; ``role`` and ``tools`` (each with
                ``name`` and ``description``) are read.
            execution_trace: Trace dict; its ``"tool_uses"`` list of dicts is
                read, using each entry's ``"tool"`` key.
            final_output: Not read by this evaluator.
            task: Optional task whose ``description`` is added as context.

        Returns:
            An ``EvaluationScore``. The score is ``None`` when no tool was
            used or when the model's reply cannot be processed.

        Raises:
            AssertionError: If ``self.llm`` is ``None``.
        """
        task_context = f"Task description: {task.description}" if task is not None else ""

        tool_uses = execution_trace.get("tool_uses", [])
        if len(tool_uses) == 0:
            # Nothing to grade; only explain why no tools appear in the trace.
            reason = (
                "Agent had tools available but didn't use any."
                if agent.tools
                else "Agent had no tools available to use."
            )
            return EvaluationScore(score=None, feedback=reason)

        unique_tool_types = {entry.get("tool", "Unknown tool") for entry in tool_uses}

        if agent.tools:
            available_tools_info = "".join(
                f"- {candidate.name}: {candidate.description}\n" for candidate in agent.tools
            )
        else:
            available_tools_info = "No tools available"

        tool_types_summary = "Tools selected by the agent:\n" + "".join(
            f"- {tool_type}\n" for tool_type in sorted(unique_tool_types)
        )

        system_message = """You are an expert evaluator assessing if an AI agent selected the most appropriate tools for a given task.

You must evaluate based on these 2 criteria:
1. Relevance (0-10): Were the tools chosen directly aligned with the task's goals?
2. Coverage (0-10): Did the agent select ALL appropriate tools from the AVAILABLE tools?

IMPORTANT:
- ONLY consider tools that are listed as available to the agent
- DO NOT suggest tools that aren't in the 'Available tools' list
- DO NOT evaluate the quality or accuracy of tool outputs/results
- DO NOT evaluate how many times each tool was used
- DO NOT evaluate how the agent used the parameters
- DO NOT evaluate whether the agent interpreted the task correctly

Focus ONLY on whether the correct CATEGORIES of tools were selected from what was available.

Return your evaluation as JSON with these fields:
- scores: {"relevance": number, "coverage": number}
- overall_score: number (average of all scores, 0-10)
- feedback: string (focused ONLY on tool selection decisions from available tools)
- improvement_suggestions: string (ONLY suggest better selection from the AVAILABLE tools list, NOT new tools)
"""
        user_message = f"""
Agent role: {agent.role}
{task_context}

Available tools for this agent:
{available_tools_info}

{tool_types_summary}

Based ONLY on the task description and comparing the AVAILABLE tools with those that were selected (listed above), evaluate if the agent selected the appropriate tool types for this task.

IMPORTANT:
- ONLY evaluate selection from tools listed as available
- DO NOT suggest new tools that aren't in the available tools list
- DO NOT evaluate tool usage or results
"""
        prompt = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ]

        assert self.llm is not None
        response = self.llm.call(prompt)

        try:
            evaluation_data = extract_json_from_llm_response(response)
            assert evaluation_data is not None

            # Sub-scores and the overall score default to 5.0 when absent.
            scores = evaluation_data.get("scores", {})
            relevance = scores.get("relevance", 5.0)
            coverage = scores.get("coverage", 5.0)
            overall_score = float(evaluation_data.get("overall_score", 5.0))

            header = (
                "Tool Selection Evaluation:\n"
                f"• Relevance: {relevance}/10 - Selection of appropriate tool types for the task\n"
                f"• Coverage: {coverage}/10 - Selection of all necessary tool types\n"
            )
            # Suggestions take precedence over the general feedback text.
            if "improvement_suggestions" in evaluation_data:
                closing = f"Improvement Suggestions:\n{evaluation_data['improvement_suggestions']}"
            else:
                closing = evaluation_data.get("feedback", "No detailed feedback available.")

            return EvaluationScore(
                score=overall_score,
                feedback=header + closing,
                raw_response=response,
            )
        except Exception as e:
            return EvaluationScore(
                score=None,
                feedback=f"Error evaluating tool selection: {e}",
                raw_response=response,
            )
|
||||||
|
|
||||||
|
|
||||||
|
class ParameterExtractionEvaluator(BaseEvaluator):
    """Evaluator that asks ``self.llm`` to grade the argument values an agent
    passed to its tools.

    The prompt shows up to five tool uses and up to three validation errors.
    """

    @property
    def metric_category(self) -> MetricCategory:
        """Return the metric category this evaluator reports under."""
        return MetricCategory.PARAMETER_EXTRACTION

    def evaluate(
        self,
        agent: Agent,
        execution_trace: Dict[str, Any],
        final_output: str,
        task: Task | None = None,
    ) -> EvaluationScore:
        """Grade how well tool parameters were extracted and formatted.

        Args:
            agent: Agent under evaluation; its ``role`` is put in the prompt.
            execution_trace: Trace dict; its ``"tool_uses"`` list of dicts is
                read, using the keys ``tool``, ``args``, ``success``,
                ``error``, ``error_type`` and ``result``.
            final_output: Not read by this evaluator.
            task: Optional task whose ``description`` is added as context.

        Returns:
            An ``EvaluationScore``. The score is ``None`` when the trace has
            no tool uses or when the model's reply cannot be processed.

        Raises:
            AssertionError: If ``self.llm`` is ``None``.
        """
        task_context = ""
        if task is not None:
            task_context = f"Task description: {task.description}"

        tool_uses = execution_trace.get("tool_uses", [])
        tool_count = len(tool_uses)
        if tool_count == 0:
            return EvaluationScore(
                score=None,
                feedback="No tool usage detected. Cannot evaluate parameter extraction."
            )

        validation_errors = [
            {
                "tool": tool_use.get("tool", "Unknown tool"),
                "error": tool_use.get("result"),
                "args": tool_use.get("args", {}),
            }
            for tool_use in tool_uses
            if not tool_use.get("success", True) and tool_use.get("error_type") == "validation_error"
        ]
        # tool_count is non-zero here (see the early return above).
        validation_error_rate = len(validation_errors) / tool_count

        param_samples = []
        for number, tool_use in enumerate(tool_uses[:5], start=1):
            tool_name = tool_use.get("tool", "Unknown tool")
            tool_args = tool_use.get("args", {})
            success = tool_use.get("success", True) and not tool_use.get("error", False)
            error_type = tool_use.get("error_type", "") if not success else ""

            sample = f"Tool use #{number} - {tool_name}:\n"
            # default=str is deliberate: the dump is only display text for
            # the prompt, and an argument that is not JSON-serialisable must
            # not abort the evaluation with a TypeError.
            sample += f"- Parameters: {json.dumps(tool_args, indent=2, default=str)}\n"
            sample += f"- Success: {'No' if not success else 'Yes'}"

            if error_type == "validation_error":
                sample += " (PARAMETER VALIDATION ERROR)\n"
                sample += f"- Error: {tool_use.get('result', 'Unknown error')}"
            elif not success:
                sample += f" (Other error: {error_type})\n"

            param_samples.append(sample)

        validation_errors_info = ""
        if validation_errors:
            validation_errors_info = f"\nParameter validation errors detected: {len(validation_errors)} ({validation_error_rate:.1%} of tool uses)\n"
            for number, err in enumerate(validation_errors[:3], start=1):
                tool_name = err.get("tool", "Unknown tool")
                error_msg = err.get("error", "Unknown error")
                args = err.get("args", {})
                validation_errors_info += f"\nValidation Error #{number}:\n- Tool: {tool_name}\n- Args: {json.dumps(args, indent=2, default=str)}\n- Error: {error_msg}"

            if len(validation_errors) > 3:
                validation_errors_info += f"\n...and {len(validation_errors) - 3} more validation errors."

        param_samples_text = "\n\n".join(param_samples)
        prompt = [
            {"role": "system", "content": """You are an expert evaluator assessing how well an AI agent extracts and formats PARAMETER VALUES for tool calls.

Your job is to evaluate ONLY whether the agent used the correct parameter VALUES, not whether the right tools were selected or how the tools were invoked.

Evaluate parameter extraction based on these criteria:
1. Accuracy (0-10): Are parameter values correctly identified from the context/task?
2. Formatting (0-10): Are values formatted correctly for each tool's requirements?
3. Completeness (0-10): Are all required parameter values provided, with no missing information?

IMPORTANT: DO NOT evaluate:
- Whether the right tool was chosen (that's the ToolSelectionEvaluator's job)
- How the tools were structurally invoked (that's the ToolInvocationEvaluator's job)
- The quality of results from tools

Focus ONLY on the PARAMETER VALUES - whether they were correctly extracted from the context, properly formatted, and complete.

Validation errors are important signals that parameter values weren't properly extracted or formatted.

Return your evaluation as JSON with these fields:
- scores: {"accuracy": number, "formatting": number, "completeness": number}
- overall_score: number (average of all scores, 0-10)
- feedback: string (focused ONLY on parameter value extraction quality)
- improvement_suggestions: string (concrete suggestions for better parameter VALUE extraction)
"""},
            {"role": "user", "content": f"""
Agent role: {agent.role}
{task_context}

Parameter extraction examples:
{param_samples_text}
{validation_errors_info}

Evaluate the quality of the agent's parameter extraction for this task.
"""}
        ]

        assert self.llm is not None
        response = self.llm.call(prompt)

        try:
            evaluation_data = extract_json_from_llm_response(response)
            assert evaluation_data is not None

            # Sub-scores and the overall score default to 5.0 when absent.
            scores = evaluation_data.get("scores", {})
            accuracy = scores.get("accuracy", 5.0)
            formatting = scores.get("formatting", 5.0)
            completeness = scores.get("completeness", 5.0)

            overall_score = float(evaluation_data.get("overall_score", 5.0))

            feedback = "Parameter Extraction Evaluation:\n"
            feedback += f"• Accuracy: {accuracy}/10 - Correctly identifying required parameters\n"
            feedback += f"• Formatting: {formatting}/10 - Properly formatting parameters for tools\n"
            feedback += f"• Completeness: {completeness}/10 - Including all necessary information\n\n"

            # Suggestions take precedence over the general feedback text.
            if "improvement_suggestions" in evaluation_data:
                feedback += f"Improvement Suggestions:\n{evaluation_data['improvement_suggestions']}"
            else:
                feedback += evaluation_data.get("feedback", "No detailed feedback available.")

            return EvaluationScore(
                score=overall_score,
                feedback=feedback,
                raw_response=response
            )
        except Exception as e:
            return EvaluationScore(
                score=None,
                feedback=f"Error evaluating parameter extraction: {e}",
                raw_response=response
            )
|
||||||
|
|
||||||
|
|
||||||
|
class ToolInvocationEvaluator(BaseEvaluator):
    """Evaluator that asks ``self.llm`` to grade how the agent's tool calls
    were structured.

    The prompt shows up to five invocations, the overall error rate and a
    per-error-type breakdown.
    """

    @property
    def metric_category(self) -> MetricCategory:
        """Return the metric category this evaluator reports under."""
        return MetricCategory.TOOL_INVOCATION

    def evaluate(
        self,
        agent: Agent,
        execution_trace: Dict[str, Any],
        final_output: str,
        task: Task | None = None,
    ) -> EvaluationScore:
        """Grade the structure and error handling of the agent's tool calls.

        Args:
            agent: Agent under evaluation; its ``role`` is put in the prompt.
            execution_trace: Trace dict; its ``"tool_uses"`` list of dicts is
                read, using the keys ``tool``, ``args``, ``success``,
                ``error``, ``error_type`` and ``result``.
            final_output: Not read by this evaluator.
            task: Optional task whose ``description`` is added as context.

        Returns:
            An ``EvaluationScore``. The score is ``None`` when the trace has
            no tool uses or when the model's reply cannot be processed.

        Raises:
            AssertionError: If ``self.llm`` is ``None``.
        """
        task_context = ""
        if task is not None:
            task_context = f"Task description: {task.description}"

        tool_uses = execution_trace.get("tool_uses", [])
        tool_count = len(tool_uses)
        if tool_count == 0:
            return EvaluationScore(
                score=None,
                feedback="No tool usage detected. Cannot evaluate tool invocation."
            )

        # A use counts as failed when it is flagged unsuccessful OR carries
        # a truthy "error" entry.
        tool_errors = [
            {
                "tool": tool_use.get("tool", "Unknown tool"),
                "error": tool_use.get("result"),
                "error_type": tool_use.get("error_type", "unknown_error"),
            }
            for tool_use in tool_uses
            if not tool_use.get("success", True) or tool_use.get("error", False)
        ]
        # tool_count is non-zero here (see the early return above).
        error_rate = len(tool_errors) / tool_count

        error_types: Dict[str, int] = {}
        for error in tool_errors:
            kind = error.get("error_type", "unknown_error")
            error_types[kind] = error_types.get(kind, 0) + 1

        invocation_samples = []
        for number, tool_use in enumerate(tool_uses[:5], start=1):
            tool_name = tool_use.get("tool", "Unknown tool")
            tool_args = tool_use.get("args", {})
            success = tool_use.get("success", True) and not tool_use.get("error", False)
            error_type = tool_use.get("error_type", "") if not success else ""
            error_msg = tool_use.get("result", "No error") if not success else "No error"

            sample = f"Tool invocation #{number}:\n"
            sample += f"- Tool: {tool_name}\n"
            # default=str is deliberate: the dump is only display text for
            # the prompt, and an argument that is not JSON-serialisable must
            # not abort the evaluation with a TypeError.
            sample += f"- Parameters: {json.dumps(tool_args, indent=2, default=str)}\n"
            sample += f"- Success: {'No' if not success else 'Yes'}\n"
            if not success:
                sample += f"- Error type: {error_type}\n"
                sample += f"- Error: {error_msg}"
            invocation_samples.append(sample)

        error_type_summary = ""
        if error_types:
            error_type_summary = "Error type breakdown:\n"
            for kind, count in error_types.items():
                error_type_summary += f"- {kind}: {count} occurrences ({(count/tool_count):.1%})\n"

        invocation_samples_text = "\n\n".join(invocation_samples)
        prompt = [
            {"role": "system", "content": """You are an expert evaluator assessing how correctly an AI agent's tool invocations are STRUCTURED.

Your job is to evaluate ONLY the structural and syntactical aspects of how the agent called tools, NOT which tools were selected or what parameter values were used.

Evaluate the agent's tool invocation based on these criteria:
1. Structure (0-10): Does the tool call follow the expected syntax and format?
2. Error Handling (0-10): Does the agent handle tool errors appropriately?
3. Invocation Patterns (0-10): Are tool calls properly sequenced, batched, or managed?

Error types that indicate invocation issues:
- execution_error: The tool was called correctly but failed during execution
- usage_error: General errors in how the tool was used structurally

IMPORTANT: DO NOT evaluate:
- Whether the right tool was chosen (that's the ToolSelectionEvaluator's job)
- Whether the parameter values are correct (that's the ParameterExtractionEvaluator's job)
- The quality of results from tools

Focus ONLY on HOW tools were invoked - the structure, format, and handling of the invocation process.

Return your evaluation as JSON with these fields:
- scores: {"structure": number, "error_handling": number, "invocation_patterns": number}
- overall_score: number (average of all scores, 0-10)
- feedback: string (focused ONLY on structural aspects of tool invocation)
- improvement_suggestions: string (concrete suggestions for better structuring of tool calls)
"""},
            {"role": "user", "content": f"""
Agent role: {agent.role}
{task_context}

Tool invocation examples:
{invocation_samples_text}

Tool error rate: {error_rate:.2%} ({len(tool_errors)} errors out of {tool_count} invocations)
{error_type_summary}

Evaluate the quality of the agent's tool invocation structure during this task.
"""}
        ]

        assert self.llm is not None
        response = self.llm.call(prompt)

        try:
            evaluation_data = extract_json_from_llm_response(response)
            assert evaluation_data is not None

            # Sub-scores and the overall score default to 5.0 when absent.
            scores = evaluation_data.get("scores", {})
            structure = scores.get("structure", 5.0)
            error_handling = scores.get("error_handling", 5.0)
            invocation_patterns = scores.get("invocation_patterns", 5.0)

            overall_score = float(evaluation_data.get("overall_score", 5.0))

            feedback = "Tool Invocation Evaluation:\n"
            feedback += f"• Structure: {structure}/10 - Following proper syntax and format\n"
            feedback += f"• Error Handling: {error_handling}/10 - Appropriately handling tool errors\n"
            feedback += f"• Invocation Patterns: {invocation_patterns}/10 - Proper sequencing and management of calls\n\n"

            # Suggestions take precedence over the general feedback text.
            if "improvement_suggestions" in evaluation_data:
                feedback += f"Improvement Suggestions:\n{evaluation_data['improvement_suggestions']}"
            else:
                feedback += evaluation_data.get("feedback", "No detailed feedback available.")

            return EvaluationScore(
                score=overall_score,
                feedback=feedback,
                raw_response=response
            )
        except Exception as e:
            return EvaluationScore(
                score=None,
                feedback=f"Error evaluating tool invocation: {e}",
                raw_response=response
            )
|
||||||
52
src/crewai/experimental/evaluation/testing.py
Normal file
52
src/crewai/experimental/evaluation/testing.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import inspect
|
||||||
|
|
||||||
|
from typing_extensions import Any
|
||||||
|
import warnings
|
||||||
|
from crewai.experimental.evaluation.experiment import ExperimentResults, ExperimentRunner
|
||||||
|
from crewai import Crew, Agent
|
||||||
|
|
||||||
|
def assert_experiment_successfully(experiment_results: ExperimentResults, baseline_filepath: str | None = None) -> None:
    """Fail if any experiment result did not pass or regressed from baseline.

    Args:
        experiment_results: Results object; its ``results`` entries are read
            for ``passed``, ``identifier``, ``expected_score`` and ``score``.
        baseline_filepath: Baseline file to compare against. When falsy, a
            name is derived via ``_get_baseline_filepath_fallback()``.

    Raises:
        AssertionError: If at least one result has ``passed`` falsy, or if
            the baseline comparison reports regressed tests.
    """
    failure_lines = [
        f"- {outcome.identifier}: expected {outcome.expected_score}, got {outcome.score}"
        for outcome in experiment_results.results
        if not outcome.passed
    ]
    if failure_lines:
        raise AssertionError("The following test cases failed:\n" + "\n".join(failure_lines))

    # Keep this call directly in this function's body: the fallback walks the
    # call stack two frames up, so an extra wrapper frame would change the
    # name it derives.
    if not baseline_filepath:
        baseline_filepath = _get_baseline_filepath_fallback()

    assert_experiment_no_regression(
        experiment_results.compare_with_baseline(baseline_filepath=baseline_filepath)
    )
|
||||||
|
|
||||||
|
def assert_experiment_no_regression(comparison_result: dict[str, list[str]]) -> None:
    """Check a baseline comparison for regressions and missing tests.

    Args:
        comparison_result: Mapping read for the optional keys ``"regressed"``
            and ``"missing_tests"``; both are treated as empty when absent.

    Raises:
        AssertionError: If ``"regressed"`` is non-empty.

    Warns:
        UserWarning: If ``"missing_tests"`` is non-empty.
    """
    if regressed := comparison_result.get("regressed", []):
        raise AssertionError(
            f"Regression detected! The following tests that previously passed now fail: {regressed}"
        )

    missing_tests = comparison_result.get("missing_tests", [])
    if not missing_tests:
        return

    # Missing tests only warn; they never fail the assertion.
    notice = f"Warning: {len(missing_tests)} tests from the baseline are missing in the current run: {missing_tests}"
    warnings.warn(notice, UserWarning)
|
||||||
|
|
||||||
|
def run_experiment(dataset: list[dict[str, Any]], crew: Crew | None = None, agents: list[Agent] | None = None, verbose: bool = False) -> ExperimentResults:
    """Run ``dataset`` through an ``ExperimentRunner`` and return its results.

    Args:
        dataset: Test cases handed to ``ExperimentRunner``.
        crew: Optional crew forwarded to ``ExperimentRunner.run``.
        agents: Optional agents forwarded to ``ExperimentRunner.run``.
        verbose: Forwarded as ``print_summary``.

    Returns:
        Whatever ``ExperimentRunner.run`` returns.
    """
    return ExperimentRunner(dataset=dataset).run(
        agents=agents,
        crew=crew,
        print_summary=verbose,
    )
|
||||||
|
|
||||||
|
def _get_baseline_filepath_fallback() -> str:
|
||||||
|
test_func_name = "experiment_fallback"
|
||||||
|
|
||||||
|
try:
|
||||||
|
current_frame = inspect.currentframe()
|
||||||
|
if current_frame is not None:
|
||||||
|
test_func_name = current_frame.f_back.f_back.f_code.co_name # type: ignore[union-attr]
|
||||||
|
except Exception:
|
||||||
|
...
|
||||||
|
return f"{test_func_name}_results.json"
|
||||||
@@ -28,7 +28,7 @@ from pydantic import (
|
|||||||
InstanceOf,
|
InstanceOf,
|
||||||
PrivateAttr,
|
PrivateAttr,
|
||||||
model_validator,
|
model_validator,
|
||||||
field_validator,
|
field_validator
|
||||||
)
|
)
|
||||||
|
|
||||||
from crewai.agents.agent_builder.base_agent import BaseAgent
|
from crewai.agents.agent_builder.base_agent import BaseAgent
|
||||||
@@ -40,7 +40,7 @@ from crewai.agents.parser import (
|
|||||||
OutputParserException,
|
OutputParserException,
|
||||||
)
|
)
|
||||||
from crewai.flow.flow_trackable import FlowTrackable
|
from crewai.flow.flow_trackable import FlowTrackable
|
||||||
from crewai.llm import LLM
|
from crewai.llm import LLM, BaseLLM
|
||||||
from crewai.tools.base_tool import BaseTool
|
from crewai.tools.base_tool import BaseTool
|
||||||
from crewai.tools.structured_tool import CrewStructuredTool
|
from crewai.tools.structured_tool import CrewStructuredTool
|
||||||
from crewai.utilities import I18N
|
from crewai.utilities import I18N
|
||||||
@@ -135,7 +135,7 @@ class LiteAgent(FlowTrackable, BaseModel):
|
|||||||
role: str = Field(description="Role of the agent")
|
role: str = Field(description="Role of the agent")
|
||||||
goal: str = Field(description="Goal of the agent")
|
goal: str = Field(description="Goal of the agent")
|
||||||
backstory: str = Field(description="Backstory of the agent")
|
backstory: str = Field(description="Backstory of the agent")
|
||||||
llm: Optional[Union[str, InstanceOf[LLM], Any]] = Field(
|
llm: Optional[Union[str, InstanceOf[BaseLLM], Any]] = Field(
|
||||||
default=None, description="Language model that will run the agent"
|
default=None, description="Language model that will run the agent"
|
||||||
)
|
)
|
||||||
tools: List[BaseTool] = Field(
|
tools: List[BaseTool] = Field(
|
||||||
@@ -209,8 +209,8 @@ class LiteAgent(FlowTrackable, BaseModel):
|
|||||||
def setup_llm(self):
|
def setup_llm(self):
|
||||||
"""Set up the LLM and other components after initialization."""
|
"""Set up the LLM and other components after initialization."""
|
||||||
self.llm = create_llm(self.llm)
|
self.llm = create_llm(self.llm)
|
||||||
if not isinstance(self.llm, LLM):
|
if not isinstance(self.llm, BaseLLM):
|
||||||
raise ValueError("Unable to create LLM instance")
|
raise ValueError(f"Expected LLM instance of type BaseLLM, got {type(self.llm).__name__}")
|
||||||
|
|
||||||
# Initialize callbacks
|
# Initialize callbacks
|
||||||
token_callback = TokenCalcHandler(token_cost_process=self._token_process)
|
token_callback = TokenCalcHandler(token_cost_process=self._token_process)
|
||||||
@@ -232,7 +232,8 @@ class LiteAgent(FlowTrackable, BaseModel):
|
|||||||
elif isinstance(self.guardrail, str):
|
elif isinstance(self.guardrail, str):
|
||||||
from crewai.tasks.llm_guardrail import LLMGuardrail
|
from crewai.tasks.llm_guardrail import LLMGuardrail
|
||||||
|
|
||||||
assert isinstance(self.llm, LLM)
|
if not isinstance(self.llm, BaseLLM):
|
||||||
|
raise TypeError(f"Guardrail requires LLM instance of type BaseLLM, got {type(self.llm).__name__}")
|
||||||
|
|
||||||
self._guardrail = LLMGuardrail(description=self.guardrail, llm=self.llm)
|
self._guardrail = LLMGuardrail(description=self.guardrail, llm=self.llm)
|
||||||
|
|
||||||
@@ -304,6 +305,7 @@ class LiteAgent(FlowTrackable, BaseModel):
|
|||||||
"""
|
"""
|
||||||
# Create agent info for event emission
|
# Create agent info for event emission
|
||||||
agent_info = {
|
agent_info = {
|
||||||
|
"id": self.id,
|
||||||
"role": self.role,
|
"role": self.role,
|
||||||
"goal": self.goal,
|
"goal": self.goal,
|
||||||
"backstory": self.backstory,
|
"backstory": self.backstory,
|
||||||
@@ -537,6 +539,7 @@ class LiteAgent(FlowTrackable, BaseModel):
|
|||||||
crewai_event_bus.emit(
|
crewai_event_bus.emit(
|
||||||
self,
|
self,
|
||||||
event=LLMCallCompletedEvent(
|
event=LLMCallCompletedEvent(
|
||||||
|
messages=self._messages,
|
||||||
response=answer,
|
response=answer,
|
||||||
call_type=LLMCallType.LLM_CALL,
|
call_type=LLMCallType.LLM_CALL,
|
||||||
from_agent=self,
|
from_agent=self,
|
||||||
@@ -619,4 +622,4 @@ class LiteAgent(FlowTrackable, BaseModel):
|
|||||||
|
|
||||||
def _append_message(self, text: str, role: str = "assistant") -> None:
|
def _append_message(self, text: str, role: str = "assistant") -> None:
|
||||||
"""Append a message to the message list with the given role."""
|
"""Append a message to the message list with the given role."""
|
||||||
self._messages.append(format_message_for_llm(text, role=role))
|
self._messages.append(format_message_for_llm(text, role=role))
|
||||||
@@ -508,7 +508,6 @@ class LLM(BaseLLM):
|
|||||||
# Enable tool calls using streaming
|
# Enable tool calls using streaming
|
||||||
if "tool_calls" in delta:
|
if "tool_calls" in delta:
|
||||||
tool_calls = delta["tool_calls"]
|
tool_calls = delta["tool_calls"]
|
||||||
|
|
||||||
if tool_calls:
|
if tool_calls:
|
||||||
result = self._handle_streaming_tool_calls(
|
result = self._handle_streaming_tool_calls(
|
||||||
tool_calls=tool_calls,
|
tool_calls=tool_calls,
|
||||||
@@ -517,6 +516,7 @@ class LLM(BaseLLM):
|
|||||||
from_task=from_task,
|
from_task=from_task,
|
||||||
from_agent=from_agent,
|
from_agent=from_agent,
|
||||||
)
|
)
|
||||||
|
|
||||||
if result is not None:
|
if result is not None:
|
||||||
chunk_content = result
|
chunk_content = result
|
||||||
|
|
||||||
@@ -631,7 +631,7 @@ class LLM(BaseLLM):
|
|||||||
# Log token usage if available in streaming mode
|
# Log token usage if available in streaming mode
|
||||||
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
||||||
# Emit completion event and return response
|
# Emit completion event and return response
|
||||||
self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL, from_task, from_agent)
|
self._handle_emit_call_events(response=full_response, call_type=LLMCallType.LLM_CALL, from_task=from_task, from_agent=from_agent, messages=params["messages"])
|
||||||
return full_response
|
return full_response
|
||||||
|
|
||||||
# --- 9) Handle tool calls if present
|
# --- 9) Handle tool calls if present
|
||||||
@@ -643,7 +643,7 @@ class LLM(BaseLLM):
|
|||||||
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
||||||
|
|
||||||
# --- 11) Emit completion event and return response
|
# --- 11) Emit completion event and return response
|
||||||
self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL, from_task, from_agent)
|
self._handle_emit_call_events(response=full_response, call_type=LLMCallType.LLM_CALL, from_task=from_task, from_agent=from_agent, messages=params["messages"])
|
||||||
return full_response
|
return full_response
|
||||||
|
|
||||||
except ContextWindowExceededError as e:
|
except ContextWindowExceededError as e:
|
||||||
@@ -655,7 +655,7 @@ class LLM(BaseLLM):
|
|||||||
logging.error(f"Error in streaming response: {str(e)}")
|
logging.error(f"Error in streaming response: {str(e)}")
|
||||||
if full_response.strip():
|
if full_response.strip():
|
||||||
logging.warning(f"Returning partial response despite error: {str(e)}")
|
logging.warning(f"Returning partial response despite error: {str(e)}")
|
||||||
self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL, from_task, from_agent)
|
self._handle_emit_call_events(response=full_response, call_type=LLMCallType.LLM_CALL, from_task=from_task, from_agent=from_agent, messages=params["messages"])
|
||||||
return full_response
|
return full_response
|
||||||
|
|
||||||
# Emit failed event and re-raise the exception
|
# Emit failed event and re-raise the exception
|
||||||
@@ -809,7 +809,7 @@ class LLM(BaseLLM):
|
|||||||
|
|
||||||
# --- 5) If no tool calls or no available functions, return the text response directly
|
# --- 5) If no tool calls or no available functions, return the text response directly
|
||||||
if not tool_calls or not available_functions:
|
if not tool_calls or not available_functions:
|
||||||
self._handle_emit_call_events(text_response, LLMCallType.LLM_CALL, from_task, from_agent)
|
self._handle_emit_call_events(response=text_response, call_type=LLMCallType.LLM_CALL, from_task=from_task, from_agent=from_agent, messages=params["messages"])
|
||||||
return text_response
|
return text_response
|
||||||
|
|
||||||
# --- 6) Handle tool calls if present
|
# --- 6) Handle tool calls if present
|
||||||
@@ -818,7 +818,7 @@ class LLM(BaseLLM):
|
|||||||
return tool_result
|
return tool_result
|
||||||
|
|
||||||
# --- 7) If tool call handling didn't return a result, emit completion event and return text response
|
# --- 7) If tool call handling didn't return a result, emit completion event and return text response
|
||||||
self._handle_emit_call_events(text_response, LLMCallType.LLM_CALL, from_task, from_agent)
|
self._handle_emit_call_events(response=text_response, call_type=LLMCallType.LLM_CALL, from_task=from_task, from_agent=from_agent, messages=params["messages"])
|
||||||
return text_response
|
return text_response
|
||||||
|
|
||||||
def _handle_tool_call(
|
def _handle_tool_call(
|
||||||
@@ -861,6 +861,7 @@ class LLM(BaseLLM):
|
|||||||
tool_args=function_args,
|
tool_args=function_args,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
result = fn(**function_args)
|
result = fn(**function_args)
|
||||||
crewai_event_bus.emit(
|
crewai_event_bus.emit(
|
||||||
self,
|
self,
|
||||||
@@ -874,7 +875,7 @@ class LLM(BaseLLM):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# --- 3.3) Emit success event
|
# --- 3.3) Emit success event
|
||||||
self._handle_emit_call_events(result, LLMCallType.TOOL_CALL)
|
self._handle_emit_call_events(response=result, call_type=LLMCallType.TOOL_CALL)
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# --- 3.4) Handle execution errors
|
# --- 3.4) Handle execution errors
|
||||||
@@ -991,17 +992,20 @@ class LLM(BaseLLM):
|
|||||||
logging.error(f"LiteLLM call failed: {str(e)}")
|
logging.error(f"LiteLLM call failed: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _handle_emit_call_events(self, response: Any, call_type: LLMCallType, from_task: Optional[Any] = None, from_agent: Optional[Any] = None):
|
def _handle_emit_call_events(self, response: Any, call_type: LLMCallType, from_task: Optional[Any] = None, from_agent: Optional[Any] = None, messages: str | list[dict[str, Any]] | None = None):
|
||||||
"""Handle the events for the LLM call.
|
"""Handle the events for the LLM call.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
response (str): The response from the LLM call.
|
response (str): The response from the LLM call.
|
||||||
call_type (str): The type of call, either "tool_call" or "llm_call".
|
call_type (str): The type of call, either "tool_call" or "llm_call".
|
||||||
|
from_task: Optional task object
|
||||||
|
from_agent: Optional agent object
|
||||||
|
messages: Optional messages object
|
||||||
"""
|
"""
|
||||||
assert hasattr(crewai_event_bus, "emit")
|
assert hasattr(crewai_event_bus, "emit")
|
||||||
crewai_event_bus.emit(
|
crewai_event_bus.emit(
|
||||||
self,
|
self,
|
||||||
event=LLMCallCompletedEvent(response=response, call_type=call_type, from_task=from_task, from_agent=from_agent),
|
event=LLMCallCompletedEvent(messages=messages, response=response, call_type=call_type, from_task=from_task, from_agent=from_agent),
|
||||||
)
|
)
|
||||||
|
|
||||||
def _format_messages_for_provider(
|
def _format_messages_for_provider(
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ class Task(BaseModel):
|
|||||||
description: Descriptive text detailing task's purpose and execution.
|
description: Descriptive text detailing task's purpose and execution.
|
||||||
expected_output: Clear definition of expected task outcome.
|
expected_output: Clear definition of expected task outcome.
|
||||||
output_file: File path for storing task output.
|
output_file: File path for storing task output.
|
||||||
|
create_directory: Whether to create the directory for output_file if it doesn't exist.
|
||||||
output_json: Pydantic model for structuring JSON output.
|
output_json: Pydantic model for structuring JSON output.
|
||||||
output_pydantic: Pydantic model for task output.
|
output_pydantic: Pydantic model for task output.
|
||||||
security_config: Security configuration including fingerprinting.
|
security_config: Security configuration including fingerprinting.
|
||||||
@@ -97,7 +98,7 @@ class Task(BaseModel):
|
|||||||
)
|
)
|
||||||
context: Union[List["Task"], None, _NotSpecified] = Field(
|
context: Union[List["Task"], None, _NotSpecified] = Field(
|
||||||
description="Other tasks that will have their output used as context for this task.",
|
description="Other tasks that will have their output used as context for this task.",
|
||||||
default=NOT_SPECIFIED
|
default=NOT_SPECIFIED,
|
||||||
)
|
)
|
||||||
async_execution: Optional[bool] = Field(
|
async_execution: Optional[bool] = Field(
|
||||||
description="Whether the task should be executed asynchronously or not.",
|
description="Whether the task should be executed asynchronously or not.",
|
||||||
@@ -115,6 +116,10 @@ class Task(BaseModel):
|
|||||||
description="A file path to be used to create a file output.",
|
description="A file path to be used to create a file output.",
|
||||||
default=None,
|
default=None,
|
||||||
)
|
)
|
||||||
|
create_directory: Optional[bool] = Field(
|
||||||
|
description="Whether to create the directory for output_file if it doesn't exist.",
|
||||||
|
default=True,
|
||||||
|
)
|
||||||
output: Optional[TaskOutput] = Field(
|
output: Optional[TaskOutput] = Field(
|
||||||
description="Task output, it's final result after being executed", default=None
|
description="Task output, it's final result after being executed", default=None
|
||||||
)
|
)
|
||||||
@@ -158,9 +163,7 @@ class Task(BaseModel):
|
|||||||
end_time: Optional[datetime.datetime] = Field(
|
end_time: Optional[datetime.datetime] = Field(
|
||||||
default=None, description="End time of the task execution"
|
default=None, description="End time of the task execution"
|
||||||
)
|
)
|
||||||
model_config = {
|
model_config = {"arbitrary_types_allowed": True}
|
||||||
"arbitrary_types_allowed": True
|
|
||||||
}
|
|
||||||
|
|
||||||
@field_validator("guardrail")
|
@field_validator("guardrail")
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -204,7 +207,6 @@ class Task(BaseModel):
|
|||||||
# Check return annotation if present, but don't require it
|
# Check return annotation if present, but don't require it
|
||||||
return_annotation = sig.return_annotation
|
return_annotation = sig.return_annotation
|
||||||
if return_annotation != inspect.Signature.empty:
|
if return_annotation != inspect.Signature.empty:
|
||||||
|
|
||||||
return_annotation_args = get_args(return_annotation)
|
return_annotation_args = get_args(return_annotation)
|
||||||
if not (
|
if not (
|
||||||
get_origin(return_annotation) is tuple
|
get_origin(return_annotation) is tuple
|
||||||
@@ -437,7 +439,7 @@ class Task(BaseModel):
|
|||||||
guardrail_result = process_guardrail(
|
guardrail_result = process_guardrail(
|
||||||
output=task_output,
|
output=task_output,
|
||||||
guardrail=self._guardrail,
|
guardrail=self._guardrail,
|
||||||
retry_count=self.retry_count
|
retry_count=self.retry_count,
|
||||||
)
|
)
|
||||||
if not guardrail_result.success:
|
if not guardrail_result.success:
|
||||||
if self.retry_count >= self.max_retries:
|
if self.retry_count >= self.max_retries:
|
||||||
@@ -510,8 +512,6 @@ class Task(BaseModel):
|
|||||||
)
|
)
|
||||||
from crewai.utilities.events.crewai_event_bus import crewai_event_bus
|
from crewai.utilities.events.crewai_event_bus import crewai_event_bus
|
||||||
|
|
||||||
result = self._guardrail(task_output)
|
|
||||||
|
|
||||||
crewai_event_bus.emit(
|
crewai_event_bus.emit(
|
||||||
self,
|
self,
|
||||||
LLMGuardrailStartedEvent(
|
LLMGuardrailStartedEvent(
|
||||||
@@ -519,7 +519,13 @@ class Task(BaseModel):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
guardrail_result = GuardrailResult.from_tuple(result)
|
try:
|
||||||
|
result = self._guardrail(task_output)
|
||||||
|
guardrail_result = GuardrailResult.from_tuple(result)
|
||||||
|
except Exception as e:
|
||||||
|
guardrail_result = GuardrailResult(
|
||||||
|
success=False, result=None, error=f"Guardrail execution error: {str(e)}"
|
||||||
|
)
|
||||||
|
|
||||||
crewai_event_bus.emit(
|
crewai_event_bus.emit(
|
||||||
self,
|
self,
|
||||||
@@ -752,8 +758,10 @@ Follow these guidelines:
|
|||||||
resolved_path = Path(self.output_file).expanduser().resolve()
|
resolved_path = Path(self.output_file).expanduser().resolve()
|
||||||
directory = resolved_path.parent
|
directory = resolved_path.parent
|
||||||
|
|
||||||
if not directory.exists():
|
if self.create_directory and not directory.exists():
|
||||||
directory.mkdir(parents=True, exist_ok=True)
|
directory.mkdir(parents=True, exist_ok=True)
|
||||||
|
elif not self.create_directory and not directory.exists():
|
||||||
|
raise RuntimeError(f"Directory {directory} does not exist and create_directory is False")
|
||||||
|
|
||||||
with resolved_path.open("w", encoding="utf-8") as file:
|
with resolved_path.open("w", encoding="utf-8") as file:
|
||||||
if isinstance(result, dict):
|
if isinstance(result, dict):
|
||||||
|
|||||||
@@ -1,10 +1,9 @@
|
|||||||
from typing import Any, Optional, Tuple
|
from typing import Any, Tuple
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from crewai.agent import Agent, LiteAgentOutput
|
from crewai.agent import Agent, LiteAgentOutput
|
||||||
from crewai.llm import LLM
|
from crewai.llm import BaseLLM
|
||||||
from crewai.task import Task
|
|
||||||
from crewai.tasks.task_output import TaskOutput
|
from crewai.tasks.task_output import TaskOutput
|
||||||
|
|
||||||
|
|
||||||
@@ -32,11 +31,11 @@ class LLMGuardrail:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
description: str,
|
description: str,
|
||||||
llm: LLM,
|
llm: BaseLLM,
|
||||||
):
|
):
|
||||||
self.description = description
|
self.description = description
|
||||||
|
|
||||||
self.llm: LLM = llm
|
self.llm: BaseLLM = llm
|
||||||
|
|
||||||
def _validate_output(self, task_output: TaskOutput) -> LiteAgentOutput:
|
def _validate_output(self, task_output: TaskOutput) -> LiteAgentOutput:
|
||||||
agent = Agent(
|
agent = Agent(
|
||||||
|
|||||||
1
src/crewai/utilities/crew/__init__.py
Normal file
1
src/crewai/utilities/crew/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Crew-specific utilities."""
|
||||||
16
src/crewai/utilities/crew/crew_context.py
Normal file
16
src/crewai/utilities/crew/crew_context.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
"""Context management utilities for tracking crew and task execution context using OpenTelemetry baggage."""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from opentelemetry import baggage
|
||||||
|
|
||||||
|
from crewai.utilities.crew.models import CrewContext
|
||||||
|
|
||||||
|
|
||||||
|
def get_crew_context() -> Optional[CrewContext]:
|
||||||
|
"""Get the current crew context from OpenTelemetry baggage.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
CrewContext instance containing crew context information, or None if no context is set
|
||||||
|
"""
|
||||||
|
return baggage.get_baggage("crew_context")
|
||||||
16
src/crewai/utilities/crew/models.py
Normal file
16
src/crewai/utilities/crew/models.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
"""Models for crew-related data structures."""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class CrewContext(BaseModel):
|
||||||
|
"""Model representing crew context information."""
|
||||||
|
|
||||||
|
id: Optional[str] = Field(
|
||||||
|
default=None, description="Unique identifier for the crew"
|
||||||
|
)
|
||||||
|
key: Optional[str] = Field(
|
||||||
|
default=None, description="Optional crew key/name for identification"
|
||||||
|
)
|
||||||
@@ -155,6 +155,7 @@ class CrewEvaluator:
|
|||||||
)
|
)
|
||||||
|
|
||||||
console = Console()
|
console = Console()
|
||||||
|
console.print("\n")
|
||||||
console.print(table)
|
console.print(table)
|
||||||
|
|
||||||
def evaluate(self, task_output: TaskOutput):
|
def evaluate(self, task_output: TaskOutput):
|
||||||
|
|||||||
@@ -17,6 +17,9 @@ from .agent_events import (
|
|||||||
AgentExecutionStartedEvent,
|
AgentExecutionStartedEvent,
|
||||||
AgentExecutionCompletedEvent,
|
AgentExecutionCompletedEvent,
|
||||||
AgentExecutionErrorEvent,
|
AgentExecutionErrorEvent,
|
||||||
|
AgentEvaluationStartedEvent,
|
||||||
|
AgentEvaluationCompletedEvent,
|
||||||
|
AgentEvaluationFailedEvent,
|
||||||
)
|
)
|
||||||
from .task_events import (
|
from .task_events import (
|
||||||
TaskStartedEvent,
|
TaskStartedEvent,
|
||||||
@@ -74,6 +77,9 @@ __all__ = [
|
|||||||
"AgentExecutionStartedEvent",
|
"AgentExecutionStartedEvent",
|
||||||
"AgentExecutionCompletedEvent",
|
"AgentExecutionCompletedEvent",
|
||||||
"AgentExecutionErrorEvent",
|
"AgentExecutionErrorEvent",
|
||||||
|
"AgentEvaluationStartedEvent",
|
||||||
|
"AgentEvaluationCompletedEvent",
|
||||||
|
"AgentEvaluationFailedEvent",
|
||||||
"TaskStartedEvent",
|
"TaskStartedEvent",
|
||||||
"TaskCompletedEvent",
|
"TaskCompletedEvent",
|
||||||
"TaskFailedEvent",
|
"TaskFailedEvent",
|
||||||
|
|||||||
@@ -123,3 +123,28 @@ class AgentLogsExecutionEvent(BaseEvent):
|
|||||||
type: str = "agent_logs_execution"
|
type: str = "agent_logs_execution"
|
||||||
|
|
||||||
model_config = {"arbitrary_types_allowed": True}
|
model_config = {"arbitrary_types_allowed": True}
|
||||||
|
|
||||||
|
# Agent Eval events
|
||||||
|
class AgentEvaluationStartedEvent(BaseEvent):
|
||||||
|
agent_id: str
|
||||||
|
agent_role: str
|
||||||
|
task_id: str | None = None
|
||||||
|
iteration: int
|
||||||
|
type: str = "agent_evaluation_started"
|
||||||
|
|
||||||
|
class AgentEvaluationCompletedEvent(BaseEvent):
|
||||||
|
agent_id: str
|
||||||
|
agent_role: str
|
||||||
|
task_id: str | None = None
|
||||||
|
iteration: int
|
||||||
|
metric_category: Any
|
||||||
|
score: Any
|
||||||
|
type: str = "agent_evaluation_completed"
|
||||||
|
|
||||||
|
class AgentEvaluationFailedEvent(BaseEvent):
|
||||||
|
agent_id: str
|
||||||
|
agent_role: str
|
||||||
|
task_id: str | None = None
|
||||||
|
iteration: int
|
||||||
|
error: str
|
||||||
|
type: str = "agent_evaluation_failed"
|
||||||
|
|||||||
@@ -22,6 +22,10 @@ from crewai.utilities.events.llm_events import (
|
|||||||
LLMCallStartedEvent,
|
LLMCallStartedEvent,
|
||||||
LLMStreamChunkEvent,
|
LLMStreamChunkEvent,
|
||||||
)
|
)
|
||||||
|
from crewai.utilities.events.llm_guardrail_events import (
|
||||||
|
LLMGuardrailStartedEvent,
|
||||||
|
LLMGuardrailCompletedEvent,
|
||||||
|
)
|
||||||
from crewai.utilities.events.utils.console_formatter import ConsoleFormatter
|
from crewai.utilities.events.utils.console_formatter import ConsoleFormatter
|
||||||
|
|
||||||
from .agent_events import (
|
from .agent_events import (
|
||||||
@@ -370,6 +374,23 @@ class EventListener(BaseEventListener):
|
|||||||
print(content, end="", flush=True)
|
print(content, end="", flush=True)
|
||||||
self.next_chunk = self.text_stream.tell()
|
self.next_chunk = self.text_stream.tell()
|
||||||
|
|
||||||
|
# ----------- LLM GUARDRAIL EVENTS -----------
|
||||||
|
|
||||||
|
@crewai_event_bus.on(LLMGuardrailStartedEvent)
|
||||||
|
def on_llm_guardrail_started(source, event: LLMGuardrailStartedEvent):
|
||||||
|
guardrail_str = str(event.guardrail)
|
||||||
|
guardrail_name = (
|
||||||
|
guardrail_str[:50] + "..." if len(guardrail_str) > 50 else guardrail_str
|
||||||
|
)
|
||||||
|
|
||||||
|
self.formatter.handle_guardrail_started(guardrail_name, event.retry_count)
|
||||||
|
|
||||||
|
@crewai_event_bus.on(LLMGuardrailCompletedEvent)
|
||||||
|
def on_llm_guardrail_completed(source, event: LLMGuardrailCompletedEvent):
|
||||||
|
self.formatter.handle_guardrail_completed(
|
||||||
|
event.success, event.error, event.retry_count
|
||||||
|
)
|
||||||
|
|
||||||
@crewai_event_bus.on(CrewTestStartedEvent)
|
@crewai_event_bus.on(CrewTestStartedEvent)
|
||||||
def on_crew_test_started(source, event: CrewTestStartedEvent):
|
def on_crew_test_started(source, event: CrewTestStartedEvent):
|
||||||
cloned_crew = source.copy()
|
cloned_crew = source.copy()
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from .agent_events import (
|
|||||||
AgentExecutionCompletedEvent,
|
AgentExecutionCompletedEvent,
|
||||||
AgentExecutionErrorEvent,
|
AgentExecutionErrorEvent,
|
||||||
AgentExecutionStartedEvent,
|
AgentExecutionStartedEvent,
|
||||||
|
LiteAgentExecutionCompletedEvent,
|
||||||
)
|
)
|
||||||
from .crew_events import (
|
from .crew_events import (
|
||||||
CrewKickoffCompletedEvent,
|
CrewKickoffCompletedEvent,
|
||||||
@@ -80,6 +81,7 @@ EventTypes = Union[
|
|||||||
CrewTrainFailedEvent,
|
CrewTrainFailedEvent,
|
||||||
AgentExecutionStartedEvent,
|
AgentExecutionStartedEvent,
|
||||||
AgentExecutionCompletedEvent,
|
AgentExecutionCompletedEvent,
|
||||||
|
LiteAgentExecutionCompletedEvent,
|
||||||
TaskStartedEvent,
|
TaskStartedEvent,
|
||||||
TaskCompletedEvent,
|
TaskCompletedEvent,
|
||||||
TaskFailedEvent,
|
TaskFailedEvent,
|
||||||
|
|||||||
@@ -48,8 +48,8 @@ class LLMCallStartedEvent(LLMEventBase):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
type: str = "llm_call_started"
|
type: str = "llm_call_started"
|
||||||
messages: Union[str, List[Dict[str, Any]]]
|
messages: Optional[Union[str, List[Dict[str, Any]]]] = None
|
||||||
tools: Optional[List[dict]] = None
|
tools: Optional[List[dict[str, Any]]] = None
|
||||||
callbacks: Optional[List[Any]] = None
|
callbacks: Optional[List[Any]] = None
|
||||||
available_functions: Optional[Dict[str, Any]] = None
|
available_functions: Optional[Dict[str, Any]] = None
|
||||||
|
|
||||||
@@ -58,10 +58,10 @@ class LLMCallCompletedEvent(LLMEventBase):
|
|||||||
"""Event emitted when a LLM call completes"""
|
"""Event emitted when a LLM call completes"""
|
||||||
|
|
||||||
type: str = "llm_call_completed"
|
type: str = "llm_call_completed"
|
||||||
|
messages: str | list[dict[str, Any]] | None = None
|
||||||
response: Any
|
response: Any
|
||||||
call_type: LLMCallType
|
call_type: LLMCallType
|
||||||
|
|
||||||
|
|
||||||
class LLMCallFailedEvent(LLMEventBase):
|
class LLMCallFailedEvent(LLMEventBase):
|
||||||
"""Event emitted when a LLM call fails"""
|
"""Event emitted when a LLM call fails"""
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
from inspect import getsource
|
||||||
from typing import Any, Callable, Optional, Union
|
from typing import Any, Callable, Optional, Union
|
||||||
|
|
||||||
from crewai.utilities.events.base_events import BaseEvent
|
from crewai.utilities.events.base_events import BaseEvent
|
||||||
@@ -16,23 +17,26 @@ class LLMGuardrailStartedEvent(BaseEvent):
|
|||||||
retry_count: int
|
retry_count: int
|
||||||
|
|
||||||
def __init__(self, **data):
|
def __init__(self, **data):
|
||||||
from inspect import getsource
|
|
||||||
|
|
||||||
from crewai.tasks.llm_guardrail import LLMGuardrail
|
from crewai.tasks.llm_guardrail import LLMGuardrail
|
||||||
from crewai.tasks.hallucination_guardrail import HallucinationGuardrail
|
from crewai.tasks.hallucination_guardrail import HallucinationGuardrail
|
||||||
|
|
||||||
super().__init__(**data)
|
super().__init__(**data)
|
||||||
|
|
||||||
if isinstance(self.guardrail, LLMGuardrail) or isinstance(
|
if isinstance(self.guardrail, (LLMGuardrail, HallucinationGuardrail)):
|
||||||
self.guardrail, HallucinationGuardrail
|
|
||||||
):
|
|
||||||
self.guardrail = self.guardrail.description.strip()
|
self.guardrail = self.guardrail.description.strip()
|
||||||
elif isinstance(self.guardrail, Callable):
|
elif isinstance(self.guardrail, Callable):
|
||||||
self.guardrail = getsource(self.guardrail).strip()
|
self.guardrail = getsource(self.guardrail).strip()
|
||||||
|
|
||||||
|
|
||||||
class LLMGuardrailCompletedEvent(BaseEvent):
|
class LLMGuardrailCompletedEvent(BaseEvent):
|
||||||
"""Event emitted when a guardrail task completes"""
|
"""Event emitted when a guardrail task completes
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
success: Whether the guardrail validation passed
|
||||||
|
result: The validation result
|
||||||
|
error: Error message if validation failed
|
||||||
|
retry_count: The number of times the guardrail has been retried
|
||||||
|
"""
|
||||||
|
|
||||||
type: str = "llm_guardrail_completed"
|
type: str = "llm_guardrail_completed"
|
||||||
success: bool
|
success: bool
|
||||||
|
|||||||
@@ -1473,9 +1473,7 @@ class ConsoleFormatter:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
memory_branch = branch_to_use.add("")
|
memory_branch = branch_to_use.add("")
|
||||||
self.update_tree_label(
|
self.update_tree_label(memory_branch, "🧠", "Memory Retrieval Started", "blue")
|
||||||
memory_branch, "🧠", "Memory Retrieval Started", "blue"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.print(tree_to_use)
|
self.print(tree_to_use)
|
||||||
self.print()
|
self.print()
|
||||||
@@ -1549,7 +1547,6 @@ class ConsoleFormatter:
|
|||||||
if memory_content:
|
if memory_content:
|
||||||
add_panel()
|
add_panel()
|
||||||
|
|
||||||
|
|
||||||
def handle_memory_query_completed(
|
def handle_memory_query_completed(
|
||||||
self,
|
self,
|
||||||
agent_branch: Optional[Tree],
|
agent_branch: Optional[Tree],
|
||||||
@@ -1616,11 +1613,8 @@ class ConsoleFormatter:
|
|||||||
sources_branch.add(f"❌ {memory_type} - Error: {error}")
|
sources_branch.add(f"❌ {memory_type} - Error: {error}")
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
def handle_memory_save_started(
|
def handle_memory_save_started(
|
||||||
self,
|
self, agent_branch: Optional[Tree], crew_tree: Optional[Tree]
|
||||||
agent_branch: Optional[Tree],
|
|
||||||
crew_tree: Optional[Tree]
|
|
||||||
) -> None:
|
) -> None:
|
||||||
if not self.verbose:
|
if not self.verbose:
|
||||||
return None
|
return None
|
||||||
@@ -1633,7 +1627,7 @@ class ConsoleFormatter:
|
|||||||
|
|
||||||
for child in tree_to_use.children:
|
for child in tree_to_use.children:
|
||||||
if "Memory Update" in str(child.label):
|
if "Memory Update" in str(child.label):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
memory_branch = tree_to_use.add("")
|
memory_branch = tree_to_use.add("")
|
||||||
self.update_tree_label(
|
self.update_tree_label(
|
||||||
@@ -1700,4 +1694,62 @@ class ConsoleFormatter:
|
|||||||
memory_branch.add(content)
|
memory_branch.add(content)
|
||||||
|
|
||||||
self.print(tree_to_use)
|
self.print(tree_to_use)
|
||||||
self.print()
|
self.print()
|
||||||
|
|
||||||
|
def handle_guardrail_started(
|
||||||
|
self,
|
||||||
|
guardrail_name: str,
|
||||||
|
retry_count: int,
|
||||||
|
) -> None:
|
||||||
|
"""Display guardrail evaluation started status.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
guardrail_name: Name/description of the guardrail being evaluated.
|
||||||
|
retry_count: Zero-based retry count (0 = first attempt).
|
||||||
|
"""
|
||||||
|
if not self.verbose:
|
||||||
|
return
|
||||||
|
|
||||||
|
content = self.create_status_content(
|
||||||
|
"Guardrail Evaluation Started",
|
||||||
|
guardrail_name,
|
||||||
|
"yellow",
|
||||||
|
Status="🔄 Evaluating",
|
||||||
|
Attempt=f"{retry_count + 1}",
|
||||||
|
)
|
||||||
|
self.print_panel(content, "🛡️ Guardrail Check", "yellow")
|
||||||
|
|
||||||
|
def handle_guardrail_completed(
|
||||||
|
self,
|
||||||
|
success: bool,
|
||||||
|
error: Optional[str],
|
||||||
|
retry_count: int,
|
||||||
|
) -> None:
|
||||||
|
"""Display guardrail evaluation result.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
success: Whether validation passed.
|
||||||
|
error: Error message if validation failed.
|
||||||
|
retry_count: Zero-based retry count.
|
||||||
|
"""
|
||||||
|
if not self.verbose:
|
||||||
|
return
|
||||||
|
|
||||||
|
if success:
|
||||||
|
content = self.create_status_content(
|
||||||
|
"Guardrail Passed",
|
||||||
|
"Validation Successful",
|
||||||
|
"green",
|
||||||
|
Status="✅ Validated",
|
||||||
|
Attempts=f"{retry_count + 1}",
|
||||||
|
)
|
||||||
|
self.print_panel(content, "🛡️ Guardrail Success", "green")
|
||||||
|
else:
|
||||||
|
content = self.create_status_content(
|
||||||
|
"Guardrail Failed",
|
||||||
|
"Validation Error",
|
||||||
|
"red",
|
||||||
|
Error=str(error) if error else "Unknown error",
|
||||||
|
Attempts=f"{retry_count + 1}",
|
||||||
|
)
|
||||||
|
self.print_panel(content, "🛡️ Guardrail Failed", "red")
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user