Compare commits

..

1 commit

| Author | SHA1 | Message | Date |
| :--- | :--- | :--- | :--- |
| Lucas Gomide | df4754301a | docs: add docs about Memory Events | 2025-07-02 12:04:17 -03:00 |
99 changed files with 3315 additions and 8502 deletions

View File

@@ -260,7 +260,7 @@ def handle_success(self):
    # Handle success case
    pass

@listen("failure_path")
def handle_failure(self):
    # Handle failure case
    pass
@@ -288,7 +288,7 @@ class SelectiveFlow(Flow):
    def critical_step(self):
        # Only this method's state is persisted
        self.state["important_data"] = "value"

    @start()
    def temporary_step(self):
        # This method's state is not persisted
@@ -322,20 +322,20 @@ flow.plot("workflow_diagram")  # Generates HTML visualization
class CyclicFlow(Flow):
    max_iterations = 5
    current_iteration = 0

    @start("loop")
    def process_iteration(self):
        if self.current_iteration >= self.max_iterations:
            return
        # Process current iteration
        self.current_iteration += 1

    @router(process_iteration)
    def check_continue(self):
        if self.current_iteration < self.max_iterations:
            return "loop"  # Continue cycling
        return "complete"

    @listen("complete")
    def finalize(self):
        # Final processing
@@ -369,7 +369,7 @@ def risky_operation(self):
    self.state["success"] = False
    return None

@listen(risky_operation)
def handle_result(self, result):
    if self.state.get("success", False):
        # Handle success case
@@ -390,7 +390,7 @@ class CrewOrchestrationFlow(Flow[WorkflowState]):
        result = research_crew.crew().kickoff(inputs={"topic": self.state.research_topic})
        self.state.research_results = result.raw
        return result

    @listen(research_phase)
    def analysis_phase(self, research_results):
        analysis_crew = AnalysisCrew()
@@ -400,13 +400,13 @@ class CrewOrchestrationFlow(Flow[WorkflowState]):
        })
        self.state.analysis_results = result.raw
        return result

    @router(analysis_phase)
    def decide_next_action(self):
        if self.state.analysis_results.confidence > 0.7:
            return "generate_report"
        return "additional_research"

    @listen("generate_report")
    def final_report(self):
        reporting_crew = ReportingCrew()
@@ -439,7 +439,7 @@ class CrewOrchestrationFlow(Flow[WorkflowState]):
## CrewAI Version Compatibility:
- Stay updated with CrewAI releases for new features and bug fixes
- Test crew functionality when upgrading CrewAI versions
- - Use version constraints in pyproject.toml (e.g., "crewai[tools]>=0.140.0,<1.0.0")
+ - Use version constraints in pyproject.toml (e.g., "crewai[tools]>=0.134.0,<1.0.0")
- Monitor deprecation warnings for future compatibility
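A minimal sketch of how that constraint sits in pyproject.toml (the project metadata is hypothetical):

```toml
[project]
name = "my-crew-project"  # hypothetical
version = "0.1.0"
dependencies = [
    # Upper bound guards against breaking changes in the next major release
    "crewai[tools]>=0.134.0,<1.0.0",
]
```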
## Code Examples and Implementation Patterns
@@ -464,22 +464,22 @@ class ResearchOutput(BaseModel):
@CrewBase
class ResearchCrew():
    """Advanced research crew with structured outputs and validation"""

    agents: List[BaseAgent]
    tasks: List[Task]

    @before_kickoff
    def setup_environment(self):
        """Initialize environment before crew execution"""
        print("🚀 Setting up research environment...")
        # Validate API keys, create directories, etc.

    @after_kickoff
    def cleanup_and_report(self, output):
        """Handle post-execution tasks"""
        print(f"✅ Research completed. Generated {len(output.tasks_output)} task outputs")
        print(f"📊 Token usage: {output.token_usage}")

    @agent
    def researcher(self) -> Agent:
        return Agent(
@@ -490,7 +490,7 @@ class ResearchCrew():
            max_iter=15,
            max_execution_time=1800
        )

    @agent
    def analyst(self) -> Agent:
        return Agent(
@@ -499,7 +499,7 @@ class ResearchCrew():
            verbose=True,
            memory=True
        )

    @task
    def research_task(self) -> Task:
        return Task(
@@ -507,7 +507,7 @@ class ResearchCrew():
            agent=self.researcher(),
            output_pydantic=ResearchOutput
        )

    @task
    def validation_task(self) -> Task:
        return Task(
@@ -517,7 +517,7 @@ class ResearchCrew():
            guardrail=self.validate_research_quality,
            max_retries=3
        )

    def validate_research_quality(self, output) -> tuple[bool, str]:
        """Custom guardrail to ensure research quality"""
        content = output.raw
@@ -526,7 +526,7 @@ class ResearchCrew():
        if not any(keyword in content.lower() for keyword in ['conclusion', 'finding', 'result']):
            return False, "Missing key analytical elements."
        return True, content

    @crew
    def crew(self) -> Crew:
        return Crew(
@@ -557,13 +557,13 @@ class RobustSearchTool(BaseTool):
    name: str = "robust_search"
    description: str = "Perform web search with retry logic and error handling"
    args_schema: Type[BaseModel] = SearchInput

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        self.api_key = api_key or os.getenv("SEARCH_API_KEY")
        self.rate_limit_delay = 1.0
        self.last_request_time = 0

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10)
@@ -575,43 +575,43 @@ class RobustSearchTool(BaseTool):
            time_since_last = time.time() - self.last_request_time
            if time_since_last < self.rate_limit_delay:
                time.sleep(self.rate_limit_delay - time_since_last)

            # Input validation
            if not query or len(query.strip()) == 0:
                return "Error: Empty search query provided"

            if len(query) > 500:
                return "Error: Search query too long (max 500 characters)"

            # Perform search
            results = self._perform_search(query, max_results, timeout)
            self.last_request_time = time.time()

            return self._format_results(results)

        except requests.exceptions.Timeout:
            return f"Search timed out after {timeout} seconds"
        except requests.exceptions.RequestException as e:
            return f"Search failed due to network error: {str(e)}"
        except Exception as e:
            return f"Unexpected error during search: {str(e)}"

    def _perform_search(self, query: str, max_results: int, timeout: int) -> List[dict]:
        """Implement actual search logic here"""
        # Your search API implementation
        pass

    def _format_results(self, results: List[dict]) -> str:
        """Format search results for LLM consumption"""
        if not results:
            return "No results found for the given query."

        formatted = "Search Results:\n\n"
        for i, result in enumerate(results[:10], 1):
            formatted += f"{i}. {result.get('title', 'No title')}\n"
            formatted += f"   URL: {result.get('url', 'No URL')}\n"
            formatted += f"   Summary: {result.get('snippet', 'No summary')}\n\n"

        return formatted
```
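A quick usage sketch for the tool above; note that `_perform_search` is still a stub, so a real search backend has to be wired in before this returns results:

```python
tool = RobustSearchTool()  # falls back to the SEARCH_API_KEY env var

# The tests later in this document invoke the same entry point directly
print(tool._run("latest CrewAI releases"))  # illustrative query
```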
@@ -623,20 +623,20 @@ from crewai.memory.storage.mem0_storage import Mem0Storage
class AdvancedMemoryManager:
    """Enhanced memory management for CrewAI applications"""

    def __init__(self, crew, config: dict = None):
        self.crew = crew
        self.config = config or {}
        self.setup_memory_systems()

    def setup_memory_systems(self):
        """Configure multiple memory systems"""
        # Short-term memory for current session
        self.short_term = ShortTermMemory()

        # Long-term memory for cross-session persistence
        self.long_term = LongTermMemory()

        # External memory with Mem0 (if configured)
        if self.config.get('use_external_memory'):
            self.external = ExternalMemory.create_storage(
@@ -649,8 +649,8 @@ class AdvancedMemoryManager:
                    }
                }
            )

    def save_with_context(self, content: str, memory_type: str = "short_term",
                          metadata: dict = None, agent: str = None):
        """Save content with enhanced metadata"""
        enhanced_metadata = {
@@ -659,14 +659,14 @@ class AdvancedMemoryManager:
            "crew_type": self.crew.__class__.__name__,
            **(metadata or {})
        }

        if memory_type == "short_term":
            self.short_term.save(content, enhanced_metadata, agent)
        elif memory_type == "long_term":
            self.long_term.save(content, enhanced_metadata, agent)
        elif memory_type == "external" and hasattr(self, 'external'):
            self.external.save(content, enhanced_metadata, agent)

    def search_across_memories(self, query: str, limit: int = 5) -> dict:
        """Search across all memory systems"""
        results = {
@@ -674,23 +674,23 @@ class AdvancedMemoryManager:
            "long_term": [],
            "external": []
        }

        # Search short-term memory
        results["short_term"] = self.short_term.search(query, limit=limit)

        # Search long-term memory
        results["long_term"] = self.long_term.search(query, limit=limit)

        # Search external memory (if available)
        if hasattr(self, 'external'):
            results["external"] = self.external.search(query, limit=limit)

        return results

    def cleanup_old_memories(self, days_threshold: int = 30):
        """Clean up old memories based on age"""
        cutoff_time = time.time() - (days_threshold * 24 * 60 * 60)
        # Implement cleanup logic based on timestamps in metadata
        # This would vary based on your specific storage implementation
        pass
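A minimal usage sketch, assuming `crew` is any existing crew instance; the same calls are exercised by the tests later in this document:

```python
manager = AdvancedMemoryManager(crew, config={"use_external_memory": False})

# Save with enriched metadata, attributed to a named agent
manager.save_with_context(
    "Important research finding about AI",  # illustrative content
    memory_type="long_term",
    metadata={"importance": "high"},
    agent="researcher",
)

# Query every configured memory system at once
hits = manager.search_across_memories("AI research", limit=3)
print(hits["long_term"])
```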
@@ -719,12 +719,12 @@ class TaskMetrics:
class CrewMonitor:
    """Comprehensive monitoring for CrewAI applications"""

    def __init__(self, crew_name: str, log_level: str = "INFO"):
        self.crew_name = crew_name
        self.metrics: List[TaskMetrics] = []
        self.session_start = time.time()

        # Setup logging
        logging.basicConfig(
            level=getattr(logging, log_level),
@@ -735,7 +735,7 @@ class CrewMonitor:
            ]
        )
        self.logger = logging.getLogger(f"CrewAI.{crew_name}")

    def start_task_monitoring(self, task_name: str, agent_name: str) -> dict:
        """Start monitoring a task execution"""
        context = {
@@ -743,16 +743,16 @@ class CrewMonitor:
            "agent_name": agent_name,
            "start_time": time.time()
        }

        self.logger.info(f"Task started: {task_name} by {agent_name}")
        return context

    def end_task_monitoring(self, context: dict, success: bool = True,
                            tokens_used: int = 0, error: str = None):
        """End monitoring and record metrics"""
        end_time = time.time()
        duration = end_time - context["start_time"]

        # Get memory usage (if psutil is available)
        memory_usage = None
        try:
@@ -761,7 +761,7 @@ class CrewMonitor:
            memory_usage = process.memory_info().rss / 1024 / 1024  # MB
        except ImportError:
            pass

        metrics = TaskMetrics(
            task_name=context["task_name"],
            agent_name=context["agent_name"],
@@ -773,29 +773,29 @@ class CrewMonitor:
            error_message=error,
            memory_usage_mb=memory_usage
        )

        self.metrics.append(metrics)

        # Log the completion
        status = "SUCCESS" if success else "FAILED"
        self.logger.info(f"Task {status}: {context['task_name']} "
                         f"(Duration: {duration:.2f}s, Tokens: {tokens_used})")

        if error:
            self.logger.error(f"Task error: {error}")

    def get_performance_summary(self) -> Dict[str, Any]:
        """Generate comprehensive performance summary"""
        if not self.metrics:
            return {"message": "No metrics recorded yet"}

        successful_tasks = [m for m in self.metrics if m.success]
        failed_tasks = [m for m in self.metrics if not m.success]

        total_duration = sum(m.duration for m in self.metrics)
        total_tokens = sum(m.tokens_used for m in self.metrics)
        avg_duration = total_duration / len(self.metrics)

        return {
            "crew_name": self.crew_name,
            "session_duration": time.time() - self.session_start,
@@ -811,7 +811,7 @@ class CrewMonitor:
            "most_token_intensive": max(self.metrics, key=lambda x: x.tokens_used).task_name if self.metrics else None,
            "common_errors": self._get_common_errors()
        }

    def _get_common_errors(self) -> Dict[str, int]:
        """Get frequency of common errors"""
        error_counts = {}
@@ -819,20 +819,20 @@ class CrewMonitor:
            if metric.error_message:
                error_counts[metric.error_message] = error_counts.get(metric.error_message, 0) + 1

        return dict(sorted(error_counts.items(), key=lambda x: x[1], reverse=True))

    def export_metrics(self, filename: str = None) -> str:
        """Export metrics to JSON file"""
        if not filename:
            filename = f"crew_metrics_{self.crew_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        export_data = {
            "summary": self.get_performance_summary(),
            "detailed_metrics": [asdict(m) for m in self.metrics]
        }

        with open(filename, 'w') as f:
            json.dump(export_data, f, indent=2, default=str)

        self.logger.info(f"Metrics exported to {filename}")
        return filename
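Put together, one monitored task execution might look like this sketch (names and token counts illustrative):

```python
monitor = CrewMonitor("research_crew")

context = monitor.start_task_monitoring("research_task", "researcher")
# ... the task executes here ...
monitor.end_task_monitoring(context, success=True, tokens_used=1250)

print(monitor.get_performance_summary())
monitor.export_metrics()  # writes crew_metrics_research_crew_<timestamp>.json
```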
@@ -847,10 +847,10 @@ def monitored_research_task(self) -> Task:
        if context:
            tokens = getattr(task_output, 'token_usage', {}).get('total', 0)
            monitor.end_task_monitoring(context, success=True, tokens_used=tokens)

    # Start monitoring would be called before task execution
    # This is a simplified example - in practice you'd integrate this into the task execution flow
    return Task(
        config=self.tasks_config['research_task'],
        agent=self.researcher(),
@@ -872,7 +872,7 @@ class ErrorSeverity(Enum):
class CrewError(Exception):
    """Base exception for CrewAI applications"""

    def __init__(self, message: str, severity: ErrorSeverity = ErrorSeverity.MEDIUM,
                 context: dict = None):
        super().__init__(message)
        self.severity = severity
@@ -893,19 +893,19 @@ class ConfigurationError(CrewError):
class ErrorHandler:
    """Centralized error handling for CrewAI applications"""

    def __init__(self, crew_name: str):
        self.crew_name = crew_name
        self.error_log: List[CrewError] = []
        self.recovery_strategies: Dict[type, Callable] = {}

    def register_recovery_strategy(self, error_type: type, strategy: Callable):
        """Register a recovery strategy for specific error types"""
        self.recovery_strategies[error_type] = strategy

    def handle_error(self, error: Exception, context: dict = None) -> Any:
        """Handle errors with appropriate recovery strategies"""
        # Convert to CrewError if needed
        if not isinstance(error, CrewError):
            crew_error = CrewError(
@@ -915,11 +915,11 @@ class ErrorHandler:
            )
        else:
            crew_error = error

        # Log the error
        self.error_log.append(crew_error)
        self._log_error(crew_error)

        # Apply recovery strategy if available
        error_type = type(error)
        if error_type in self.recovery_strategies:
@@ -931,21 +931,21 @@ class ErrorHandler:
                    ErrorSeverity.HIGH,
                    {"original_error": str(error), "recovery_error": str(recovery_error)}
                ))

        # If critical, re-raise
        if crew_error.severity == ErrorSeverity.CRITICAL:
            raise crew_error

        return None

    def _log_error(self, error: CrewError):
        """Log error with appropriate level based on severity"""
        logger = logging.getLogger(f"CrewAI.{self.crew_name}.ErrorHandler")

        error_msg = f"[{error.severity.value.upper()}] {error}"
        if error.context:
            error_msg += f" | Context: {error.context}"

        if error.severity in [ErrorSeverity.HIGH, ErrorSeverity.CRITICAL]:
            logger.error(error_msg)
            logger.error(f"Stack trace: {traceback.format_exc()}")
@@ -953,16 +953,16 @@ class ErrorHandler:
            logger.warning(error_msg)
        else:
            logger.info(error_msg)

    def get_error_summary(self) -> Dict[str, Any]:
        """Get summary of errors encountered"""
        if not self.error_log:
            return {"total_errors": 0}

        severity_counts = {}
        for error in self.error_log:
            severity_counts[error.severity.value] = severity_counts.get(error.severity.value, 0) + 1

        return {
            "total_errors": len(self.error_log),
            "severity_breakdown": severity_counts,
@@ -1004,7 +1004,7 @@ def robust_task(self) -> Task:
                # Use fallback response
                return "Task failed, using fallback response"
        return wrapper

    return Task(
        config=self.tasks_config['research_task'],
        agent=self.researcher()
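A sketch of wiring a recovery strategy into the handler. It assumes the `TaskExecutionError` subclass defined alongside `ConfigurationError`, and the `(error, context)` strategy signature is an assumption based on how `handle_error` looks strategies up; the fallback value is illustrative:

```python
handler = ErrorHandler("research_crew")

# Strategies are looked up by exception type inside handle_error()
handler.register_recovery_strategy(
    TaskExecutionError,
    lambda error, context: "fallback response",  # assumed (error, context) signature
)

result = handler.handle_error(TaskExecutionError("Search API unreachable", ErrorSeverity.MEDIUM))
print(handler.get_error_summary())
```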
@@ -1020,60 +1020,60 @@ from pydantic import BaseSettings, Field, validator
class Environment(str, Enum):
    DEVELOPMENT = "development"
    TESTING = "testing"
    STAGING = "staging"
    PRODUCTION = "production"

class CrewAISettings(BaseSettings):
    """Comprehensive settings management for CrewAI applications"""

    # Environment
    environment: Environment = Field(default=Environment.DEVELOPMENT)
    debug: bool = Field(default=True)

    # API Keys (loaded from environment)
    openai_api_key: Optional[str] = Field(default=None, env="OPENAI_API_KEY")
    anthropic_api_key: Optional[str] = Field(default=None, env="ANTHROPIC_API_KEY")
    serper_api_key: Optional[str] = Field(default=None, env="SERPER_API_KEY")
    mem0_api_key: Optional[str] = Field(default=None, env="MEM0_API_KEY")

    # CrewAI Configuration
    crew_max_rpm: int = Field(default=100)
    crew_max_execution_time: int = Field(default=3600)  # 1 hour
    default_llm_model: str = Field(default="gpt-4")
    fallback_llm_model: str = Field(default="gpt-3.5-turbo")

    # Memory and Storage
    crewai_storage_dir: str = Field(default="./storage", env="CREWAI_STORAGE_DIR")
    memory_enabled: bool = Field(default=True)
    memory_cleanup_interval: int = Field(default=86400)  # 24 hours in seconds

    # Performance
    enable_caching: bool = Field(default=True)
    max_retries: int = Field(default=3)
    retry_delay: float = Field(default=1.0)

    # Monitoring
    enable_monitoring: bool = Field(default=True)
    log_level: str = Field(default="INFO")
    metrics_export_interval: int = Field(default=3600)  # 1 hour

    # Security
    input_sanitization: bool = Field(default=True)
    max_input_length: int = Field(default=10000)
    allowed_file_types: list = Field(default=["txt", "md", "pdf", "docx"])

    @validator('environment', pre=True)
    def set_debug_based_on_env(cls, v):
        return v

    @validator('debug')
    def set_debug_from_env(cls, v, values):
        env = values.get('environment')
        if env == Environment.PRODUCTION:
            return False
        return v

    @validator('openai_api_key')
    def validate_openai_key(cls, v):
        if not v:
@@ -1081,15 +1081,15 @@ class CrewAISettings(BaseSettings):
        if not v.startswith('sk-'):
            raise ValueError("Invalid OpenAI API key format")
        return v

    @property
    def is_production(self) -> bool:
        return self.environment == Environment.PRODUCTION

    @property
    def is_development(self) -> bool:
        return self.environment == Environment.DEVELOPMENT

    def get_llm_config(self) -> Dict[str, Any]:
        """Get LLM configuration based on environment"""
        config = {
@@ -1098,12 +1098,12 @@ class CrewAISettings(BaseSettings):
            "max_tokens": 4000 if self.is_production else 2000,
            "timeout": 60
        }

        if self.is_development:
            config["model"] = self.fallback_llm_model

        return config

    def get_memory_config(self) -> Dict[str, Any]:
        """Get memory configuration"""
        return {
@@ -1112,7 +1112,7 @@ class CrewAISettings(BaseSettings):
            "cleanup_interval": self.memory_cleanup_interval,
            "provider": "mem0" if self.mem0_api_key and self.is_production else "local"
        }

    class Config:
        env_file = ".env"
        env_file_encoding = 'utf-8'
@@ -1125,25 +1125,25 @@ settings = CrewAISettings()
@CrewBase
class ConfigurableCrew():
    """Crew that uses centralized configuration"""

    def __init__(self):
        self.settings = settings
        self.validate_configuration()

    def validate_configuration(self):
        """Validate configuration before crew execution"""
        required_keys = [self.settings.openai_api_key]

        if not all(required_keys):
            raise ConfigurationError("Missing required API keys")

        if not os.path.exists(self.settings.crewai_storage_dir):
            os.makedirs(self.settings.crewai_storage_dir, exist_ok=True)

    @agent
    def adaptive_agent(self) -> Agent:
        """Agent that adapts to configuration"""
        llm_config = self.settings.get_llm_config()

        return Agent(
            config=self.agents_config['researcher'],
            llm=llm_config["model"],
@@ -1163,7 +1163,7 @@ from crewai.tasks.task_output import TaskOutput
class CrewAITestFramework:
    """Comprehensive testing framework for CrewAI applications"""

    @staticmethod
    def create_mock_agent(role: str = "test_agent", tools: list = None) -> Mock:
        """Create a mock agent for testing"""
@@ -1175,9 +1175,9 @@ class CrewAITestFramework:
        mock_agent.llm = "gpt-3.5-turbo"
        mock_agent.verbose = False
        return mock_agent

    @staticmethod
    def create_mock_task_output(content: str, success: bool = True,
                                tokens: int = 100) -> TaskOutput:
        """Create a mock task output for testing"""
        return TaskOutput(
@@ -1187,13 +1187,13 @@ class CrewAITestFramework:
            pydantic=None,
            json_dict=None
        )

    @staticmethod
    def create_test_crew(agents: list = None, tasks: list = None) -> Crew:
        """Create a test crew with mock components"""
        test_agents = agents or [CrewAITestFramework.create_mock_agent()]
        test_tasks = tasks or []

        return Crew(
            agents=test_agents,
            tasks=test_tasks,
@@ -1203,53 +1203,53 @@ class CrewAITestFramework:
# Example test cases
class TestResearchCrew:
    """Test cases for research crew functionality"""

    def setup_method(self):
        """Setup test environment"""
        self.framework = CrewAITestFramework()
        self.mock_serper = Mock()

    @patch('crewai_tools.SerperDevTool')
    def test_agent_creation(self, mock_serper_tool):
        """Test agent creation with proper configuration"""
        mock_serper_tool.return_value = self.mock_serper

        crew = ResearchCrew()
        researcher = crew.researcher()

        assert researcher.role == "Senior Research Analyst"
        assert len(researcher.tools) > 0
        assert researcher.verbose is True

    def test_task_validation(self):
        """Test task validation logic"""
        crew = ResearchCrew()

        # Test valid output
        valid_output = self.framework.create_mock_task_output(
            "This is a comprehensive research summary with conclusions and findings."
        )
        is_valid, message = crew.validate_research_quality(valid_output)
        assert is_valid is True

        # Test invalid output (too short)
        invalid_output = self.framework.create_mock_task_output("Too short")
        is_valid, message = crew.validate_research_quality(invalid_output)
        assert is_valid is False
        assert "brief" in message.lower()

    @patch('requests.get')
    def test_tool_error_handling(self, mock_requests):
        """Test tool error handling and recovery"""
        # Simulate network error
        mock_requests.side_effect = requests.exceptions.RequestException("Network error")

        tool = RobustSearchTool()
        result = tool._run("test query")

        assert "network error" in result.lower()
        assert "failed" in result.lower()

    @pytest.mark.asyncio
    async def test_crew_execution_flow(self):
        """Test complete crew execution with mocked dependencies"""
@@ -1257,18 +1257,18 @@ class TestResearchCrew:
            mock_execute.return_value = self.framework.create_mock_task_output(
                "Research completed successfully with findings and recommendations."
            )

            crew = ResearchCrew()
            result = crew.crew().kickoff(inputs={"topic": "AI testing"})

            assert result is not None
            assert "successfully" in result.raw.lower()

    def test_memory_integration(self):
        """Test memory system integration"""
        crew = ResearchCrew()
        memory_manager = AdvancedMemoryManager(crew)

        # Test saving to memory
        test_content = "Important research finding about AI"
        memory_manager.save_with_context(
@@ -1277,34 +1277,34 @@ class TestResearchCrew:
            metadata={"importance": "high"},
            agent="researcher"
        )

        # Test searching memory
        results = memory_manager.search_across_memories("AI research")
        assert "short_term" in results

    def test_error_handling_workflow(self):
        """Test error handling and recovery mechanisms"""
        error_handler = ErrorHandler("test_crew")

        # Test error registration and handling
        test_error = TaskExecutionError("Test task failed", ErrorSeverity.MEDIUM)
        result = error_handler.handle_error(test_error)

        assert len(error_handler.error_log) == 1
        assert error_handler.error_log[0].severity == ErrorSeverity.MEDIUM

    def test_configuration_validation(self):
        """Test configuration validation"""
        # Test with missing API key
        with patch.dict(os.environ, {}, clear=True):
            with pytest.raises(ValueError):
                settings = CrewAISettings()

        # Test with valid configuration
        with patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key"}):
            settings = CrewAISettings()
            assert settings.openai_api_key == "sk-test-key"

    @pytest.mark.integration
    def test_end_to_end_workflow(self):
        """Integration test for complete workflow"""
@@ -1315,41 +1315,41 @@ class TestResearchCrew:
# Performance testing
class TestCrewPerformance:
    """Performance tests for CrewAI applications"""

    def test_memory_usage(self):
        """Test memory usage during crew execution"""
        import psutil
        import gc

        process = psutil.Process()
        initial_memory = process.memory_info().rss

        # Create and run crew multiple times
        for i in range(10):
            crew = ResearchCrew()
            # Simulate crew execution
            del crew
            gc.collect()

        final_memory = process.memory_info().rss
        memory_increase = final_memory - initial_memory

        # Assert memory increase is reasonable (less than 100MB)
        assert memory_increase < 100 * 1024 * 1024

    def test_concurrent_execution(self):
        """Test concurrent crew execution"""
        import concurrent.futures

        def run_crew(crew_id):
            crew = ResearchCrew()
            # Simulate execution
            return f"crew_{crew_id}_completed"

        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            futures = [executor.submit(run_crew, i) for i in range(5)]
            results = [future.result() for future in futures]

        assert len(results) == 5
        assert all("completed" in result for result in results)
@@ -1400,7 +1400,7 @@ class TestCrewPerformance:
### Development:
1. Always use .env files for sensitive configuration
2. Implement comprehensive error handling and logging
3. Use structured outputs with Pydantic for reliability (see the sketch after this list)
4. Test crew functionality with different input scenarios
5. Follow CrewAI patterns and conventions consistently
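For point 3, a minimal structured-output sketch (field names illustrative, and `writer` assumed to be an existing agent):

```python
from crewai import Task
from pydantic import BaseModel
from typing import List

class ReportOutput(BaseModel):
    title: str
    key_findings: List[str]

# output_pydantic makes CrewAI parse and validate the task result
# against the model instead of returning free-form text.
report_task = Task(
    description="Summarize the research into a titled report",
    expected_output="A short report with a title and 3-5 key findings",
    agent=writer,  # assumes an existing agent instance
    output_pydantic=ReportOutput,
)
```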
@@ -1426,4 +1426,4 @@ class TestCrewPerformance:
5. Use async patterns for I/O-bound operations (see the sketch after this list)
6. Implement proper connection pooling and resource management
7. Profile and optimize critical paths
8. Plan for horizontal scaling when needed
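For point 5, a sketch of overlapping two I/O-bound crew runs with `kickoff_async`; the crew classes and inputs are borrowed from the orchestration example above, so treat them as illustrative:

```python
import asyncio

async def run_concurrently():
    research = ResearchCrew().crew()
    analysis = AnalysisCrew().crew()
    # Both kickoffs await I/O (LLM and tool calls), so gather() overlaps them
    return await asyncio.gather(
        research.kickoff_async(inputs={"topic": "AI testing"}),
        analysis.kickoff_async(inputs={"research_results": "..."}),  # illustrative
    )

results = asyncio.run(run_concurrently())
```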

View File

@@ -7,18 +7,14 @@ permissions:
 env:
   OPENAI_API_KEY: fake-api-key
-  PYTHONUNBUFFERED: 1
 jobs:
   tests:
-    name: tests (${{ matrix.python-version }})
     runs-on: ubuntu-latest
     timeout-minutes: 15
     strategy:
-      fail-fast: true
       matrix:
         python-version: ['3.10', '3.11', '3.12', '3.13']
-        group: [1, 2, 3, 4, 5, 6, 7, 8]
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -27,9 +23,6 @@ jobs:
         uses: astral-sh/setup-uv@v3
         with:
           enable-cache: true
-          cache-dependency-glob: |
-            **/pyproject.toml
-            **/uv.lock
       - name: Set up Python ${{ matrix.python-version }}
         run: uv python install ${{ matrix.python-version }}
@@ -37,30 +30,5 @@ jobs:
       - name: Install the project
         run: uv sync --dev --all-extras
-      - name: Install SQLite with FTS5 support
-        run: |
-          # WORKAROUND: GitHub Actions' Ubuntu runner uses SQLite without FTS5 support compiled in.
-          # This is a temporary fix until the runner includes SQLite with FTS5 or Python's sqlite3
-          # module is compiled with FTS5 support by default.
-          # TODO: Remove this workaround once GitHub Actions runners include SQLite FTS5 support
-          # Install pysqlite3-binary which has FTS5 support
-          uv pip install pysqlite3-binary
-          # Create a sitecustomize.py to override sqlite3 with pysqlite3
-          mkdir -p .pytest_sqlite_override
-          echo "import sys; import pysqlite3; sys.modules['sqlite3'] = pysqlite3" > .pytest_sqlite_override/sitecustomize.py
-          # Test FTS5 availability
-          PYTHONPATH=.pytest_sqlite_override uv run python -c "import sqlite3; print(f'SQLite version: {sqlite3.sqlite_version}')"
-          PYTHONPATH=.pytest_sqlite_override uv run python -c "import sqlite3; conn = sqlite3.connect(':memory:'); conn.execute('CREATE VIRTUAL TABLE test USING fts5(content)'); print('FTS5 module available')"
-      - name: Run tests (group ${{ matrix.group }} of 8)
-        run: |
-          PYTHONPATH=.pytest_sqlite_override uv run pytest \
-            --block-network \
-            --timeout=30 \
-            -vv \
-            --splits 8 \
-            --group ${{ matrix.group }} \
-            --durations=10 \
-            -n auto \
-            --maxfail=3
+      - name: Run tests
+        run: uv run pytest --block-network --timeout=60 -vv

View File

@@ -94,7 +94,7 @@
            "pages": [
              "en/guides/advanced/customizing-prompts",
              "en/guides/advanced/fingerprinting"
            ]
          }
        ]
@@ -296,8 +296,7 @@
            "en/enterprise/features/webhook-streaming",
            "en/enterprise/features/traces",
            "en/enterprise/features/hallucination-guardrail",
-           "en/enterprise/features/integrations",
-           "en/enterprise/features/agent-repositories"
+           "en/enterprise/features/integrations"
          ]
        },
        {
@@ -374,7 +373,7 @@
            }
          ]
        }
      ]
    },
    {
@@ -731,7 +730,7 @@
            }
          ]
        }
      ]
    }
  ]
@@ -775,7 +774,7 @@
      "destination": "/en/introduction"
    },
    {
      "source": "/installation",
      "destination": "/en/installation"
    },
    {

View File

@@ -526,103 +526,6 @@ agent = Agent(
The context window management feature works automatically in the background. You don't need to call any special functions - just set `respect_context_window` to your preferred behavior and CrewAI handles the rest!
</Note>
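As a small sketch (role, goal, and backstory are illustrative), opting into automatic context handling looks like:

```python
from crewai import Agent

# respect_context_window=True (the default) summarizes overflowing
# history instead of failing on context-window errors.
researcher = Agent(
    role="Research Analyst",
    goal="Summarize long documents",
    backstory="Experienced analyst.",
    respect_context_window=True,
)
```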
## Direct Agent Interaction with `kickoff()`
Agents can be used directly without going through a task or crew workflow using the `kickoff()` method. This provides a simpler way to interact with an agent when you don't need the full crew orchestration capabilities.
### How `kickoff()` Works
The `kickoff()` method allows you to send messages directly to an agent and get a response, similar to how you would interact with an LLM but with all the agent's capabilities (tools, reasoning, etc.).
```python Code
from crewai import Agent
from crewai_tools import SerperDevTool
# Create an agent
researcher = Agent(
role="AI Technology Researcher",
goal="Research the latest AI developments",
tools=[SerperDevTool()],
verbose=True
)
# Use kickoff() to interact directly with the agent
result = researcher.kickoff("What are the latest developments in language models?")
# Access the raw response
print(result.raw)
```
### Parameters and Return Values
| Parameter | Type | Description |
| :---------------- | :---------------------------------- | :------------------------------------------------------------------------ |
| `messages` | `Union[str, List[Dict[str, str]]]` | Either a string query or a list of message dictionaries with role/content |
| `response_format` | `Optional[Type[Any]]` | Optional Pydantic model for structured output |
The method returns a `LiteAgentOutput` object with the following properties:
- `raw`: String containing the raw output text
- `pydantic`: Parsed Pydantic model (if a `response_format` was provided)
- `agent_role`: Role of the agent that produced the output
- `usage_metrics`: Token usage metrics for the execution
### Structured Output
You can get structured output by providing a Pydantic model as the `response_format`:
```python Code
from pydantic import BaseModel
from typing import List
class ResearchFindings(BaseModel):
main_points: List[str]
key_technologies: List[str]
future_predictions: str
# Get structured output
result = researcher.kickoff(
"Summarize the latest developments in AI for 2025",
response_format=ResearchFindings
)
# Access structured data
print(result.pydantic.main_points)
print(result.pydantic.future_predictions)
```
### Multiple Messages
You can also provide a conversation history as a list of message dictionaries:
```python Code
messages = [
{"role": "user", "content": "I need information about large language models"},
{"role": "assistant", "content": "I'd be happy to help with that! What specifically would you like to know?"},
{"role": "user", "content": "What are the latest developments in 2025?"}
]
result = researcher.kickoff(messages)
```
### Async Support
An asynchronous version is available via `kickoff_async()` with the same parameters:
```python Code
import asyncio
async def main():
result = await researcher.kickoff_async("What are the latest developments in AI?")
print(result.raw)
asyncio.run(main())
```
<Note>
The `kickoff()` method uses a `LiteAgent` internally, which provides a simpler execution flow while preserving all of the agent's configuration (role, goal, backstory, tools, etc.).
</Note>
## Important Considerations and Best Practices
### Security and Code Execution

View File

@@ -4,8 +4,6 @@ description: Learn how to use the CrewAI CLI to interact with CrewAI.
icon: terminal
---
<Warning>Since release 0.140.0, CrewAI Enterprise started a process of migrating their login provider. As such, the authentication flow via CLI was updated. Users that use Google to login, or that created their account after July 3rd, 2025 will be unable to log in with older versions of the `crewai` library.</Warning>
## Overview

The CrewAI CLI provides a set of commands to interact with CrewAI, allowing you to create, train, run, and manage crews & flows.
@@ -188,7 +186,10 @@ def crew(self) -> Crew:
Deploy the crew or flow to [CrewAI Enterprise](https://app.crewai.com).
- **Authentication**: You need to be authenticated to deploy to CrewAI Enterprise.
- You can login or create an account with:
+ ```shell Terminal
+ crewai signup
+ ```
+ If you already have an account, you can login with:
```shell Terminal
crewai login
```

View File

@@ -1,155 +0,0 @@
---
title: 'Agent Repositories'
description: 'Learn how to use Agent Repositories to share and reuse your agents across teams and projects'
icon: 'database'
---
Agent Repositories allow enterprise users to store, share, and reuse agent definitions across teams and projects. This feature enables organizations to maintain a centralized library of standardized agents, promoting consistency and reducing duplication of effort.
## Benefits of Agent Repositories
- **Standardization**: Maintain consistent agent definitions across your organization
- **Reusability**: Create an agent once and use it in multiple crews and projects
- **Governance**: Implement organization-wide policies for agent configurations
- **Collaboration**: Enable teams to share and build upon each other's work
## Using Agent Repositories
### Prerequisites
1. You must have a CrewAI account; you can start with the [free plan](https://app.crewai.com).
2. You need to be authenticated using the CrewAI CLI.
3. If you have more than one organization, make sure you are switched to the correct organization using the CLI command:
```bash
crewai org switch <org_id>
```
### Creating and Managing Agents in Repositories
To create and manage agents in repositories, use the Enterprise Dashboard.
### Loading Agents from Repositories
You can load agents from repositories in your code using the `from_repository` parameter:
```python
from crewai import Agent
# Create an agent by loading it from a repository
# The agent is loaded with all its predefined configurations
researcher = Agent(
from_repository="market-research-agent"
)
```
### Overriding Repository Settings
You can override specific settings from the repository by providing them in the configuration:
```python
researcher = Agent(
from_repository="market-research-agent",
goal="Research the latest trends in AI development", # Override the repository goal
verbose=True # Add a setting not in the repository
)
```
### Example: Creating a Crew with Repository Agents
```python
from crewai import Crew, Agent, Task
# Load agents from repositories
researcher = Agent(
from_repository="market-research-agent"
)
writer = Agent(
from_repository="content-writer-agent"
)
# Create tasks
research_task = Task(
description="Research the latest trends in AI",
agent=researcher
)
writing_task = Task(
description="Write a comprehensive report based on the research",
agent=writer
)
# Create the crew
crew = Crew(
agents=[researcher, writer],
tasks=[research_task, writing_task],
verbose=True
)
# Run the crew
result = crew.kickoff()
```
### Example: Using `kickoff()` with Repository Agents
You can also use repository agents directly with the `kickoff()` method for simpler interactions:
```python
from crewai import Agent
from pydantic import BaseModel
from typing import List
# Define a structured output format
class MarketAnalysis(BaseModel):
key_trends: List[str]
opportunities: List[str]
recommendation: str
# Load an agent from repository
analyst = Agent(
from_repository="market-analyst-agent",
verbose=True
)
# Get a free-form response
result = analyst.kickoff("Analyze the AI market in 2025")
print(result.raw) # Access the raw response
# Get structured output
structured_result = analyst.kickoff(
"Provide a structured analysis of the AI market in 2025",
response_format=MarketAnalysis
)
# Access structured data
print(f"Key Trends: {structured_result.pydantic.key_trends}")
print(f"Recommendation: {structured_result.pydantic.recommendation}")
```
## Best Practices
1. **Naming Convention**: Use clear, descriptive names for your repository agents
2. **Documentation**: Include comprehensive descriptions for each agent
3. **Tool Management**: Ensure that tools referenced by repository agents are available in your environment; see the sketch after this list
4. **Access Control**: Manage permissions to ensure only authorized team members can modify repository agents
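For the tool-management point, a minimal startup check, assuming the loaded agent exposes its resolved tools on `agent.tools` (the repository name below is illustrative):
```python
from crewai import Agent

researcher = Agent(from_repository="market-research-agent")

# Fail fast if the repository's tools did not resolve in this environment
for tool in researcher.tools or []:
    print(f"Resolved tool: {tool.name}")
```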
## Organization Management
To switch between organizations or see your current organization, use the CrewAI CLI:
```bash
# View current organization
crewai org current
# Switch to a different organization
crewai org switch <org_id>
# List all available organizations
crewai org list
```
<Note>
When loading agents from repositories, you must be authenticated and switched to the correct organization. If you receive errors, check your authentication status and organization settings using the CLI commands above.
</Note>
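If you prefer to surface those failures in code rather than at the CLI, a minimal sketch (the exception handling is deliberately broad because the exact error type depends on your CrewAI version):
```python
from crewai import Agent

try:
    analyst = Agent(from_repository="market-analyst-agent")
except Exception as exc:
    # Common causes: not logged in (`crewai login`) or the wrong
    # organization selected (`crewai org switch <org_id>`)
    raise RuntimeError(
        "Could not load repository agent; check CLI authentication and organization"
    ) from exc
```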

View File

@@ -41,8 +41,11 @@ The CLI provides the fastest way to deploy locally developed crews to the Enterp
First, you need to authenticate your CLI with the CrewAI Enterprise platform:
```bash
# If you already have a CrewAI Enterprise account
crewai login

# If you're creating a new account
crewai signup
```
When you run either command, the CLI will:

View File

@@ -149,33 +149,34 @@ from crewai_tools import SerperDevTool
# Create an agent with all available parameters
agent = Agent(
    role="Senior Data Scientist",
    goal="Analyze and interpret complex datasets to provide actionable insights",
    backstory="With over 10 years of experience in data science and machine learning, "
              "you excel at finding patterns in complex datasets.",
    llm="gpt-4",  # Default: OPENAI_MODEL_NAME or "gpt-4"
    function_calling_llm=None,  # Optional: Separate LLM for tool calling
    verbose=False,  # Default: False
    allow_delegation=False,  # Default: False
    max_iter=20,  # Default: 20 iterations
    max_rpm=None,  # Optional: Rate limit for API calls
    max_execution_time=None,  # Optional: Maximum execution time in seconds
    max_retry_limit=2,  # Default: 2 retries on error
    allow_code_execution=False,  # Default: False
    code_execution_mode="safe",  # Default: "safe" (options: "safe", "unsafe")
    respect_context_window=True,  # Default: True
    use_system_prompt=True,  # Default: True
    multimodal=False,  # Default: False
    inject_date=False,  # Default: False
    date_format="%Y-%m-%d",  # Default: ISO format
    reasoning=False,  # Default: False
    max_reasoning_attempts=None,  # Default: None
    tools=[SerperDevTool()],  # Optional: List of tools
    knowledge_sources=None,  # Optional: List of knowledge sources
    embedder=None,  # Optional: Custom embedder configuration
    system_template=None,  # Optional: Custom system prompt template
    prompt_template=None,  # Optional: Custom prompt template
    response_template=None,  # Optional: Custom response template
    step_callback=None,  # Optional: Callback function for monitoring
)
```
@@ -184,62 +185,65 @@ Let's break down some key parameter combinations for common use cases:
#### Basic Research Agent
```python Code
research_agent = Agent(
    role="Research Analyst",
    goal="Find and summarize information about specific topics",
    backstory="You are an experienced researcher with attention to detail",
    tools=[SerperDevTool()],
    verbose=True  # Enable logging for debugging
)
```
#### Code Development Agent
```python Code
dev_agent = Agent(
    role="Senior Python Developer",
    goal="Write and debug Python code",
    backstory="Expert Python developer with 10 years of experience",
    allow_code_execution=True,
    code_execution_mode="safe",  # Uses Docker for safety
    max_execution_time=300,  # 5-minute timeout
    max_retry_limit=3  # More retries for complex code tasks
)
```
#### Long-Running Analysis Agent
```python Code
analysis_agent = Agent(
    role="Data Analyst",
    goal="Perform deep analysis of large datasets",
    backstory="Specialized in big data analysis and pattern recognition",
    memory=True,
    respect_context_window=True,
    max_rpm=10,  # Limit API calls
    function_calling_llm="gpt-4o-mini"  # Cheaper model for tool calls
)
```
#### Custom Template Agent
```python Code
custom_agent = Agent(
    role="Customer Service Representative",
    goal="Assist customers with their inquiries",
    backstory="Experienced in customer support with a focus on satisfaction",
    system_template="""<|start_header_id|>system<|end_header_id|>
{{ .System }}<|eot_id|>""",
    prompt_template="""<|start_header_id|>user<|end_header_id|>
{{ .Prompt }}<|eot_id|>""",
    response_template="""<|start_header_id|>assistant<|end_header_id|>
{{ .Response }}<|eot_id|>""",
)
```
#### Date-Aware Agent with Reasoning
```python Code
strategic_agent = Agent(
    role="Market Analyst",
    goal="Track market movements with precise date references and strategic planning",
    backstory="Expert in time-sensitive financial analysis and strategic reporting",
    inject_date=True,  # Automatically inject current date into tasks
    date_format="%B %d, %Y",  # Format as "May 21, 2025"
    reasoning=True,  # Enable strategic planning
    max_reasoning_attempts=2,  # Limit planning iterations
    verbose=True
)
```
@@ -247,12 +251,12 @@ strategic_agent = Agent(
#### Reasoning Agent
```python Code
reasoning_agent = Agent(
    role="Strategic Planner",
    goal="Analyze complex problems and create detailed execution plans",
    backstory="Expert strategic planner who methodically breaks down complex challenges",
    reasoning=True,  # Enable reasoning and planning
    max_reasoning_attempts=3,  # Limit reasoning attempts
    max_iter=30,  # Allow more iterations for complex planning
    verbose=True
)
```
@@ -260,10 +264,10 @@ reasoning_agent = Agent(
#### Multimodal Agent
```python Code
multimodal_agent = Agent(
    role="Visual Content Analyst",
    goal="Analyze and process both text and visual content",
    backstory="Specialized in multimodal analysis combining text and image understanding",
    multimodal=True,  # Enable multimodal capabilities
    verbose=True
)
```
@@ -332,8 +336,8 @@ wiki_tool = WikipediaTools()
# Add the tools to the agent
researcher = Agent(
    role="AI Technology Researcher",
    goal="Research the latest AI developments",
    tools=[search_tool, wiki_tool],
    verbose=True
)
@@ -347,9 +351,9 @@ Agents can maintain memory of their interactions and use context from tasks
from crewai import Agent

analyst = Agent(
    role="Data Analyst",
    goal="Analyze and remember complex data patterns",
    memory=True,  # Enable memory
    verbose=True
)
```
@@ -376,10 +380,10 @@ This is the **default and recommended** configuration for most cases.
```python Code
# Agent with automatic context management (default)
smart_agent = Agent(
    role="Research Analyst",
    goal="Analyze large documents and datasets",
    backstory="Expert at processing extensive information",
    respect_context_window=True,  # 🔑 Default: auto-handle context limits
    verbose=True
)
```

View File

@@ -3,7 +3,6 @@ title: CLI
description: Learn how to use the CrewAI CLI to interact with CrewAI.
icon: terminal
---
## Overview
@@ -76,20 +75,6 @@ Example:
crewai train -n 10 -f my_training_data.pkl
```
### 4. Replay
Replay a crew execution from a specific task.

View File

@@ -15,18 +15,18 @@ from crewai import Agent, Crew, Task
# Enable collaboration for agents
researcher = Agent(
    role="Research Specialist",
    goal="Conduct thorough research on any topic",
    backstory="Expert researcher with access to various sources",
    allow_delegation=True,  # 🔑 Key setting for collaboration
    verbose=True
)

writer = Agent(
    role="Content Writer",
    goal="Create engaging content based on research",
    backstory="Skilled writer who transforms research into compelling content",
    allow_delegation=True,  # 🔑 Enables asking questions to other agents
    verbose=True
)
@@ -67,17 +67,19 @@ from crewai import Agent, Crew, Task, Process
# Create collaborative agents
researcher = Agent(
    role="Research Specialist",
    goal="Find accurate, up-to-date information on any topic",
    backstory="""You're a meticulous researcher with expertise in finding
    reliable sources and fact-checking information across various domains.""",
    allow_delegation=True,
    verbose=True
)

writer = Agent(
    role="Content Writer",
    goal="Create engaging, well-structured content",
    backstory="""You're a skilled content writer who excels at transforming
    research into compelling, readable content for different audiences.""",
    allow_delegation=True,
    verbose=True
)
@@ -93,17 +95,17 @@ editor = Agent(
# Create a task that encourages collaboration
article_task = Task(
    description="""Write a comprehensive 1000-word article about 'The Future of AI in Healthcare'.

    The article should include:
    - Current AI applications in healthcare
    - Emerging trends and technologies
    - Potential challenges and ethical considerations
    - Expert predictions for the next 5 years

    Collaborate with your teammates to ensure accuracy and quality.""",
    expected_output="A well-researched, engaging 1000-word article with proper structure and citations",
    agent=writer  # Writer leads, but can delegate research to researcher
)

# Create collaborative crew
@@ -122,37 +124,37 @@ result = crew.kickoff()
### Pattern 1: Research → Write → Edit
```python
research_task = Task(
    description="Research the latest developments in quantum computing",
    expected_output="Comprehensive research summary with key findings and sources",
    agent=researcher
)

writing_task = Task(
    description="Write an article based on the research findings",
    expected_output="Engaging 800-word article about quantum computing",
    agent=writer,
    context=[research_task]  # Gets research output as context
)

editing_task = Task(
    description="Edit and polish the article for publication",
    expected_output="Publication-ready article with improved clarity and flow",
    agent=editor,
    context=[writing_task]  # Gets article draft as context
)
```
### Pattern 2: Collaborative Single Task
```python
collaborative_task = Task(
    description="""Create a marketing strategy for a new AI product.

    Writer: Focus on messaging and content strategy
    Researcher: Provide market analysis and competitor insights

    Work together to create a comprehensive strategy.""",
    expected_output="Complete marketing strategy with research backing",
    agent=writer  # Lead agent, but can delegate to researcher
)
```
@@ -165,35 +167,35 @@ from crewai import Agent, Crew, Task, Process
# Manager agent coordinates the team
manager = Agent(
    role="Project Manager",
    goal="Coordinate team efforts and ensure project success",
    backstory="Experienced project manager skilled at delegation and quality control",
    allow_delegation=True,
    verbose=True
)

# Specialist agents
researcher = Agent(
    role="Researcher",
    goal="Provide accurate research and analysis",
    backstory="Expert researcher with deep analytical skills",
    allow_delegation=False,  # Specialists focus on their expertise
    verbose=True
)

writer = Agent(
    role="Writer",
    goal="Create compelling content",
    backstory="Skilled writer who creates engaging content",
    allow_delegation=False,
    verbose=True
)

# Manager-led task
project_task = Task(
    description="Create a comprehensive market analysis report with recommendations",
    expected_output="Executive summary, detailed analysis, and strategic recommendations",
    agent=manager  # Manager will delegate to specialists
)

# Hierarchical crew

View File

@@ -153,32 +153,32 @@ from crewai_tools import YourCustomTool
class YourCrewName:
    def agent_one(self) -> Agent:
        return Agent(
            role="Data Analyst",
            goal="Analyze data trends in the market",
            backstory="An experienced data analyst with a background in economics",
            verbose=True,
            tools=[YourCustomTool()]
        )

    def agent_two(self) -> Agent:
        return Agent(
            role="Market Researcher",
            goal="Gather information on market dynamics",
            backstory="A diligent researcher with a keen eye for detail",
            verbose=True
        )

    def task_one(self) -> Task:
        return Task(
            description="Collect recent market data and identify trends.",
            expected_output="A report summarizing key trends in the market.",
            agent=self.agent_one()
        )

    def task_two(self) -> Task:
        return Task(
            description="Research factors affecting market dynamics.",
            expected_output="An analysis of factors influencing the market.",
            agent=self.agent_two()
        )

View File

@@ -51,24 +51,24 @@ from crewai.utilities.events import (
)
from crewai.utilities.events.base_event_listener import BaseEventListener

class MyCustomListener(BaseEventListener):
    def __init__(self):
        super().__init__()

    def setup_listeners(self, crewai_event_bus):
        @crewai_event_bus.on(CrewKickoffStartedEvent)
        def on_crew_started(source, event):
            print(f"Crew '{event.crew_name}' has started execution!")

        @crewai_event_bus.on(CrewKickoffCompletedEvent)
        def on_crew_completed(source, event):
            print(f"Crew '{event.crew_name}' has completed execution!")
            print(f"Output: {event.output}")

        @crewai_event_bus.on(AgentExecutionCompletedEvent)
        def on_agent_execution_completed(source, event):
            print(f"Agent '{event.agent.role}' completed task")
            print(f"Output: {event.output}")
```
## Registering Your Listener Properly

View File

@@ -486,9 +486,8 @@ There are two ways to run a flow:
You can run a flow programmatically by creating an instance of your flow class and calling the `kickoff()` method:
```python
flow = ExampleFlow()
result = flow.kickoff()
```
### Using the CLI

View File

@@ -39,17 +39,17 @@ llm = LLM(model="gpt-4o-mini", temperature=0)
# Create an agent with the knowledge store
agent = Agent(
    role="About User",
    goal="You know everything about the user.",
    backstory="You are a master at understanding people and their preferences.",
    verbose=True,
    allow_delegation=False,
    llm=llm,
)

task = Task(
    description="Answer the following questions about the user: {question}",
    expected_output="An answer to the question.",
    agent=agent,
)
@@ -87,17 +87,17 @@ llm = LLM(model="gpt-4o-mini", temperature=0)
# Create an agent with the knowledge store
agent = Agent(
    role="About papers",
    goal="You know everything about the papers.",
    backstory="You are a master at understanding papers and their content.",
    verbose=True,
    allow_delegation=False,
    llm=llm,
)

task = Task(
    description="Answer the following questions about the papers: {question}",
    expected_output="An answer to the question.",
    agent=agent,
)
@@ -201,16 +201,16 @@ specialist_knowledge = StringKnowledgeSource(
)

specialist_agent = Agent(
    role="Technical Specialist",
    goal="Provide technical expertise",
    backstory="Expert in specialized technical domains",
    knowledge_sources=[specialist_knowledge]  # Agent-specific knowledge
)

task = Task(
    description="Answer technical questions",
    agent=specialist_agent,
    expected_output="Technical answer"
)

# No crew-level knowledge required
@@ -240,7 +240,7 @@ Each knowledge level uses independent storage collections:
```python
# Agent knowledge storage
agent_collection_name = agent.role  # e.g., "Technical Specialist"

# Crew knowledge storage
crew_collection_name = "crew"
@@ -248,7 +248,7 @@ crew_collection_name = "crew"
# Both stored in same ChromaDB instance but different collections
# Path: ~/.local/share/CrewAI/{project}/knowledge/
# ├── crew/                    # Crew knowledge collection
# ├── Technical Specialist/    # Agent knowledge collection
# └── Another Agent Role/      # Another agent's collection
```
@@ -265,7 +265,7 @@ agent_knowledge = StringKnowledgeSource(
)

agent = Agent(
    role="Specialist",
    goal="Use specialized knowledge",
    backstory="Expert with specific knowledge",
    knowledge_sources=[agent_knowledge],
@@ -299,10 +299,10 @@ specialist_knowledge = StringKnowledgeSource(
)

specialist = Agent(
    role="Technical Specialist",
    goal="Provide technical expertise",
    backstory="Technical expert",
    knowledge_sources=[specialist_knowledge]  # Agent-specific
)

generalist = Agent(

View File

@@ -78,15 +78,15 @@ There are different places in the CrewAI code where you can specify the model
# Advanced configuration with detailed parameters
llm = LLM(
    model="model-id-here",  # gpt-4o, gemini-2.0-flash, anthropic/claude...
    temperature=0.7,        # Higher for more creative outputs
    timeout=120,            # Seconds to wait for response
    max_tokens=4000,        # Maximum length of response
    top_p=0.9,              # Nucleus sampling parameter
    frequency_penalty=0.1,  # Reduce repetition
    presence_penalty=0.1,   # Encourage topic diversity
    response_format={"type": "json"},  # For structured responses
    seed=42                 # For reproducible results
)
```
@@ -127,13 +127,13 @@ In this section, you'll find detailed examples that help you select and configure
from crewai import LLM

llm = LLM(
    model="openai/gpt-4",  # call the model as provider/model_name
    temperature=0.8,
    max_tokens=150,
    top_p=0.9,
    frequency_penalty=0.1,
    presence_penalty=0.1,
    stop=["END"],
    seed=42
)
```
@@ -169,7 +169,7 @@ In this section, you'll find detailed examples that help you select and configure
llm = LLM(
    model="meta_llama/Llama-4-Scout-17B-16E-Instruct-FP8",
    temperature=0.8,
    stop=["END"],
    seed=42
)
```

View File

@@ -17,7 +17,7 @@ Getting started with the planning feature is very simple; the only required step
from crewai import Crew, Agent, Task, Process

# Assemble your crew with planning capabilities
my_crew = Crew(
    agents=self.agents,
    tasks=self.tasks,
    process=Process.sequential,

View File

@@ -28,23 +28,23 @@ from crewai import Crew, Process
# Example: Creating a crew with a sequential process
crew = Crew(
    agents=my_agents,
    tasks=my_tasks,
    process=Process.sequential
)

# Example: Creating a crew with a hierarchical process
# Make sure to provide a manager_llm or manager_agent
crew = Crew(
    agents=my_agents,
    tasks=my_tasks,
    process=Process.hierarchical,
    manager_llm="gpt-4o"
    # or
    # manager_agent=my_manager_agent
)
```
**Note:** Make sure `my_agents` and `my_tasks` are defined before creating the `Crew` object, and for the hierarchical process you must also provide a `manager_llm` or a `manager_agent`.
## Sequential Process

View File

@@ -15,12 +15,12 @@ To enable reasoning for an agent, simply set `reasoning=True` when creating
```python
from crewai import Agent

agent = Agent(
    role="Data Analyst",
    goal="Analyze complex datasets and provide insights",
    backstory="You are an experienced data analyst with expertise in finding patterns in complex data.",
    reasoning=True,  # Enable reasoning
    max_reasoning_attempts=3  # Optional: Set a maximum number of reasoning attempts
)
```
@@ -53,23 +53,23 @@ Here is a complete example:
from crewai import Agent, Task, Crew

# Create an agent with reasoning enabled
analyst = Agent(
    role="Data Analyst",
    goal="Analyze data and provide insights",
    backstory="You are an expert data analyst.",
    reasoning=True,
    max_reasoning_attempts=3  # Optional: Set a limit on reasoning attempts
)

# Create a task
analysis_task = Task(
    description="Analyze the provided sales data and identify key trends.",
    expected_output="A report highlighting the top 3 sales trends.",
    agent=analyst
)

# Create a crew and run the task
crew = Crew(agents=[analyst], tasks=[analysis_task])
result = crew.kickoff()

print(result)
@@ -90,16 +90,16 @@ logging.basicConfig(level=logging.INFO)
# Create an agent with reasoning enabled
agent = Agent(
    role="Data Analyst",
    goal="Analyze data and provide insights",
    reasoning=True,
    max_reasoning_attempts=3
)

# Create a task
task = Task(
    description="Analyze the provided sales data and identify key trends.",
    expected_output="A report highlighting the top 3 sales trends.",
    agent=agent
)
@@ -113,7 +113,7 @@ result = agent.execute_task(task)
Here is an example of what a reasoning plan might look like for a data analysis task:
```
Task: Analyze the provided sales data and identify key trends.

Reasoning Plan:
I'll analyze the sales data to identify the top 3 trends.

View File

@@ -386,7 +386,7 @@ def validate_with_context(result: TaskOutput) -> Tuple[bool, Any]:
        validated_data = perform_validation(result)
        return (True, validated_data)
    except ValidationError as e:
        return (False, f"VALIDATION_ERROR: {str(e)}")
    except Exception as e:
        return (False, str(e))
```

View File

@@ -67,17 +67,17 @@ web_rag_tool = WebsiteSearchTool()
# Create the agents
researcher = Agent(
    role='Market Research Analyst',
    goal='Provide up-to-date market analysis of the AI industry',
    backstory='An expert analyst with a keen eye for market trends.',
    tools=[search_tool, web_rag_tool],
    verbose=True
)

writer = Agent(
    role='Content Writer',
    goal='Craft engaging blog posts about the AI industry',
    backstory='A skilled writer with a passion for technology.',
    tools=[docs_tool, file_tool],
    verbose=True
)

View File

@@ -36,18 +36,19 @@ To train your crew programmatically, follow these steps:
3. Run the training command inside a try-except block to handle potential errors.
```python Code
n_iterations = 2
inputs = {"topic": "CrewAI Training"}
filename = "your_model.pkl"

try:
    YourCrewName_Crew().crew().train(
        n_iterations=n_iterations,
        inputs=inputs,
        filename=filename
    )
except Exception as e:
    raise Exception(f"An error occurred while training the crew: {e}")
```
### Key Points

View File

@@ -26,13 +26,13 @@ from crewai.tasks.hallucination_guardrail import HallucinationGuardrail
from crewai import LLM

# Basic usage - uses the task's expected_output as context
guardrail = HallucinationGuardrail(
    llm=LLM(model="gpt-4o-mini")
)

# With explicit reference context
context_guardrail = HallucinationGuardrail(
    context="AI helps with various tasks including analysis and generation.",
    llm=LLM(model="gpt-4o-mini")
)
```
@@ -43,11 +43,11 @@ context_guardrail = HallucinationGuardrail(
from crewai import Task

# Create your task with the guardrail
task = Task(
    description="Write a summary about AI capabilities",
    expected_output="A factual summary based on the provided context",
    agent=my_agent,
    guardrail=guardrail  # Add the guardrail to validate the output
)
```
@@ -59,8 +59,8 @@ For stricter validation, you can define a custom faithfulness threshold
```python
# Strict guardrail requiring a high faithfulness score
strict_guardrail = HallucinationGuardrail(
    context="Quantum computing uses qubits that exist in superposition states.",
    llm=LLM(model="gpt-4o-mini"),
    threshold=8.0  # Requires a score >= 8 to pass validation
)
@@ -72,10 +72,10 @@ If your task uses tools, you can include tool responses in the validation
```python
# Guardrail with tool response context
weather_guardrail = HallucinationGuardrail(
    context="Current weather information for the requested location",
    llm=LLM(model="gpt-4o-mini"),
    tool_response="Weather API returned: Temperature 22°C, Humidity 65%, Clear skies"
)
```
@@ -123,15 +123,15 @@ When a guardrail is added to a task, it automatically validates the output
```python
# Task output validation flow
task_output = agent.execute_task(task)
validation_result = guardrail(task_output)

if validation_result.valid:
    # Task completes successfully
    return task_output
else:
    # Task fails with validation feedback
    raise ValidationError(validation_result.feedback)
```
### Event Tracking
@@ -151,10 +151,10 @@ The guardrail integrates with CrewAI's event system to provide observability
Include all relevant factual information the AI should base its output on:
```python
context = """
Company XYZ was founded in 2020 and specializes in renewable energy solutions.
They have 150 employees and generated $50M revenue in 2023.
Their main products include solar panels and wind turbines.
"""
```
</Step>
@@ -164,10 +164,10 @@ The guardrail integrates with CrewAI's event system to provide observability
```python
# Good: Focused context
context = "The current weather in New York is 18°C with light rain."

# Avoid: Irrelevant information
context = "The weather is 18°C. The city has 8 million people. Traffic is heavy."
```
</Step>

View File

@@ -84,31 +84,31 @@ from crewai import Agent, Task, Crew
from crewai_tools import CrewaiEnterpriseTools

# Get enterprise tools (Gmail tool will be included)
enterprise_tools = CrewaiEnterpriseTools(
    enterprise_token="your_enterprise_token"
)

# Print the available tools
print(enterprise_tools)

# Create an agent with Gmail capabilities
email_agent = Agent(
    role="Email Manager",
    goal="Manage and organize email communications",
    backstory="An AI assistant specialized in email management and communication.",
    tools=enterprise_tools
)

# Task to send an email
email_task = Task(
    description="Draft and send a follow-up email to john@example.com about the project update",
    agent=email_agent,
    expected_output="Confirmation that email was sent successfully"
)

# Execute the task
crew = Crew(
    agents=[email_agent],
    tasks=[email_task]
)

# Run the crew
@@ -125,23 +125,23 @@ enterprise_tools = CrewaiEnterpriseTools(
)

gmail_tool = enterprise_tools["gmail_find_email"]

gmail_agent = Agent(
    role="Gmail Manager",
    goal="Manage gmail communications and notifications",
    backstory="An AI assistant that helps coordinate gmail communications.",
    tools=[gmail_tool]
)

notification_task = Task(
    description="Find the email from john@example.com",
    agent=gmail_agent,
    expected_output="Email found from john@example.com"
)

# Execute the task
crew = Crew(
    agents=[gmail_agent],
    tasks=[notification_task]
)
```

View File

@@ -30,7 +30,7 @@ Before using the Tool Repository, make sure you have:
To install a tool:
```bash
crewai tool install <tool-name>
```
This installs the tool and adds it to `pyproject.toml`.
@@ -40,7 +40,7 @@ This installs the tool and adds it to `pyproject.toml`.
To create a new tool project:
```bash
crewai tool create <tool-name>
```
This scaffolds a structured tool project locally.
@@ -76,7 +76,7 @@ To update a published tool:
3. Commit the changes and publish
```bash
git commit -m "Update version to 0.1.1"
crewai tool publish
```

View File

@@ -12,17 +12,16 @@ Enterprise Event Streaming lets you receive real-time updates
When using the Kickoff API, include a `webhooks` object in your request, for example:
```json
{
  "inputs": {"foo": "bar"},
  "webhooks": {
    "events": ["crew_kickoff_started", "llm_call_started"],
    "url": "https://your.endpoint/webhook",
    "realtime": false,
    "authentication": {
      "strategy": "bearer",
      "token": "my-secret-token"
    }
  }
}
@@ -34,20 +33,19 @@ If `realtime` is set to `true`, each event is delivered individually
Each webhook sends a list of events:
```json
{
  "events": [
    {
      "id": "event-id",
      "execution_id": "crew-run-id",
      "timestamp": "2025-02-16T10:58:44.965Z",
      "type": "llm_call_started",
      "data": {
        "model": "gpt-4",
        "messages": [
          {"role": "system", "content": "You are an assistant."},
          {"role": "user", "content": "Summarize this article."}
        ]
      }
    }
  ]
}
```
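To make the delivery contract concrete, here is a minimal receiver sketch. It assumes a FastAPI endpoint and the `bearer` strategy from the kickoff example above; the route path, token value, and framework choice are illustrative, not part of the CrewAI API:
```python
from fastapi import FastAPI, Header, HTTPException, Request

app = FastAPI()
EXPECTED_TOKEN = "my-secret-token"  # Must match the token sent in the kickoff request

@app.post("/webhook")
async def receive_events(request: Request, authorization: str = Header(default="")):
    # The "bearer" strategy sends: Authorization: Bearer <token>
    if authorization != f"Bearer {EXPECTED_TOKEN}":
        raise HTTPException(status_code=401, detail="Invalid token")

    payload = await request.json()
    # Payload shape follows the example above: {"events": [...]}
    for event in payload.get("events", []):
        print(event["type"], event["execution_id"], event["timestamp"])
    return {"ok": True}
```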

View File

@@ -41,8 +41,11 @@ The CLI provides the fastest way to deploy locally developed crews
First, you need to authenticate your CLI with the CrewAI Enterprise platform:
```bash
# If you already have a CrewAI Enterprise account
crewai login

# If you're creating a new account
crewai signup
```
When you run either command, the CLI will:

View File

@@ -16,17 +16,17 @@ from crewai import CrewBase
from crewai.project import before_kickoff

@CrewBase
class MyCrew:
    @before_kickoff
    def prepare_data(self, inputs):
        # Preprocess or modify inputs
        inputs['processed'] = True
        return inputs

    # ...
```
In this example, the `prepare_data` function modifies the inputs by adding a new key-value pair indicating that the inputs were processed.
## After Kickoff Hook
@@ -39,17 +39,17 @@ from crewai import CrewBase
from crewai.project import after_kickoff

@CrewBase
class MyCrew:
    @after_kickoff
    def log_results(self, result):
        # Log or modify the results
        print("Crew execution completed with result:", result)
        return result

    # ...
```
In the `log_results` function, the results of the crew execution are simply printed. You can extend this to perform more complex operations, such as sending notifications or integrating with other services.
## Using Both Hooks

View File

@@ -77,9 +77,9 @@ search_tool = SerperDevTool()
# Initialize the agent with advanced options
agent = Agent(
    role='Research Analyst',
    goal='Provide up-to-date market analysis',
    backstory='An expert analyst with a keen eye for market trends.',
    tools=[search_tool],
    memory=True,  # Enable memory
    verbose=True,
@@ -98,9 +98,14 @@ efficiency within the CrewAI ecosystem. If needed, delegation can be
```python Code
agent = Agent(
    role='Content Writer',
    goal='Write engaging content on market trends',
    backstory='A seasoned writer with expertise in market analysis.',
    allow_delegation=True  # Enable delegation
)
```
## Conclusion
Customizing agents in CrewAI by defining their roles, goals, backstories, and tools, along with advanced options such as language model customization, memory, performance tuning, and delegation preferences,
gives you a sophisticated AI team prepared to tackle complex challenges.

View File

@@ -45,17 +45,17 @@ from crewai import Crew, Agent, Task
# Create an agent with code execution enabled
coding_agent = Agent(
    role="Python Data Analyst",
    goal="Analyze data and provide insights using Python",
    backstory="You are an experienced data analyst with strong Python skills.",
    allow_code_execution=True
)

# Create a task that requires code execution
data_analysis_task = Task(
    description="Analyze the given dataset and calculate the average age of participants. Ages: {ages}",
    agent=coding_agent,
    expected_output="The average age of the participants."
)

# Create a crew and add the task
@@ -83,23 +83,23 @@ from crewai import Crew, Agent, Task
# Create an agent with code execution enabled
coding_agent = Agent(
    role="Python Data Analyst",
    goal="Analyze data and provide insights using Python",
    backstory="You are an experienced data analyst with strong Python skills.",
    allow_code_execution=True
)

# Create tasks that require code execution
task_1 = Task(
    description="Analyze the first dataset and calculate the average age of participants. Ages: {ages}",
    agent=coding_agent,
    expected_output="The average age of the participants."
)

task_2 = Task(
    description="Analyze the second dataset and calculate the average age of participants. Ages: {ages}",
    agent=coding_agent,
    expected_output="The average age of the participants."
)

# Create two crews and add tasks

View File

@@ -43,11 +43,11 @@ try:
    with MCPServerAdapter(server_params_list) as aggregated_tools:
        print(f"Available aggregated tools: {[tool.name for tool in aggregated_tools]}")

        multi_server_agent = Agent(
            role="Versatile Assistant",
            goal="Utilize tools from local Stdio, remote SSE, and remote HTTP MCP servers.",
            backstory="An AI agent capable of leveraging a diverse set of tools from multiple sources.",
            tools=aggregated_tools,  # All tools are available here
            verbose=True,
        )

View File

@@ -73,10 +73,10 @@ server_params = {
with MCPServerAdapter(server_params) as mcp_tools:
    print(f"Available tools: {[tool.name for tool in mcp_tools]}")

    my_agent = Agent(
        role="MCP Tool User",
        goal="Utilize tools from an MCP server.",
        backstory="I can connect to MCP servers and use their tools.",
        tools=mcp_tools,  # Pass the loaded tools to your agent
        reasoning=True,
        verbose=True
@@ -91,10 +91,10 @@ Este padrão geral mostra como integrar ferramentas. Para exemplos específicos
with MCPServerAdapter(server_params) as mcp_tools:
    print(f"Available tools: {[tool.name for tool in mcp_tools]}")

    my_agent = Agent(
        role="MCP Tool User",
        goal="Utilize tools from an MCP server.",
        backstory="I can connect to MCP servers and use their tools.",
        tools=mcp_tools["tool_name"],  # Pass the filtered tools to your agent
        reasoning=True,
        verbose=True


@@ -37,24 +37,24 @@ try:
print(f"Available tools from SSE MCP server: {[tool.name for tool in tools]}") print(f"Available tools from SSE MCP server: {[tool.name for tool in tools]}")
# Example: Using a tool from the SSE MCP server # Example: Using a tool from the SSE MCP server
agente_sse = Agent( sse_agent = Agent(
role="Usuário de Serviço Remoto", role="Remote Service User",
goal="Utilizar uma ferramenta fornecida por um servidor MCP remoto via SSE.", goal="Utilize a tool provided by a remote SSE MCP server.",
backstory="Um agente de IA que conecta a serviços externos via SSE.", backstory="An AI agent that connects to external services via SSE.",
tools=tools, tools=tools,
reasoning=True, reasoning=True,
verbose=True, verbose=True,
) )
sse_task = Task( sse_task = Task(
description="Buscar atualizações em tempo real das ações 'AAPL' usando uma ferramenta SSE.", description="Fetch real-time stock updates for 'AAPL' using an SSE tool.",
expected_output="O preço mais recente da ação AAPL.", expected_output="The latest stock price for AAPL.",
agent=agente_sse, agent=sse_agent,
markdown=True markdown=True
) )
sse_crew = Crew( sse_crew = Crew(
agents=[agente_sse], agents=[sse_agent],
tasks=[sse_task], tasks=[sse_task],
verbose=True, verbose=True,
process=Process.sequential process=Process.sequential
@@ -101,16 +101,16 @@ try:
print(f"Available tools (manual SSE): {[tool.name for tool in tools]}") print(f"Available tools (manual SSE): {[tool.name for tool in tools]}")
manual_sse_agent = Agent( manual_sse_agent = Agent(
role="Analista Remoto de Dados", role="Remote Data Analyst",
goal="Analisar dados obtidos de um servidor MCP remoto SSE usando gerenciamento manual de conexão.", goal="Analyze data fetched from a remote SSE MCP server using manual connection management.",
backstory="Um agente de IA especializado em gerenciar conexões SSE explicitamente.", backstory="An AI skilled in handling SSE connections explicitly.",
tools=tools, tools=tools,
verbose=True verbose=True
) )
analysis_task = Task( analysis_task = Task(
description="Buscar e analisar as tendências mais recentes de atividade de usuários do servidor SSE.", description="Fetch and analyze the latest user activity trends from the SSE server.",
expected_output="Um relatório resumido das tendências de atividade dos usuários.", expected_output="A summary report of user activity trends.",
agent=manual_sse_agent agent=manual_sse_agent
) )


@@ -38,24 +38,24 @@ with MCPServerAdapter(server_params) as tools:
print(f"Available tools from Stdio MCP server: {[tool.name for tool in tools]}") print(f"Available tools from Stdio MCP server: {[tool.name for tool in tools]}")
# Exemplo: Usando as ferramentas do servidor MCP Stdio em um Agente CrewAI # Exemplo: Usando as ferramentas do servidor MCP Stdio em um Agente CrewAI
pesquisador_local = Agent( research_agent = Agent(
role="Processador Local de Dados", role="Local Data Processor",
goal="Processar dados usando uma ferramenta local baseada em Stdio.", goal="Process data using a local Stdio-based tool.",
backstory="Uma IA que utiliza scripts locais via MCP para tarefas especializadas.", backstory="An AI that leverages local scripts via MCP for specialized tasks.",
tools=tools, tools=tools,
reasoning=True, reasoning=True,
verbose=True, verbose=True,
) )
processing_task = Task( processing_task = Task(
description="Processar o arquivo de dados de entrada 'data.txt' e resumir seu conteúdo.", description="Process the input data file 'data.txt' and summarize its contents.",
expected_output="Um resumo dos dados processados.", expected_output="A summary of the processed data.",
agent=pesquisador_local, agent=research_agent,
markdown=True markdown=True
) )
data_crew = Crew( data_crew = Crew(
agents=[pesquisador_local], agents=[research_agent],
tasks=[processing_task], tasks=[processing_task],
verbose=True, verbose=True,
process=Process.sequential process=Process.sequential
@@ -95,16 +95,16 @@ try:
    # Example: Using the tools with your Agent, Task, Crew setup
    manual_agent = Agent(
        role="Local Task Executor",
        goal="Execute a specific local task using a manually managed Stdio tool.",
        backstory="An AI proficient in controlling local processes via MCP.",
        tools=tools,
        verbose=True
    )

    manual_task = Task(
        description="Execute the 'perform_analysis' command via the Stdio tool.",
        expected_output="Results of the analysis.",
        agent=manual_agent
    )


@@ -35,22 +35,22 @@ try:
    with MCPServerAdapter(server_params) as tools:
        print(f"Available tools from Streamable HTTP MCP server: {[tool.name for tool in tools]}")

        http_agent = Agent(
            role="HTTP Service Integrator",
            goal="Utilize tools from a remote MCP server via Streamable HTTP.",
            backstory="An AI agent adept at interacting with complex web services.",
            tools=tools,
            verbose=True,
        )

        http_task = Task(
            description="Perform a complex data query using a tool from the Streamable HTTP server.",
            expected_output="The result of the complex data query.",
            agent=http_agent,
        )

        http_crew = Crew(
            agents=[http_agent],
            tasks=[http_task],
            verbose=True,
            process=Process.sequential
@@ -91,16 +91,16 @@ try:
print(f"Available tools (manual Streamable HTTP): {[tool.name for tool in tools]}") print(f"Available tools (manual Streamable HTTP): {[tool.name for tool in tools]}")
manual_http_agent = Agent( manual_http_agent = Agent(
role="Usuário Avançado de Serviços Web", role="Advanced Web Service User",
goal="Interagir com um servidor MCP usando conexões HTTP Streamable gerenciadas manualmente.", goal="Interact with an MCP server using manually managed Streamable HTTP connections.",
backstory="Um especialista em IA em ajustar integrações baseadas em HTTP.", backstory="An AI specialist in fine-tuning HTTP-based service integrations.",
tools=tools, tools=tools,
verbose=True verbose=True
) )
data_processing_task = Task( data_processing_task = Task(
description="Enviar dados para processamento e recuperar resultados via Streamable HTTP.", description="Submit data for processing and retrieve results via Streamable HTTP.",
expected_output="Dados processados ou confirmação.", expected_output="Processed data or confirmation.",
agent=manual_http_agent agent=manual_http_agent
) )


@@ -78,40 +78,47 @@ CrewAIInstrumentor().instrument(skip_dep_check=True, tracer_provider=tracer_prov
search_tool = SerperDevTool()

# Define your agents with roles and goals
researcher = Agent(
    role="Senior Research Analyst",
    goal="Uncover cutting-edge developments in AI and data science",
    backstory="""You work at a leading tech think tank.
    Your expertise lies in identifying emerging trends.
    You have a knack for dissecting complex data and presenting actionable insights.""",
    verbose=True,
    allow_delegation=False,
    # You can pass an optional llm attribute specifying what model you wanna use.
    # llm=ChatOpenAI(model_name="gpt-3.5", temperature=0.7),
    tools=[search_tool],
)
writer = Agent(
    role="Tech Content Strategist",
    goal="Craft compelling content on tech advancements",
    backstory="""You are a renowned Content Strategist, known for your insightful and engaging articles.
    You transform complex concepts into compelling narratives.""",
    verbose=True,
    allow_delegation=True,
)

# Create tasks for your agents
task1 = Task(
    description="""Conduct a comprehensive analysis of the latest advancements in AI in 2024.
    Identify key trends, breakthrough technologies, and potential industry impacts.""",
    expected_output="Full analysis report in bullet points",
    agent=researcher,
)
task2 = Task(
    description="""Using the insights provided, develop an engaging blog
    post that highlights the most significant AI advancements.
    Your post should be informative yet accessible, catering to a tech-savvy audience.
    Make it sound cool, avoid complex words so it doesn't sound like AI.""",
    expected_output="Full blog post of at least 4 paragraphs",
    agent=writer,
)

# Instantiate your crew with a sequential process
crew = Crew(
    agents=[researcher, writer], tasks=[task1, task2], verbose=1, process=Process.sequential
)

# Get your crew to work!


@@ -76,20 +76,20 @@ from crewai_tools import (
web_rag_tool = WebsiteSearchTool()

writer = Agent(
    role="Writer",
    goal="You make math engaging and understandable for young children through poetry",
    backstory="You are an expert at writing haikus but know nothing about math.",
    tools=[web_rag_tool],
)

task = Task(description=("What is {multiplicação}?"),
            expected_output=("Compose a haiku that includes the answer."),
            agent=writer)

crew = Crew(
    agents=[writer],
    tasks=[task],
    share_crew=False
)
```
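For illustration, the `{multiplicação}` placeholder in the task description is filled from the `inputs` mapping at kickoff. A minimal sketch, assuming the input key matches the placeholder name used above:

```python
# Hypothetical kickoff call: the "multiplicação" input fills the
# {multiplicação} placeholder in the task description.
result = crew.kickoff(inputs={"multiplicação": "3 times 4"})
print(result)
```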


@@ -35,7 +35,7 @@ Essa integração permite o registro de hiperparâmetros, o monitoramento de reg
```python
from langtrace_python_sdk import langtrace
langtrace.init(api_key='<LANGTRACE_API_KEY>')

# Now import the CrewAI modules
from crewai import Agent, Task, Crew


@@ -73,24 +73,26 @@ instrument_crewai(logger)
### 4. Create and run your CrewAI application as usual
```python
# Create your agent
researcher = Agent(
    role='Senior Research Analyst',
    goal='Uncover cutting-edge developments in AI',
    backstory="You are an expert researcher at a tech think tank...",
    verbose=True,
    llm=llm
)

# Define the task
research_task = Task(
    description="Research the latest AI advancements...",
    expected_output="",
    agent=researcher
)

# Configure and run the crew
crew = Crew(
    agents=[researcher],
    tasks=[research_task],
    verbose=True
)


@@ -70,19 +70,22 @@ O tracing fornece uma forma de registrar os inputs, outputs e metadados associad
class TripAgents:
    def city_selection_agent(self):
        return Agent(
            role="City Selection Expert",
            goal="Select the best city based on weather, season, and prices",
            backstory="An expert in analyzing travel data to pick ideal destinations",
            tools=[
                search_tool,
            ],
            verbose=True,
        )

    def local_expert(self):
        return Agent(
            role="Local Expert at this city",
            goal="Provide the BEST insights about the selected city",
            backstory="""A knowledgeable local guide with extensive information
            about the city, its attractions and customs""",
            tools=[search_tool],
            verbose=True,
        )
@@ -93,36 +96,53 @@ O tracing fornece uma forma de registrar os inputs, outputs e metadados associad
        return Task(
            description=dedent(
                f"""
                Analyze and select the best city for the trip based
                on specific criteria such as weather patterns, seasonal
                events, and travel costs. This task involves comparing
                multiple cities, considering factors like current weather
                conditions, upcoming cultural or seasonal events, and
                overall travel expenses.

                Your final answer must be a detailed
                report on the chosen city, and everything you found out
                about it, including the actual flight costs, weather
                forecast and attractions.

                Traveling from: {origin}
                City Options: {cities}
                Trip Date: {range}
                Traveler Interests: {interests}
                """
            ),
            agent=agent,
            expected_output="Detailed report on the chosen city including flight costs, weather forecast, and attractions",
        )

    def gather_task(self, agent, origin, interests, range):
        return Task(
            description=dedent(
                f"""
                As a local expert on this city you must compile an
                in-depth guide for someone traveling there and wanting
                to have THE BEST trip ever!
                Gather information about key attractions, local customs,
                special events, and daily activity recommendations.
                Find the best spots to go to, the kind of place only a
                local would know.
                This guide should provide a thorough overview of what
                the city has to offer, including hidden gems, cultural
                hotspots, must-visit landmarks, weather forecasts, and
                high level costs.

                The final answer must be a comprehensive city guide,
                rich in cultural insights and practical tips,
                tailored to enhance the travel experience.

                Trip Date: {range}
                Traveling from: {origin}
                Traveler Interests: {interests}
                """
            ),
            agent=agent,
            expected_output="Comprehensive city guide including hidden gems, cultural hotspots, and practical travel tips",
        )
@@ -169,7 +189,7 @@ O tracing fornece uma forma de registrar os inputs, outputs e metadados associad
trip_crew = TripCrew("California", "Tokyo", "Dec 12 - Dec 20", "sports") trip_crew = TripCrew("California", "Tokyo", "Dec 12 - Dec 20", "sports")
result = trip_crew.run() result = trip_crew.run()
print("Resultado da equipe:", result) print(result)
``` ```
Consulte a [Documentação de Tracing do MLflow](https://mlflow.org/docs/latest/llms/tracing/index.html) para mais configurações e casos de uso. Consulte a [Documentação de Tracing do MLflow](https://mlflow.org/docs/latest/llms/tracing/index.html) para mais configurações e casos de uso.
</Step> </Step>


@@ -69,10 +69,10 @@ Essa configuração permite acompanhar hiperparâmetros e monitorar problemas de
openlit.init(disable_metrics=True)

# Define your agents
researcher = Agent(
    role="Researcher",
    goal="Conduct thorough research and analysis on AI and AI agents",
    backstory="You're an expert researcher, specialized in technology, software engineering, AI, and startups. You work as a freelancer and are currently researching for a new client.",
    allow_delegation=False,
    llm='command-r'
)
@@ -80,24 +80,24 @@ Essa configuração permite acompanhar hiperparâmetros e monitorar problemas de
# Define your task
task = Task(
    description="Generate a list of 5 interesting ideas for an article, then write one captivating paragraph for each idea that showcases the potential of a full article on this topic. Return the list of ideas with their paragraphs and your notes.",
    expected_output="5 bullet points, each with a paragraph and accompanying notes.",
)

# Define the manager agent
manager = Agent(
    role="Project Manager",
    goal="Efficiently manage the crew and ensure high-quality task completion",
    backstory="You're an experienced project manager, skilled in overseeing complex projects and guiding teams to success. Your role is to coordinate the efforts of the crew members, ensuring that each task is completed on time and to the highest standard.",
    allow_delegation=True,
    llm='command-r'
)

# Instantiate your crew with a custom manager
crew = Crew(
    agents=[researcher],
    tasks=[task],
    manager_agent=manager,
    process=Process.hierarchical,
)
@@ -132,18 +132,18 @@ Essa configuração permite acompanhar hiperparâmetros e monitorar problemas de
# Create an agent with code execution enabled
coding_agent = Agent(
    role="Python Data Analyst",
    goal="Analyze data and provide insights using Python",
    backstory="You are an experienced data analyst with strong Python skills.",
    allow_code_execution=True,
    llm="command-r"
)

# Create a task that requires code execution
data_analysis_task = Task(
    description="Analyze the given dataset and calculate the average age of participants. Ages: {ages}",
    agent=coding_agent,
    expected_output="The average age of the participants.",
)

# Create a crew and add the task


@@ -58,43 +58,43 @@ Neste guia, utilizaremos o exemplo de início rápido da CrewAI.
from crewai import Agent, Crew, Task, Process


class YourCrewName:
    def agent_one(self) -> Agent:
        return Agent(
            role="Data Analyst",
            goal="Analyze data trends in the market",
            backstory="An experienced data analyst with a background in economics",
            verbose=True,
        )

    def agent_two(self) -> Agent:
        return Agent(
            role="Market Researcher",
            goal="Gather information on market dynamics",
            backstory="A diligent researcher with a keen eye for detail",
            verbose=True,
        )

    def task_one(self) -> Task:
        return Task(
            name="Collect Data Task",
            description="Collect recent market data and identify trends.",
            expected_output="A report summarizing key trends in the market.",
            agent=self.agent_one(),
        )

    def task_two(self) -> Task:
        return Task(
            name="Market Research Task",
            description="Research factors affecting market dynamics.",
            expected_output="An analysis of factors influencing the market.",
            agent=self.agent_two(),
        )

    def crew(self) -> Crew:
        return Crew(
            agents=[self.agent_one(), self.agent_two()],
            tasks=[self.task_one(), self.task_two()],
            process=Process.sequential,
            verbose=True,
        )
@@ -108,7 +108,7 @@ Neste guia, utilizaremos o exemplo de início rápido da CrewAI.
track_crewai(project_name="crewai-integration-demo")

my_crew = YourCrewName().crew()
result = my_crew.kickoff()
print(result)


@@ -64,17 +64,17 @@ patronus_eval_tool = PatronusEvalTool()
# Define an agent that uses the tool
coding_agent = Agent(
    role="Coding Agent",
    goal="Generate high quality code and verify that the output is code",
    backstory="An experienced coder who can generate high quality python code.",
    tools=[patronus_eval_tool],
    verbose=True,
)

# Example task to generate and evaluate code
generate_code_task = Task(
    description="Create a simple program to generate the first N numbers in the Fibonacci sequence. Select the most appropriate evaluator and criteria for evaluating your output.",
    expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
    agent=coding_agent,
)
@@ -98,17 +98,17 @@ patronus_eval_tool = PatronusPredefinedCriteriaEvalTool(
# Define an agent that uses the tool
coding_agent = Agent(
    role="Coding Agent",
    goal="Generate high quality code",
    backstory="An experienced coder who can generate high quality python code.",
    tools=[patronus_eval_tool],
    verbose=True,
)

# Example task to generate code
generate_code_task = Task(
    description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
    expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
    agent=coding_agent,
)
@@ -149,17 +149,17 @@ patronus_eval_tool = PatronusLocalEvaluatorTool(
# Define an agent that uses the tool
coding_agent = Agent(
    role="Coding Agent",
    goal="Generate high quality code",
    backstory="An experienced coder who can generate high quality python code.",
    tools=[patronus_eval_tool],
    verbose=True,
)

# Example task to generate code
generate_code_task = Task(
    description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
    expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
    agent=coding_agent,
)


@@ -50,48 +50,48 @@ O Weave captura automaticamente rastreamentos (traces) de suas aplicações Crew
llm = LLM(model="gpt-4o", temperature=0) llm = LLM(model="gpt-4o", temperature=0)
# Crie os agentes # Crie os agentes
pesquisador = Agent( researcher = Agent(
role='Analista de Pesquisa', role='Research Analyst',
goal='Encontrar e analisar as melhores oportunidades de investimento', goal='Find and analyze the best investment opportunities',
backstory='Especialista em análise financeira e pesquisa de mercado', backstory='Expert in financial analysis and market research',
llm=llm, llm=llm,
verbose=True, verbose=True,
allow_delegation=False, allow_delegation=False,
) )
redator = Agent( writer = Agent(
role='Redator de Relatórios', role='Report Writer',
goal='Escrever relatórios de investimento claros e concisos', goal='Write clear and concise investment reports',
backstory='Experiente na criação de relatórios financeiros detalhados', backstory='Experienced in creating detailed financial reports',
llm=llm, llm=llm,
verbose=True, verbose=True,
allow_delegation=False, allow_delegation=False,
) )
# Crie as tarefas # Crie as tarefas
pesquisa = Task( research_task = Task(
description='Pesquisa aprofundada sobre o {tema}', description='Deep research on the {topic}',
expected_output='Dados de mercado abrangentes incluindo principais players, tamanho de mercado e tendências de crescimento.', expected_output='Comprehensive market data including key players, market size, and growth trends.',
agent=pesquisador agent=researcher
) )
redacao = Task( writing_task = Task(
description='Escreva um relatório detalhado com base na pesquisa', description='Write a detailed report based on the research',
expected_output='O relatório deve ser fácil de ler e entender. Use tópicos quando aplicável.', expected_output='The report should be easy to read and understand. Use bullet points where applicable.',
agent=redator agent=writer
) )
# Crie o crew # Crie o crew
equipe = Crew( crew = Crew(
agents=[pesquisador, redator], agents=[researcher, writer],
tasks=[pesquisa, redacao], tasks=[research_task, writing_task],
verbose=True, verbose=True,
process=Process.sequential, process=Process.sequential,
) )
# Execute o crew # Execute o crew
resultado = equipe.kickoff(inputs={"tema": "IA em ciência dos materiais"}) result = crew.kickoff(inputs={"topic": "AI in material science"})
print(resultado) print(result)
``` ```
</Step> </Step>
<Step title="Visualize rastreamentos no Weave"> <Step title="Visualize rastreamentos no Weave">


@@ -39,19 +39,23 @@ Siga os passos abaixo para começar a tripular! 🚣‍♂️
# src/latest_ai_development/config/agents.yaml
researcher:
  role: >
    {topic} Senior Data Researcher
  goal: >
    Uncover cutting-edge developments in {topic}
  backstory: >
    You're a seasoned researcher with a knack for uncovering the latest
    developments in {topic}. Known for your ability to find the most relevant
    information and present it in a clear and concise manner.

reporting_analyst:
  role: >
    {topic} Reporting Analyst
  goal: >
    Create detailed reports based on {topic} data analysis and research findings
  backstory: >
    You're a meticulous analyst with a keen eye for detail. You're known for
    your ability to turn complex data into clear and concise reports, making
    it easy for others to understand and act on the information you provide.
```
</Step>
<Step title="Modify your `tasks.yaml` file">
@@ -59,19 +63,20 @@ Siga os passos abaixo para começar a tripular! 🚣‍♂️
# src/latest_ai_development/config/tasks.yaml
research_task:
  description: >
    Conduct thorough research about {topic}.
    Make sure you find any interesting and relevant information given
    the current year is 2025.
  expected_output: >
    A list with 10 bullet points of the most relevant information about {topic}
  agent: researcher

reporting_task:
  description: >
    Review the context you got and expand each topic into a full section for a report.
    Make sure the report is detailed and contains any and all relevant information.
  expected_output: >
    A fully fledged report with the main topics, each with a full section of information.
    Formatted as markdown without '```'
  agent: reporting_analyst
  output_file: report.md
```
@@ -117,15 +122,15 @@ Siga os passos abaixo para começar a tripular! 🚣‍♂️
    def reporting_task(self) -> Task:
        return Task(
            config=self.tasks_config['reporting_task'],  # type: ignore[index]
            output_file='output/report.md'  # This is the file that will contain the final report.
        )

    @crew
    def crew(self) -> Crew:
        """Creates the LatestAiDevelopment crew"""
        return Crew(
            agents=self.agents,  # Automatically created by the @agent decorator
            tasks=self.tasks,  # Automatically created by the @task decorator
            process=Process.sequential,
            verbose=True,
        )
@@ -224,7 +229,7 @@ Siga os passos abaixo para começar a tripular! 🚣‍♂️
<CodeGroup>
```markdown output/report.md
# Comprehensive Report on the Rise and Impact of AI Agents in 2025

## 1. Introduction to AI Agents
In 2025, Artificial Intelligence (AI) agents are at the forefront of innovation across various industries. As intelligent systems that can perform tasks typically requiring human cognition, AI agents are paving the way for significant advancements in operational efficiency, decision-making, and overall productivity within sectors like Human Resources (HR) and Finance. This report aims to detail the rise of AI agents, their frameworks, applications, and potential implications on the workforce.


@@ -35,18 +35,78 @@ from crewai_tools import LinkupSearchTool
from crewai import Agent
import os

# Initialize the tool with your API key
linkup_tool = LinkupSearchTool(api_key=os.getenv("LINKUP_API_KEY"))

# Define an agent that uses the tool
@agent
def researcher(self) -> Agent:
    '''
    This agent uses the LinkupSearchTool to retrieve contextual information
    from the Linkup API.
    '''
    return Agent(
        config=self.agents_config["researcher"],
        tools=[linkup_tool]
    )
```
## Parameters

The `LinkupSearchTool` accepts the following parameters:

### Constructor Parameters
- **api_key**: Required. Your Linkup API key.

### Run Parameters
- **query**: Required. The search term or phrase.
- **depth**: Optional. The search depth. Defaults to "standard".
- **output_type**: Optional. The output type. Defaults to "searchResults". A minimal call using these defaults is sketched below.
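For illustration, a minimal call that relies on these defaults, reusing the `linkup_tool` instance created earlier:

```python Code
# Only the query is required; depth and output_type fall back to
# "standard" and "searchResults" respectively.
results = linkup_tool.run(query="Women Nobel Prize Physics")
```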
## Advanced Usage

You can customize the search parameters for more specific results:
```python Code
# Perform a search with custom parameters
results = linkup_tool.run(
query="Women Nobel Prize Physics",
depth="deep",
output_type="searchResults"
)
```
## Return Format

The tool returns results in the following format:
```json
{
"success": true,
"results": [
{
"name": "Result Title",
"url": "https://example.com/result",
"content": "Content of the result..."
},
// Additional results...
]
}
```
If an error occurs, the response will be:
```json
{
"success": false,
"error": "Error message"
}
```
## Error Handling

The tool handles API errors gracefully and provides structured feedback. If the API request fails, it returns a dictionary with `success: false` and an error message, which callers can branch on as sketched below.
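A minimal sketch of branching on that structure, assuming `run` returns the dictionary described under Return Format:

```python Code
response = linkup_tool.run(query="Women Nobel Prize Physics")

if response.get("success"):
    # Success: iterate over the structured search results
    for item in response.get("results", []):
        print(f"{item['name']} ({item['url']})")
else:
    # Failure: surface the structured error message
    print(f"Linkup search failed: {response.get('error')}")
```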
## Conclusion

The `LinkupSearchTool` offers an integrated way to bring Linkup's contextual information search capabilities into your CrewAI agents. With this tool, agents can access relevant, up-to-date information to improve their decision-making and task execution.


@@ -27,13 +27,13 @@ dependencies = [
"openpyxl>=3.1.5", "openpyxl>=3.1.5",
"pyvis>=0.3.2", "pyvis>=0.3.2",
# Authentication and Security # Authentication and Security
"auth0-python>=4.7.1",
"python-dotenv>=1.0.0", "python-dotenv>=1.0.0",
"pyjwt>=2.9.0",
# Configuration and Utils # Configuration and Utils
"click>=8.1.7", "click>=8.1.7",
"appdirs>=1.4.4", "appdirs>=1.4.4",
"jsonref>=1.1.0", "jsonref>=1.1.0",
"json-repair==0.25.2", "json-repair>=0.25.2",
"uv>=0.4.25", "uv>=0.4.25",
"tomli-w>=1.1.0", "tomli-w>=1.1.0",
"tomli>=2.0.2", "tomli>=2.0.2",
@@ -47,11 +47,11 @@ Documentation = "https://docs.crewai.com"
Repository = "https://github.com/crewAIInc/crewAI" Repository = "https://github.com/crewAIInc/crewAI"
[project.optional-dependencies] [project.optional-dependencies]
tools = ["crewai-tools~=0.51.0"] tools = ["crewai-tools~=0.48.0"]
embeddings = [ embeddings = [
"tiktoken~=0.8.0" "tiktoken~=0.8.0"
] ]
agentops = ["agentops==0.3.18"] agentops = ["agentops>=0.3.0"]
pdfplumber = [ pdfplumber = [
"pdfplumber>=0.11.4", "pdfplumber>=0.11.4",
] ]
@@ -83,8 +83,6 @@ dev-dependencies = [
"pytest-recording>=0.13.2", "pytest-recording>=0.13.2",
"pytest-randomly>=3.16.0", "pytest-randomly>=3.16.0",
"pytest-timeout>=2.3.1", "pytest-timeout>=2.3.1",
"pytest-xdist>=3.6.1",
"pytest-split>=0.9.0",
] ]
[project.scripts] [project.scripts]
@@ -125,15 +123,3 @@ path = "src/crewai/__init__.py"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"


@@ -28,19 +28,19 @@ _telemetry_submitted = False
def _track_install():
    """Track package installation/first-use via Scarf analytics."""
    global _telemetry_submitted

    if _telemetry_submitted or Telemetry._is_telemetry_disabled():
        return

    try:
        pixel_url = "https://api.scarf.sh/v2/packages/CrewAI/crewai/docs/00f2dad1-8334-4a39-934e-003b2e1146db"
        req = urllib.request.Request(pixel_url)
        req.add_header('User-Agent', f'CrewAI-Python/{__version__}')

        with urllib.request.urlopen(req, timeout=2):  # nosec B310
            _telemetry_submitted = True
    except Exception:
        pass
@@ -54,7 +54,7 @@ def _track_install_async():
_track_install_async()

__version__ = "0.134.0"

__all__ = [
    "Agent",
    "Crew",


@@ -2,7 +2,3 @@ ALGORITHMS = ["RS256"]
AUTH0_DOMAIN = "crewai.us.auth0.com" AUTH0_DOMAIN = "crewai.us.auth0.com"
AUTH0_CLIENT_ID = "DEVC5Fw6NlRoSzmDCcOhVq85EfLBjKa8" AUTH0_CLIENT_ID = "DEVC5Fw6NlRoSzmDCcOhVq85EfLBjKa8"
AUTH0_AUDIENCE = "https://crewai.us.auth0.com/api/v2/" AUTH0_AUDIENCE = "https://crewai.us.auth0.com/api/v2/"
WORKOS_DOMAIN = "login.crewai.com"
WORKOS_CLI_CONNECT_APP_ID = "client_01JYT06R59SP0NXYGD994NFXXX"
WORKOS_ENVIRONMENT_ID = "client_01JNJQWBJ4SPFN3SWJM5T7BDG8"


@@ -5,72 +5,37 @@ from typing import Any, Dict
import requests
from rich.console import Console

from .constants import AUTH0_AUDIENCE, AUTH0_CLIENT_ID, AUTH0_DOMAIN
from .utils import TokenManager, validate_token

console = Console()


class AuthenticationCommand:
    DEVICE_CODE_URL = f"https://{AUTH0_DOMAIN}/oauth/device/code"
    TOKEN_URL = f"https://{AUTH0_DOMAIN}/oauth/token"

    def __init__(self):
        self.token_manager = TokenManager()

    def signup(self) -> None:
        """Sign up to CrewAI+"""
        console.print("Signing Up to CrewAI+ \n", style="bold blue")

        device_code_data = self._get_device_code()
        self._display_auth_instructions(device_code_data)

        return self._poll_for_token(device_code_data)

    def _get_device_code(self) -> Dict[str, Any]:
        """Get the device code to authenticate the user."""
        device_code_payload = {
            "client_id": AUTH0_CLIENT_ID,
            "scope": "openid",
            "audience": AUTH0_AUDIENCE,
        }
        response = requests.post(
            url=self.DEVICE_CODE_URL, data=device_code_payload, timeout=20
        )
        response.raise_for_status()
        return response.json()
@@ -81,33 +46,38 @@ class AuthenticationCommand:
console.print("2. Enter the following code: ", device_code_data["user_code"]) console.print("2. Enter the following code: ", device_code_data["user_code"])
webbrowser.open(device_code_data["verification_uri_complete"]) webbrowser.open(device_code_data["verification_uri_complete"])
def _poll_for_token( def _poll_for_token(self, device_code_data: Dict[str, Any]) -> None:
self, device_code_data: Dict[str, Any], client_id: str, token_poll_url: str """Poll the server for the token."""
) -> None:
"""Polls the server for the token until it is received, or max attempts are reached."""
token_payload = { token_payload = {
"grant_type": "urn:ietf:params:oauth:grant-type:device_code", "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
"device_code": device_code_data["device_code"], "device_code": device_code_data["device_code"],
"client_id": client_id, "client_id": AUTH0_CLIENT_ID,
} }
console.print("\nWaiting for authentication... ", style="bold blue", end="")
attempts = 0 attempts = 0
while True and attempts < 10: while True and attempts < 5:
response = requests.post(token_poll_url, data=token_payload, timeout=30) response = requests.post(self.TOKEN_URL, data=token_payload, timeout=30)
token_data = response.json() token_data = response.json()
if response.status_code == 200: if response.status_code == 200:
self._validate_and_save_token(token_data) validate_token(token_data["id_token"])
expires_in = 360000 # Token expiration time in seconds
self.token_manager.save_tokens(token_data["access_token"], expires_in)
console.print( try:
"Success!", from crewai.cli.tools.main import ToolCommand
style="bold green", ToolCommand().login()
) except Exception:
console.print(
self._login_to_tool_repository() "\n[bold yellow]Warning:[/bold yellow] Authentication with the Tool Repository failed.",
style="yellow",
)
console.print(
"Other features will work normally, but you may experience limitations "
"with downloading and publishing tools."
"\nRun [bold]crewai login[/bold] to try logging in again.\n",
style="yellow",
)
console.print( console.print(
"\n[bold green]Welcome to CrewAI Enterprise![/bold green]\n" "\n[bold green]Welcome to CrewAI Enterprise![/bold green]\n"
@@ -123,88 +93,3 @@ class AuthenticationCommand:
            console.print(
                "Timeout: Failed to get the token. Please try again.", style="bold red"
            )


@@ -1,72 +1,32 @@
import json
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional

from auth0.authentication.token_verifier import (
    AsymmetricSignatureVerifier,
    TokenVerifier,
)
from cryptography.fernet import Fernet

from .constants import AUTH0_CLIENT_ID, AUTH0_DOMAIN


def validate_token(id_token: str) -> None:
    """
    Verify the token and its precedence

    :param id_token:
    """
    jwks_url = f"https://{AUTH0_DOMAIN}/.well-known/jwks.json"
    issuer = f"https://{AUTH0_DOMAIN}/"
    signature_verifier = AsymmetricSignatureVerifier(jwks_url)
    token_verifier = TokenVerifier(
        signature_verifier=signature_verifier, issuer=issuer, audience=AUTH0_CLIENT_ID
    )
    token_verifier.verify(id_token)


class TokenManager:
@@ -96,14 +56,14 @@ class TokenManager:
        self.save_secure_file(key_filename, new_key)
        return new_key

    def save_tokens(self, access_token: str, expires_in: int) -> None:
        """
        Save the access token and its expiration time.

        :param access_token: The access token to save.
        :param expires_in: The expiration time of the access token in seconds.
        """
        expiration_time = datetime.now() + timedelta(seconds=expires_in)
        data = {
            "access_token": access_token,
            "expiration": expiration_time.isoformat(),


@@ -2,7 +2,7 @@ from importlib.metadata import version as get_version
from typing import Optional

import click

from crewai.cli.add_crew_to_flow import add_crew_to_flow
from crewai.cli.create_crew import create_crew
from crewai.cli.create_flow import create_flow
@@ -138,12 +138,8 @@ def log_tasks_outputs() -> None:
 @click.option("-s", "--short", is_flag=True, help="Reset SHORT TERM memory")
 @click.option("-e", "--entities", is_flag=True, help="Reset ENTITIES memory")
 @click.option("-kn", "--knowledge", is_flag=True, help="Reset KNOWLEDGE storage")
-@click.option(
-    "-akn", "--agent-knowledge", is_flag=True, help="Reset AGENT KNOWLEDGE storage"
-)
-@click.option(
-    "-k", "--kickoff-outputs", is_flag=True, help="Reset LATEST KICKOFF TASK OUTPUTS"
-)
+@click.option("-akn", "--agent-knowledge", is_flag=True, help="Reset AGENT KNOWLEDGE storage")
+@click.option("-k","--kickoff-outputs",is_flag=True,help="Reset LATEST KICKOFF TASK OUTPUTS")
 @click.option("-a", "--all", is_flag=True, help="Reset ALL memories")
 def reset_memories(
     long: bool,
@@ -158,23 +154,13 @@ def reset_memories(
     Reset the crew memories (long, short, entity, latest_crew_kickoff_ouputs, knowledge, agent_knowledge). This will delete all the data saved.
     """
     try:
-        memory_types = [
-            long,
-            short,
-            entities,
-            knowledge,
-            agent_knowledge,
-            kickoff_outputs,
-            all,
-        ]
+        memory_types = [long, short, entities, knowledge, agent_knowledge, kickoff_outputs, all]
         if not any(memory_types):
             click.echo(
                 "Please specify at least one memory type to reset using the appropriate flags."
             )
             return
-        reset_memories_command(
-            long, short, entities, knowledge, agent_knowledge, kickoff_outputs, all
-        )
+        reset_memories_command(long, short, entities, knowledge, agent_knowledge, kickoff_outputs, all)
     except Exception as e:
         click.echo(f"An error occurred while resetting memories: {e}", err=True)
@@ -224,11 +210,16 @@ def update():
     update_crew()

+@crewai.command()
+def signup():
+    """Sign Up/Login to CrewAI+."""
+    AuthenticationCommand().signup()
+
 @crewai.command()
 def login():
-    """Sign Up/Login to CrewAI Enterprise."""
-    Settings().clear()
-    AuthenticationCommand().login()
+    """Sign Up/Login to CrewAI+."""
+    AuthenticationCommand().signup()

 # DEPLOY CREWAI+ COMMANDS

View File

@@ -37,10 +37,6 @@ class Settings(BaseModel):
         merged_data = {**file_data, **data}
         super().__init__(config_path=config_path, **merged_data)

-    def clear(self) -> None:
-        """Clear all settings"""
-        self.config_path.unlink(missing_ok=True)
-
     def dump(self) -> None:
         """Save current settings to settings.json"""
         if self.config_path.is_file():

View File

@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
 authors = [{ name = "Your Name", email = "you@example.com" }]
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]>=0.141.0,<1.0.0"
+    "crewai[tools]>=0.134.0,<1.0.0"
 ]

 [project.scripts]

View File

@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
 authors = [{ name = "Your Name", email = "you@example.com" }]
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]>=0.141.0,<1.0.0",
+    "crewai[tools]>=0.134.0,<1.0.0",
 ]

 [project.scripts]

View File

@@ -5,7 +5,7 @@ description = "Power up your crews with {{folder_name}}"
 readme = "README.md"
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]>=0.141.0"
+    "crewai[tools]>=0.134.0"
 ]

 [tool.crewai]

View File

@@ -156,7 +156,7 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
         console.print(f"Successfully installed {handle}", style="bold green")

-    def login(self) -> None:
+    def login(self):
         login_response = self.plus_api_client.login_to_tool_repository()

         if login_response.status_code != 200:
@@ -175,10 +175,18 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
         settings.tool_repository_password = login_response_json["credential"][
             "password"
         ]
-        settings.org_uuid = login_response_json["current_organization"]["uuid"]
-        settings.org_name = login_response_json["current_organization"]["name"]
+        settings.org_uuid = login_response_json["current_organization"][
+            "uuid"
+        ]
+        settings.org_name = login_response_json["current_organization"][
+            "name"
+        ]
         settings.dump()

+        console.print(
+            f"Successfully authenticated to the tool repository as {settings.org_name} ({settings.org_uuid}).", style="bold green"
+        )
+
     def _add_package(self, tool_details: dict[str, Any]):
         is_from_pypi = tool_details.get("source", None) == "pypi"
         tool_handle = tool_details["handle"]
@@ -235,15 +243,9 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
         return env

-    def _print_current_organization(self) -> None:
+    def _print_current_organization(self):
         settings = Settings()

         if settings.org_uuid:
-            console.print(
-                f"Current organization: {settings.org_name} ({settings.org_uuid})",
-                style="bold blue",
-            )
+            console.print(f"Current organization: {settings.org_name} ({settings.org_uuid})", style="bold blue")
         else:
-            console.print(
-                "No organization currently set. We recommend setting one before using: `crewai org switch <org_id>` command.",
-                style="yellow",
-            )
+            console.print("No organization currently set. We recommend setting one before using: `crewai org switch <org_id>` command.", style="yellow")
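Both versions of the printing code rely on rich style strings; only the call layout differs. A self-contained snippet producing the same styled output, assuming the rich package:

from rich.console import Console

console = Console()
console.print("Current organization: Acme Inc (1234-abcd)", style="bold blue")
console.print("No organization currently set.", style="yellow")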

View File

@@ -18,11 +18,6 @@ from typing import (
     cast,
 )

-from opentelemetry import baggage
-from opentelemetry.context import attach, detach
-
-from crewai.utilities.crew.models import CrewContext
 from pydantic import (
     UUID4,
     BaseModel,
@@ -621,11 +616,6 @@ class Crew(FlowTrackable, BaseModel):
         self,
         inputs: Optional[Dict[str, Any]] = None,
     ) -> CrewOutput:
-        ctx = baggage.set_baggage(
-            "crew_context", CrewContext(id=str(self.id), key=self.key)
-        )
-        token = attach(ctx)
         try:
             for before_callback in self.before_kickoff_callbacks:
                 if inputs is None:
@@ -686,8 +676,6 @@
                 CrewKickoffFailedEvent(error=str(e), crew_name=self.name or "crew"),
             )
             raise
-        finally:
-            detach(token)

     def kickoff_for_each(self, inputs: List[Dict[str, Any]]) -> List[CrewOutput]:
         """Executes the Crew's workflow for each input in the list and aggregates results."""
@@ -1313,7 +1301,6 @@
         n_iterations: int,
         eval_llm: Union[str, InstanceOf[BaseLLM]],
         inputs: Optional[Dict[str, Any]] = None,
-        include_agent_eval: Optional[bool] = False
     ) -> None:
         """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures."""
         try:
@@ -1332,29 +1319,13 @@
             ),
         )
         test_crew = self.copy()
-        # TODO: Refator to use a single Evaluator Manage class
         evaluator = CrewEvaluator(test_crew, llm_instance)
-        if include_agent_eval:
-            from crewai.evaluation import create_default_evaluator
-            agent_evaluator = create_default_evaluator(crew=test_crew)

         for i in range(1, n_iterations + 1):
             evaluator.set_iteration(i)
-            if include_agent_eval:
-                agent_evaluator.set_iteration(i)
             test_crew.kickoff(inputs=inputs)
-            # TODO: Refactor to use ListenerEvents instead of trigger each iteration manually
-            if include_agent_eval:
-                agent_evaluator.evaluate_current_iteration()

         evaluator.print_crew_evaluation_result()
-        if include_agent_eval:
-            agent_evaluator.get_agent_evaluation(include_evaluation_feedback=True)

         crewai_event_bus.emit(
             self,
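The removed kickoff wrapper follows OpenTelemetry's attach/detach contract: set_baggage returns a new context, attach activates it and returns a token, and detach must run in a finally block to restore the previous context. A minimal sketch of the pattern, assuming the opentelemetry-api package (the key and value are placeholders taken from the diff):

from opentelemetry import baggage
from opentelemetry.context import attach, detach

token = attach(baggage.set_baggage("crew_context", "crew-1234"))
try:
    pass  # work runs here while the baggage entry is in the active context
finally:
    detach(token)  # always restore the previous context, even on error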

View File

@@ -1,53 +0,0 @@
from crewai.evaluation.base_evaluator import (
BaseEvaluator,
EvaluationScore,
MetricCategory,
AgentEvaluationResult
)
from crewai.evaluation.metrics.semantic_quality_metrics import (
SemanticQualityEvaluator
)
from crewai.evaluation.metrics.goal_metrics import (
GoalAlignmentEvaluator
)
from crewai.evaluation.metrics.reasoning_metrics import (
ReasoningEfficiencyEvaluator
)
from crewai.evaluation.metrics.tools_metrics import (
ToolSelectionEvaluator,
ParameterExtractionEvaluator,
ToolInvocationEvaluator
)
from crewai.evaluation.evaluation_listener import (
EvaluationTraceCallback,
create_evaluation_callbacks
)
from crewai.evaluation.agent_evaluator import (
AgentEvaluator,
create_default_evaluator
)
__all__ = [
"BaseEvaluator",
"EvaluationScore",
"MetricCategory",
"AgentEvaluationResult",
"SemanticQualityEvaluator",
"GoalAlignmentEvaluator",
"ReasoningEfficiencyEvaluator",
"ToolSelectionEvaluator",
"ParameterExtractionEvaluator",
"ToolInvocationEvaluator",
"EvaluationTraceCallback",
"create_evaluation_callbacks",
"AgentEvaluator",
"create_default_evaluator"
]

View File

@@ -1,178 +0,0 @@
from crewai.evaluation.base_evaluator import AgentEvaluationResult, AggregationStrategy
from crewai.agent import Agent
from crewai.task import Task
from crewai.evaluation.evaluation_display import EvaluationDisplayFormatter
from typing import Any, Dict
from collections import defaultdict
from crewai.evaluation import BaseEvaluator, create_evaluation_callbacks
from collections.abc import Sequence
from crewai.crew import Crew
from crewai.utilities.events.crewai_event_bus import crewai_event_bus
from crewai.utilities.events.utils.console_formatter import ConsoleFormatter
class AgentEvaluator:
def __init__(
self,
evaluators: Sequence[BaseEvaluator] | None = None,
crew: Crew | None = None,
):
self.crew: Crew | None = crew
self.evaluators: Sequence[BaseEvaluator] | None = evaluators
self.agent_evaluators: dict[str, Sequence[BaseEvaluator] | None] = {}
if crew is not None:
assert crew and crew.agents is not None
for agent in crew.agents:
self.agent_evaluators[str(agent.id)] = self.evaluators
self.callback = create_evaluation_callbacks()
self.console_formatter = ConsoleFormatter()
self.display_formatter = EvaluationDisplayFormatter()
self.iteration = 1
self.iterations_results: dict[int, dict[str, list[AgentEvaluationResult]]] = {}
def set_iteration(self, iteration: int) -> None:
self.iteration = iteration
def evaluate_current_iteration(self) -> dict[str, list[AgentEvaluationResult]]:
if not self.crew:
raise ValueError("Cannot evaluate: no crew was provided to the evaluator.")
if not self.callback:
raise ValueError("Cannot evaluate: no callback was set. Use set_callback() method first.")
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
evaluation_results: defaultdict[str, list[AgentEvaluationResult]] = defaultdict(list)
total_evals = 0
for agent in self.crew.agents:
for task in self.crew.tasks:
if task.agent and task.agent.id == agent.id and self.agent_evaluators.get(str(agent.id)):
total_evals += 1
with Progress(
SpinnerColumn(),
TextColumn("[bold blue]{task.description}[/bold blue]"),
BarColumn(),
TextColumn("{task.percentage:.0f}% completed"),
console=self.console_formatter.console
) as progress:
eval_task = progress.add_task(f"Evaluating agents (iteration {self.iteration})...", total=total_evals)
for agent in self.crew.agents:
evaluator = self.agent_evaluators.get(str(agent.id))
if not evaluator:
continue
for task in self.crew.tasks:
if task.agent and str(task.agent.id) != str(agent.id):
continue
trace = self.callback.get_trace(str(agent.id), str(task.id))
if not trace:
self.console_formatter.print(f"[yellow]Warning: No trace found for agent {agent.role} on task {task.description[:30]}...[/yellow]")
progress.update(eval_task, advance=1)
continue
with crewai_event_bus.scoped_handlers():
result = self.evaluate(
agent=agent,
task=task,
execution_trace=trace,
final_output=task.output
)
evaluation_results[agent.role].append(result)
progress.update(eval_task, advance=1)
self.iterations_results[self.iteration] = evaluation_results
return evaluation_results
def get_evaluation_results(self):
if self.iteration in self.iterations_results:
return self.iterations_results[self.iteration]
return self.evaluate_current_iteration()
def display_results_with_iterations(self):
self.display_formatter.display_summary_results(self.iterations_results)
def get_agent_evaluation(self, strategy: AggregationStrategy = AggregationStrategy.SIMPLE_AVERAGE, include_evaluation_feedback: bool = False):
agent_results = {}
with crewai_event_bus.scoped_handlers():
task_results = self.get_evaluation_results()
for agent_role, results in task_results.items():
if not results:
continue
agent_id = results[0].agent_id
aggregated_result = self.display_formatter._aggregate_agent_results(
agent_id=agent_id,
agent_role=agent_role,
results=results,
strategy=strategy
)
agent_results[agent_role] = aggregated_result
if self.iteration == max(self.iterations_results.keys()):
self.display_results_with_iterations()
if include_evaluation_feedback:
self.display_evaluation_with_feedback()
return agent_results
def display_evaluation_with_feedback(self):
self.display_formatter.display_evaluation_with_feedback(self.iterations_results)
def evaluate(
self,
agent: Agent,
task: Task,
execution_trace: Dict[str, Any],
final_output: Any
) -> AgentEvaluationResult:
result = AgentEvaluationResult(
agent_id=str(agent.id),
task_id=str(task.id)
)
assert self.evaluators is not None
for evaluator in self.evaluators:
try:
score = evaluator.evaluate(
agent=agent,
task=task,
execution_trace=execution_trace,
final_output=final_output
)
result.metrics[evaluator.metric_category] = score
except Exception as e:
self.console_formatter.print(f"Error in {evaluator.metric_category.value} evaluator: {str(e)}")
return result
def create_default_evaluator(crew, llm=None):
from crewai.evaluation import (
GoalAlignmentEvaluator,
SemanticQualityEvaluator,
ToolSelectionEvaluator,
ParameterExtractionEvaluator,
ToolInvocationEvaluator,
ReasoningEfficiencyEvaluator
)
evaluators = [
GoalAlignmentEvaluator(llm=llm),
SemanticQualityEvaluator(llm=llm),
ToolSelectionEvaluator(llm=llm),
ParameterExtractionEvaluator(llm=llm),
ToolInvocationEvaluator(llm=llm),
ReasoningEfficiencyEvaluator(llm=llm),
]
return AgentEvaluator(evaluators=evaluators, crew=crew)
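Read together with the Crew._test hook removed earlier, the intended call sequence appears to be: build the evaluator from a crew, bump the iteration before each kickoff, evaluate, then print the aggregate. A usage sketch reconstructed from that hook (my_crew, n_iterations, and inputs are placeholders):

evaluator = create_default_evaluator(crew=my_crew)

for i in range(1, n_iterations + 1):
    evaluator.set_iteration(i)
    my_crew.kickoff(inputs=inputs)
    evaluator.evaluate_current_iteration()

evaluator.get_agent_evaluation(include_evaluation_feedback=True)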

View File

@@ -1,125 +0,0 @@
import abc
import enum
from enum import Enum
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
from crewai.agent import Agent
from crewai.task import Task
from crewai.llm import BaseLLM
from crewai.utilities.llm_utils import create_llm
class MetricCategory(enum.Enum):
GOAL_ALIGNMENT = "goal_alignment"
SEMANTIC_QUALITY = "semantic_quality"
REASONING_EFFICIENCY = "reasoning_efficiency"
TOOL_SELECTION = "tool_selection"
PARAMETER_EXTRACTION = "parameter_extraction"
TOOL_INVOCATION = "tool_invocation"
def title(self):
return self.value.replace('_', ' ').title()
class EvaluationScore(BaseModel):
score: float | None = Field(
default=5.0,
description="Numeric score from 0-10 where 0 is worst and 10 is best, None if not applicable",
ge=0.0,
le=10.0
)
feedback: str = Field(
default="",
description="Detailed feedback explaining the evaluation score"
)
raw_response: str | None = Field(
default=None,
description="Raw response from the evaluator (e.g., LLM)"
)
def __str__(self) -> str:
if self.score is None:
return f"Score: N/A - {self.feedback}"
return f"Score: {self.score:.1f}/10 - {self.feedback}"
class BaseEvaluator(abc.ABC):
def __init__(self, llm: BaseLLM | None = None):
self.llm: BaseLLM | None = create_llm(llm)
@property
@abc.abstractmethod
def metric_category(self) -> MetricCategory:
pass
@abc.abstractmethod
def evaluate(
self,
agent: Agent,
task: Task,
execution_trace: Dict[str, Any],
final_output: Any,
) -> EvaluationScore:
pass
class AgentEvaluationResult(BaseModel):
agent_id: str = Field(description="ID of the evaluated agent")
task_id: str = Field(description="ID of the task that was executed")
metrics: Dict[MetricCategory, EvaluationScore] = Field(
default_factory=dict,
description="Evaluation scores for each metric category"
)
class AggregationStrategy(Enum):
SIMPLE_AVERAGE = "simple_average" # Equal weight to all tasks
WEIGHTED_BY_COMPLEXITY = "weighted_by_complexity" # Weight by task complexity
BEST_PERFORMANCE = "best_performance" # Use best scores across tasks
WORST_PERFORMANCE = "worst_performance" # Use worst scores across tasks
class AgentAggregatedEvaluationResult(BaseModel):
agent_id: str = Field(
default="",
description="ID of the agent"
)
agent_role: str = Field(
default="",
description="Role of the agent"
)
task_count: int = Field(
default=0,
description="Number of tasks included in this aggregation"
)
aggregation_strategy: AggregationStrategy = Field(
default=AggregationStrategy.SIMPLE_AVERAGE,
description="Strategy used for aggregation"
)
metrics: Dict[MetricCategory, EvaluationScore] = Field(
default_factory=dict,
description="Aggregated metrics across all tasks"
)
task_results: List[str] = Field(
default_factory=list,
description="IDs of tasks included in this aggregation"
)
overall_score: Optional[float] = Field(
default=None,
description="Overall score for this agent"
)
def __str__(self) -> str:
result = f"Agent Evaluation: {self.agent_role}\n"
result += f"Strategy: {self.aggregation_strategy.value}\n"
result += f"Tasks evaluated: {self.task_count}\n"
for category, score in self.metrics.items():
result += f"\n\n- {category.value.upper()}: {score.score}/10\n"
if score.feedback:
detailed_feedback = "\n ".join(score.feedback.split('\n'))
result += f" {detailed_feedback}\n"
return result
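Note that EvaluationScore.__str__ distinguishes a missing score from a zero score, which the display code later relies on when rendering N/A cells. For illustration:

print(EvaluationScore(score=7.5, feedback="Well-structured output"))
# Score: 7.5/10 - Well-structured output
print(EvaluationScore(score=None, feedback="No tools were used"))
# Score: N/A - No tools were used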

View File

@@ -1,341 +0,0 @@
from collections import defaultdict
from typing import Dict, Any, List
from rich.table import Table
from rich.box import HEAVY_EDGE, ROUNDED
from collections.abc import Sequence
from crewai.evaluation.base_evaluator import AgentAggregatedEvaluationResult, AggregationStrategy, AgentEvaluationResult, MetricCategory
from crewai.evaluation import EvaluationScore
from crewai.utilities.events.utils.console_formatter import ConsoleFormatter
from crewai.utilities.llm_utils import create_llm
class EvaluationDisplayFormatter:
def __init__(self):
self.console_formatter = ConsoleFormatter()
def display_evaluation_with_feedback(self, iterations_results: Dict[int, Dict[str, List[Any]]]):
if not iterations_results:
self.console_formatter.print("[yellow]No evaluation results to display[/yellow]")
return
# Get all agent roles across all iterations
all_agent_roles: set[str] = set()
for iter_results in iterations_results.values():
all_agent_roles.update(iter_results.keys())
for agent_role in sorted(all_agent_roles):
self.console_formatter.print(f"\n[bold cyan]Agent: {agent_role}[/bold cyan]")
# Process each iteration
for iter_num, results in sorted(iterations_results.items()):
if agent_role not in results or not results[agent_role]:
continue
agent_results = results[agent_role]
agent_id = agent_results[0].agent_id
# Aggregate results for this agent in this iteration
aggregated_result = self._aggregate_agent_results(
agent_id=agent_id,
agent_role=agent_role,
results=agent_results,
)
# Display iteration header
self.console_formatter.print(f"\n[bold]Iteration {iter_num}[/bold]")
# Create table for this iteration
table = Table(box=ROUNDED)
table.add_column("Metric", style="cyan")
table.add_column("Score (1-10)", justify="center")
table.add_column("Feedback", style="green")
# Add metrics to table
if aggregated_result.metrics:
for metric, evaluation_score in aggregated_result.metrics.items():
score = evaluation_score.score
if isinstance(score, (int, float)):
if score >= 8.0:
score_text = f"[green]{score:.1f}[/green]"
elif score >= 6.0:
score_text = f"[cyan]{score:.1f}[/cyan]"
elif score >= 4.0:
score_text = f"[yellow]{score:.1f}[/yellow]"
else:
score_text = f"[red]{score:.1f}[/red]"
else:
score_text = "[dim]N/A[/dim]"
table.add_section()
table.add_row(
metric.title(),
score_text,
evaluation_score.feedback or ""
)
if aggregated_result.overall_score is not None:
overall_score = aggregated_result.overall_score
if overall_score >= 8.0:
overall_color = "green"
elif overall_score >= 6.0:
overall_color = "cyan"
elif overall_score >= 4.0:
overall_color = "yellow"
else:
overall_color = "red"
table.add_section()
table.add_row(
"Overall Score",
f"[{overall_color}]{overall_score:.1f}[/]",
"Overall agent evaluation score"
)
# Print the table for this iteration
self.console_formatter.print(table)
def display_summary_results(self, iterations_results: Dict[int, Dict[str, List[AgentAggregatedEvaluationResult]]]):
if not iterations_results:
self.console_formatter.print("[yellow]No evaluation results to display[/yellow]")
return
self.console_formatter.print("\n")
table = Table(title="Agent Performance Scores \n (1-10 Higher is better)", box=HEAVY_EDGE)
table.add_column("Agent/Metric", style="cyan")
for iter_num in sorted(iterations_results.keys()):
run_label = f"Run {iter_num}"
table.add_column(run_label, justify="center")
table.add_column("Avg. Total", justify="center")
all_agent_roles: set[str] = set()
for results in iterations_results.values():
all_agent_roles.update(results.keys())
for agent_role in sorted(all_agent_roles):
agent_scores_by_iteration = {}
agent_metrics_by_iteration = {}
for iter_num, results in sorted(iterations_results.items()):
if agent_role not in results or not results[agent_role]:
continue
agent_results = results[agent_role]
agent_id = agent_results[0].agent_id
aggregated_result = self._aggregate_agent_results(
agent_id=agent_id,
agent_role=agent_role,
results=agent_results,
strategy=AggregationStrategy.SIMPLE_AVERAGE
)
valid_scores = [score.score for score in aggregated_result.metrics.values()
if score.score is not None]
if valid_scores:
avg_score = sum(valid_scores) / len(valid_scores)
agent_scores_by_iteration[iter_num] = avg_score
agent_metrics_by_iteration[iter_num] = aggregated_result.metrics
if not agent_scores_by_iteration:
continue
avg_across_iterations = sum(agent_scores_by_iteration.values()) / len(agent_scores_by_iteration)
row = [f"[bold]{agent_role}[/bold]"]
for iter_num in sorted(iterations_results.keys()):
if iter_num in agent_scores_by_iteration:
score = agent_scores_by_iteration[iter_num]
if score >= 8.0:
color = "green"
elif score >= 6.0:
color = "cyan"
elif score >= 4.0:
color = "yellow"
else:
color = "red"
row.append(f"[bold {color}]{score:.1f}[/]")
else:
row.append("-")
if avg_across_iterations >= 8.0:
color = "green"
elif avg_across_iterations >= 6.0:
color = "cyan"
elif avg_across_iterations >= 4.0:
color = "yellow"
else:
color = "red"
row.append(f"[bold {color}]{avg_across_iterations:.1f}[/]")
table.add_row(*row)
all_metrics: set[Any] = set()
for metrics in agent_metrics_by_iteration.values():
all_metrics.update(metrics.keys())
for metric in sorted(all_metrics, key=lambda x: x.value):
metric_scores = []
row = [f" - {metric.title()}"]
for iter_num in sorted(iterations_results.keys()):
if (iter_num in agent_metrics_by_iteration and
metric in agent_metrics_by_iteration[iter_num]):
metric_score = agent_metrics_by_iteration[iter_num][metric].score
if metric_score is not None:
metric_scores.append(metric_score)
if metric_score >= 8.0:
color = "green"
elif metric_score >= 6.0:
color = "cyan"
elif metric_score >= 4.0:
color = "yellow"
else:
color = "red"
row.append(f"[{color}]{metric_score:.1f}[/]")
else:
row.append("[dim]N/A[/dim]")
else:
row.append("-")
if metric_scores:
avg = sum(metric_scores) / len(metric_scores)
if avg >= 8.0:
color = "green"
elif avg >= 6.0:
color = "cyan"
elif avg >= 4.0:
color = "yellow"
else:
color = "red"
row.append(f"[{color}]{avg:.1f}[/]")
else:
row.append("-")
table.add_row(*row)
table.add_row(*[""] * (len(sorted(iterations_results.keys())) + 2))
self.console_formatter.print(table)
self.console_formatter.print("\n")
def _aggregate_agent_results(
self,
agent_id: str,
agent_role: str,
results: Sequence[AgentEvaluationResult],
strategy: AggregationStrategy = AggregationStrategy.SIMPLE_AVERAGE,
) -> AgentAggregatedEvaluationResult:
metrics_by_category: dict[MetricCategory, list[EvaluationScore]] = defaultdict(list)
for result in results:
for metric_name, evaluation_score in result.metrics.items():
metrics_by_category[metric_name].append(evaluation_score)
aggregated_metrics: dict[MetricCategory, EvaluationScore] = {}
for category, scores in metrics_by_category.items():
valid_scores = [s.score for s in scores if s.score is not None]
avg_score = sum(valid_scores) / len(valid_scores) if valid_scores else None
feedbacks = [s.feedback for s in scores if s.feedback]
feedback_summary = None
if feedbacks:
if len(feedbacks) > 1:
# Use the summarization method for multiple feedbacks
feedback_summary = self._summarize_feedbacks(
agent_role=agent_role,
metric=category.title(),
feedbacks=feedbacks,
scores=[s.score for s in scores],
strategy=strategy
)
else:
feedback_summary = feedbacks[0]
aggregated_metrics[category] = EvaluationScore(
score=avg_score,
feedback=feedback_summary
)
overall_score = None
if aggregated_metrics:
valid_scores = [m.score for m in aggregated_metrics.values() if m.score is not None]
if valid_scores:
overall_score = sum(valid_scores) / len(valid_scores)
return AgentAggregatedEvaluationResult(
agent_id=agent_id,
agent_role=agent_role,
metrics=aggregated_metrics,
overall_score=overall_score,
task_count=len(results),
aggregation_strategy=strategy
)
def _summarize_feedbacks(
self,
agent_role: str,
metric: str,
feedbacks: List[str],
scores: List[float | None],
strategy: AggregationStrategy
) -> str:
if len(feedbacks) <= 2 and all(len(fb) < 200 for fb in feedbacks):
return "\n\n".join([f"Feedback {i+1}: {fb}" for i, fb in enumerate(feedbacks)])
try:
llm = create_llm()
formatted_feedbacks = []
for i, (feedback, score) in enumerate(zip(feedbacks, scores)):
if len(feedback) > 500:
feedback = feedback[:500] + "..."
score_text = f"{score:.1f}" if score is not None else "N/A"
formatted_feedbacks.append(f"Feedback #{i+1} (Score: {score_text}):\n{feedback}")
all_feedbacks = "\n\n" + "\n\n---\n\n".join(formatted_feedbacks)
strategy_guidance = ""
if strategy == AggregationStrategy.BEST_PERFORMANCE:
strategy_guidance = "Focus on the highest-scoring aspects and strengths demonstrated."
elif strategy == AggregationStrategy.WORST_PERFORMANCE:
strategy_guidance = "Focus on areas that need improvement and common issues across tasks."
else: # Default/average strategies
strategy_guidance = "Provide a balanced analysis of strengths and weaknesses across all tasks."
prompt = [
{"role": "system", "content": f"""You are an expert evaluator creating a comprehensive summary of agent performance feedback.
Your job is to synthesize multiple feedback points about the same metric across different tasks.
Create a concise, insightful summary that captures the key patterns and themes from all feedback.
{strategy_guidance}
Your summary should be:
1. Specific and concrete (not vague or general)
2. Focused on actionable insights
3. Highlighting patterns across tasks
4. 150-250 words in length
The summary should be directly usable as final feedback for the agent's performance on this metric."""},
{"role": "user", "content": f"""I need a synthesized summary of the following feedback for:
Agent Role: {agent_role}
Metric: {metric.title()}
{all_feedbacks}
"""}
]
assert llm is not None
response = llm.call(prompt)
return response
except Exception:
return "Synthesized from multiple tasks: " + "\n\n".join([f"- {fb[:500]}..." for fb in feedbacks])

View File

@@ -1,190 +0,0 @@
from datetime import datetime
from typing import Any, Dict, Optional
from collections.abc import Sequence
from crewai.agent import Agent
from crewai.task import Task
from crewai.utilities.events.base_event_listener import BaseEventListener
from crewai.utilities.events.crewai_event_bus import CrewAIEventsBus
from crewai.utilities.events.agent_events import (
AgentExecutionStartedEvent,
AgentExecutionCompletedEvent
)
from crewai.utilities.events.tool_usage_events import (
ToolUsageFinishedEvent,
ToolUsageErrorEvent,
ToolExecutionErrorEvent,
ToolSelectionErrorEvent,
ToolValidateInputErrorEvent
)
from crewai.utilities.events.llm_events import (
LLMCallStartedEvent,
LLMCallCompletedEvent
)
class EvaluationTraceCallback(BaseEventListener):
"""Event listener for collecting execution traces for evaluation.
This listener attaches to the event bus to collect detailed information
about the execution process, including agent steps, tool uses, knowledge
retrievals, and final output - all for use in agent evaluation.
"""
_instance = None
def __new__(cls):
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._initialized = False
return cls._instance
def __init__(self):
if not hasattr(self, "_initialized") or not self._initialized:
super().__init__()
self.traces = {}
self.current_agent_id = None
self.current_task_id = None
self._initialized = True
def setup_listeners(self, event_bus: CrewAIEventsBus):
@event_bus.on(AgentExecutionStartedEvent)
def on_agent_started(source, event: AgentExecutionStartedEvent):
self.on_agent_start(event.agent, event.task)
@event_bus.on(AgentExecutionCompletedEvent)
def on_agent_completed(source, event: AgentExecutionCompletedEvent):
self.on_agent_finish(event.agent, event.task, event.output)
@event_bus.on(ToolUsageFinishedEvent)
def on_tool_completed(source, event: ToolUsageFinishedEvent):
self.on_tool_use(event.tool_name, event.tool_args, event.output, success=True)
@event_bus.on(ToolUsageErrorEvent)
def on_tool_usage_error(source, event: ToolUsageErrorEvent):
self.on_tool_use(event.tool_name, event.tool_args, event.error,
success=False, error_type="usage_error")
@event_bus.on(ToolExecutionErrorEvent)
def on_tool_execution_error(source, event: ToolExecutionErrorEvent):
self.on_tool_use(event.tool_name, event.tool_args, event.error,
success=False, error_type="execution_error")
@event_bus.on(ToolSelectionErrorEvent)
def on_tool_selection_error(source, event: ToolSelectionErrorEvent):
self.on_tool_use(event.tool_name, event.tool_args, event.error,
success=False, error_type="selection_error")
@event_bus.on(ToolValidateInputErrorEvent)
def on_tool_validate_input_error(source, event: ToolValidateInputErrorEvent):
self.on_tool_use(event.tool_name, event.tool_args, event.error,
success=False, error_type="validation_error")
@event_bus.on(LLMCallStartedEvent)
def on_llm_call_started(source, event: LLMCallStartedEvent):
self.on_llm_call_start(event.messages, event.tools)
@event_bus.on(LLMCallCompletedEvent)
def on_llm_call_completed(source, event: LLMCallCompletedEvent):
self.on_llm_call_end(event.messages, event.response)
def on_agent_start(self, agent: Agent, task: Task):
self.current_agent_id = agent.id
self.current_task_id = task.id
trace_key = f"{agent.id}_{task.id}"
self.traces[trace_key] = {
"agent_id": agent.id,
"task_id": task.id,
"tool_uses": [],
"llm_calls": [],
"start_time": datetime.now(),
"final_output": None
}
def on_agent_finish(self, agent: Agent, task: Task, output: Any):
trace_key = f"{agent.id}_{task.id}"
if trace_key in self.traces:
self.traces[trace_key]["final_output"] = output
self.traces[trace_key]["end_time"] = datetime.now()
self.current_agent_id = None
self.current_task_id = None
def on_tool_use(self, tool_name: str, tool_args: dict[str, Any] | str, result: Any,
success: bool = True, error_type: str | None = None):
if not self.current_agent_id or not self.current_task_id:
return
trace_key = f"{self.current_agent_id}_{self.current_task_id}"
if trace_key in self.traces:
tool_use = {
"tool": tool_name,
"args": tool_args,
"result": result,
"success": success,
"timestamp": datetime.now()
}
# Add error information if applicable
if not success and error_type:
tool_use["error"] = True
tool_use["error_type"] = error_type
self.traces[trace_key]["tool_uses"].append(tool_use)
def on_llm_call_start(self, messages: str | Sequence[dict[str, Any]] | None, tools: Sequence[dict[str, Any]] | None = None):
if not self.current_agent_id or not self.current_task_id:
return
trace_key = f"{self.current_agent_id}_{self.current_task_id}"
if trace_key not in self.traces:
return
self.current_llm_call = {
"messages": messages,
"tools": tools,
"start_time": datetime.now(),
"response": None,
"end_time": None
}
def on_llm_call_end(self, messages: str | list[dict[str, Any]] | None, response: Any):
if not self.current_agent_id or not self.current_task_id:
return
trace_key = f"{self.current_agent_id}_{self.current_task_id}"
if trace_key not in self.traces:
return
total_tokens = 0
if hasattr(response, "usage") and hasattr(response.usage, "total_tokens"):
total_tokens = response.usage.total_tokens
current_time = datetime.now()
start_time = None
if hasattr(self, "current_llm_call") and self.current_llm_call:
start_time = self.current_llm_call.get("start_time")
if not start_time:
start_time = current_time
llm_call = {
"messages": messages,
"response": response,
"start_time": start_time,
"end_time": current_time,
"total_tokens": total_tokens
}
self.traces[trace_key]["llm_calls"].append(llm_call)
if hasattr(self, "current_llm_call"):
self.current_llm_call = {}
def get_trace(self, agent_id: str, task_id: str) -> Optional[Dict[str, Any]]:
trace_key = f"{agent_id}_{task_id}"
return self.traces.get(trace_key)
def create_evaluation_callbacks() -> EvaluationTraceCallback:
return EvaluationTraceCallback()
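Because __new__ caches a single instance, create_evaluation_callbacks always returns the same listener, so traces collected during a kickoff remain visible to later evaluation calls:

cb1 = create_evaluation_callbacks()
cb2 = create_evaluation_callbacks()
assert cb1 is cb2  # singleton: both names share one trace store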

View File

@@ -1,30 +0,0 @@
"""Robust JSON parsing utilities for evaluation responses."""
import json
import re
from typing import Any
def extract_json_from_llm_response(text: str) -> dict[str, Any]:
try:
return json.loads(text)
except json.JSONDecodeError:
pass
json_patterns = [
# Standard markdown code blocks with json
r'```json\s*([\s\S]*?)\s*```',
# Code blocks without language specifier
r'```\s*([\s\S]*?)\s*```',
# Inline code with JSON
        r'`([{\[].*[}\]])`',
]
for pattern in json_patterns:
matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL)
for match in matches:
try:
return json.loads(match.strip())
except json.JSONDecodeError:
continue
raise ValueError("No valid JSON found in the response")

View File

@@ -1,66 +0,0 @@
from typing import Any, Dict
from crewai.agent import Agent
from crewai.task import Task
from crewai.evaluation.base_evaluator import BaseEvaluator, EvaluationScore, MetricCategory
from crewai.evaluation.json_parser import extract_json_from_llm_response
class GoalAlignmentEvaluator(BaseEvaluator):
@property
def metric_category(self) -> MetricCategory:
return MetricCategory.GOAL_ALIGNMENT
def evaluate(
self,
agent: Agent,
task: Task,
execution_trace: Dict[str, Any],
final_output: Any,
) -> EvaluationScore:
prompt = [
{"role": "system", "content": """You are an expert evaluator assessing how well an AI agent's output aligns with its assigned task goal.
Score the agent's goal alignment on a scale from 0-10 where:
- 0: Complete misalignment, agent did not understand or attempt the task goal
- 5: Partial alignment, agent attempted the task but missed key requirements
- 10: Perfect alignment, agent fully satisfied all task requirements
Consider:
1. Did the agent correctly interpret the task goal?
2. Did the final output directly address the requirements?
3. Did the agent focus on relevant aspects of the task?
4. Did the agent provide all requested information or deliverables?
Return your evaluation as JSON with fields 'score' (number) and 'feedback' (string).
"""},
{"role": "user", "content": f"""
Agent role: {agent.role}
Agent goal: {agent.goal}
Task description: {task.description}
Expected output: {task.expected_output}
Agent's final output:
{final_output}
Evaluate how well the agent's output aligns with the assigned task goal.
"""}
]
assert self.llm is not None
response = self.llm.call(prompt)
try:
evaluation_data: dict[str, Any] = extract_json_from_llm_response(response)
assert evaluation_data is not None
return EvaluationScore(
score=evaluation_data.get("score", 0),
feedback=evaluation_data.get("feedback", response),
raw_response=response
)
except Exception:
return EvaluationScore(
score=None,
feedback=f"Failed to parse evaluation. Raw response: {response}",
raw_response=response
)

View File

@@ -1,355 +0,0 @@
"""Agent reasoning efficiency evaluators.
This module provides evaluator implementations for:
- Reasoning efficiency
- Loop detection
- Thinking-to-action ratio
"""
import logging
import re
from enum import Enum
from typing import Any, Dict, List, Tuple
import numpy as np
from collections.abc import Sequence
from crewai.agent import Agent
from crewai.task import Task
from crewai.evaluation.base_evaluator import BaseEvaluator, EvaluationScore, MetricCategory
from crewai.evaluation.json_parser import extract_json_from_llm_response
from crewai.tasks.task_output import TaskOutput
class ReasoningPatternType(Enum):
EFFICIENT = "efficient" # Good reasoning flow
LOOP = "loop" # Agent is stuck in a loop
VERBOSE = "verbose" # Agent is unnecessarily verbose
INDECISIVE = "indecisive" # Agent struggles to make decisions
SCATTERED = "scattered" # Agent jumps between topics without focus
class ReasoningEfficiencyEvaluator(BaseEvaluator):
@property
def metric_category(self) -> MetricCategory:
return MetricCategory.REASONING_EFFICIENCY
def evaluate(
self,
agent: Agent,
task: Task,
execution_trace: Dict[str, Any],
final_output: TaskOutput,
) -> EvaluationScore:
llm_calls = execution_trace.get("llm_calls", [])
if not llm_calls or len(llm_calls) < 2:
return EvaluationScore(
score=None,
feedback="Insufficient LLM calls to evaluate reasoning efficiency."
)
total_calls = len(llm_calls)
total_tokens = sum(call.get("total_tokens", 0) for call in llm_calls)
avg_tokens_per_call = total_tokens / total_calls if total_calls > 0 else 0
time_intervals = []
has_reliable_timing = True
for i in range(1, len(llm_calls)):
start_time = llm_calls[i-1].get("end_time")
end_time = llm_calls[i].get("start_time")
if start_time and end_time and start_time != end_time:
try:
interval = end_time - start_time
time_intervals.append(interval.total_seconds() if hasattr(interval, 'total_seconds') else 0)
except Exception:
has_reliable_timing = False
else:
has_reliable_timing = False
loop_detected, loop_details = self._detect_loops(llm_calls)
pattern_analysis = self._analyze_reasoning_patterns(llm_calls)
efficiency_metrics = {
"total_llm_calls": total_calls,
"total_tokens": total_tokens,
"avg_tokens_per_call": avg_tokens_per_call,
"reasoning_pattern": pattern_analysis["primary_pattern"].value,
"loops_detected": loop_detected,
}
if has_reliable_timing and time_intervals:
efficiency_metrics["avg_time_between_calls"] = np.mean(time_intervals)
loop_info = f"Detected {len(loop_details)} potential reasoning loops." if loop_detected else "No significant reasoning loops detected."
call_samples = self._get_call_samples(llm_calls)
prompt = [
{"role": "system", "content": """You are an expert evaluator assessing the reasoning efficiency of an AI agent's thought process.
Evaluate the agent's reasoning efficiency across these five key subcategories:
1. Focus (0-10): How well the agent stays on topic and avoids unnecessary tangents
2. Progression (0-10): How effectively the agent builds on previous thoughts rather than repeating or circling
3. Decision Quality (0-10): How decisively and appropriately the agent makes decisions
4. Conciseness (0-10): How efficiently the agent communicates without unnecessary verbosity
5. Loop Avoidance (0-10): How well the agent avoids getting stuck in repetitive thinking patterns
For each subcategory, provide a score from 0-10 where:
- 0: Completely inefficient
- 5: Moderately efficient
- 10: Highly efficient
The overall score should be a weighted average of these subcategories.
Return your evaluation as JSON with the following structure:
{
"overall_score": float,
"scores": {
"focus": float,
"progression": float,
"decision_quality": float,
"conciseness": float,
"loop_avoidance": float
},
"feedback": string (general feedback about overall reasoning efficiency),
"optimization_suggestions": string (concrete suggestions for improving reasoning efficiency),
"detected_patterns": string (describe any inefficient reasoning patterns you observe)
}"""},
{"role": "user", "content": f"""
Agent role: {agent.role}
Task description: {task.description}
Reasoning efficiency metrics:
- Total LLM calls: {efficiency_metrics["total_llm_calls"]}
- Average tokens per call: {efficiency_metrics["avg_tokens_per_call"]:.1f}
- Primary reasoning pattern: {efficiency_metrics["reasoning_pattern"]}
- {loop_info}
{"- Average time between calls: {:.2f} seconds".format(efficiency_metrics.get("avg_time_between_calls", 0)) if "avg_time_between_calls" in efficiency_metrics else ""}
Sample of agent reasoning flow (chronological sequence):
{call_samples}
Agent's final output:
{final_output.raw[:500]}... (truncated)
Evaluate the reasoning efficiency of this agent based on these interaction patterns.
Identify any inefficient reasoning patterns and provide specific suggestions for optimization.
"""}
]
assert self.llm is not None
response = self.llm.call(prompt)
try:
evaluation_data = extract_json_from_llm_response(response)
scores = evaluation_data.get("scores", {})
focus = scores.get("focus", 5.0)
progression = scores.get("progression", 5.0)
decision_quality = scores.get("decision_quality", 5.0)
conciseness = scores.get("conciseness", 5.0)
loop_avoidance = scores.get("loop_avoidance", 5.0)
overall_score = evaluation_data.get("overall_score", evaluation_data.get("score", 5.0))
feedback = evaluation_data.get("feedback", "No detailed feedback provided.")
optimization_suggestions = evaluation_data.get("optimization_suggestions", "No specific suggestions provided.")
detailed_feedback = "Reasoning Efficiency Evaluation:\n"
detailed_feedback += f"• Focus: {focus}/10 - Staying on topic without tangents\n"
detailed_feedback += f"• Progression: {progression}/10 - Building on previous thinking\n"
detailed_feedback += f"• Decision Quality: {decision_quality}/10 - Making appropriate decisions\n"
detailed_feedback += f"• Conciseness: {conciseness}/10 - Communicating efficiently\n"
detailed_feedback += f"• Loop Avoidance: {loop_avoidance}/10 - Avoiding repetitive patterns\n\n"
detailed_feedback += f"Feedback:\n{feedback}\n\n"
detailed_feedback += f"Optimization Suggestions:\n{optimization_suggestions}"
return EvaluationScore(
score=float(overall_score),
feedback=detailed_feedback,
raw_response=response
)
except Exception as e:
logging.warning(f"Failed to parse reasoning efficiency evaluation: {e}")
return EvaluationScore(
score=None,
feedback=f"Failed to parse reasoning efficiency evaluation. Raw response: {response[:200]}...",
raw_response=response
)
def _detect_loops(self, llm_calls: List[Dict]) -> Tuple[bool, List[Dict]]:
loop_details = []
messages = []
for call in llm_calls:
content = call.get("response", "")
if isinstance(content, str):
messages.append(content)
elif isinstance(content, list) and len(content) > 0:
# Handle message list format
for msg in content:
if isinstance(msg, dict) and "content" in msg:
messages.append(msg["content"])
# Simple n-gram based similarity detection
# For a more robust implementation, consider using embedding-based similarity
for i in range(len(messages) - 2):
for j in range(i + 1, len(messages) - 1):
# Check for repeated patterns (simplistic approach)
# A more sophisticated approach would use semantic similarity
similarity = self._calculate_text_similarity(messages[i], messages[j])
if similarity > 0.7: # Arbitrary threshold
loop_details.append({
"first_occurrence": i,
"second_occurrence": j,
"similarity": similarity,
"snippet": messages[i][:100] + "..."
})
return len(loop_details) > 0, loop_details
def _calculate_text_similarity(self, text1: str, text2: str) -> float:
text1 = re.sub(r'\s+', ' ', text1.lower()).strip()
text2 = re.sub(r'\s+', ' ', text2.lower()).strip()
# Simple Jaccard similarity on word sets
words1 = set(text1.split())
words2 = set(text2.split())
intersection = len(words1.intersection(words2))
union = len(words1.union(words2))
return intersection / union if union > 0 else 0.0
def _analyze_reasoning_patterns(self, llm_calls: List[Dict]) -> Dict[str, Any]:
call_lengths = []
response_times = []
for call in llm_calls:
content = call.get("response", "")
if isinstance(content, str):
call_lengths.append(len(content))
elif isinstance(content, list) and len(content) > 0:
# Handle message list format
total_length = 0
for msg in content:
if isinstance(msg, dict) and "content" in msg:
total_length += len(msg["content"])
call_lengths.append(total_length)
start_time = call.get("start_time")
end_time = call.get("end_time")
if start_time and end_time:
try:
response_times.append(end_time - start_time)
except Exception:
pass
avg_length = np.mean(call_lengths) if call_lengths else 0
std_length = np.std(call_lengths) if call_lengths else 0
length_trend = self._calculate_trend(call_lengths)
primary_pattern = ReasoningPatternType.EFFICIENT
details = "Agent demonstrates efficient reasoning patterns."
loop_score = self._calculate_loop_likelihood(call_lengths, response_times)
if loop_score > 0.7:
primary_pattern = ReasoningPatternType.LOOP
details = "Agent appears to be stuck in repetitive thinking patterns."
elif avg_length > 1000 and std_length / avg_length < 0.3:
primary_pattern = ReasoningPatternType.VERBOSE
details = "Agent is consistently verbose across interactions."
elif len(llm_calls) > 10 and length_trend > 0.5:
primary_pattern = ReasoningPatternType.INDECISIVE
details = "Agent shows signs of indecisiveness with increasing message lengths."
elif std_length / avg_length > 0.8:
primary_pattern = ReasoningPatternType.SCATTERED
details = "Agent shows inconsistent reasoning flow with highly variable responses."
return {
"primary_pattern": primary_pattern,
"details": details,
"metrics": {
"avg_length": avg_length,
"std_length": std_length,
"length_trend": length_trend,
"loop_score": loop_score
}
}
def _calculate_trend(self, values: Sequence[float | int]) -> float:
if not values or len(values) < 2:
return 0.0
try:
x = np.arange(len(values))
y = np.array(values)
# Simple linear regression
slope = np.polyfit(x, y, 1)[0]
# Normalize slope to -1 to 1 range
max_possible_slope = max(values) - min(values)
if max_possible_slope > 0:
normalized_slope = slope / max_possible_slope
return max(min(normalized_slope, 1.0), -1.0)
return 0.0
except Exception:
return 0.0
def _calculate_loop_likelihood(self, call_lengths: Sequence[float], response_times: Sequence[float]) -> float:
if not call_lengths or len(call_lengths) < 3:
return 0.0
indicators = []
if len(call_lengths) >= 4:
repeated_lengths = 0
for i in range(len(call_lengths) - 2):
ratio = call_lengths[i] / call_lengths[i + 2] if call_lengths[i + 2] > 0 else 0
if 0.85 <= ratio <= 1.15:
repeated_lengths += 1
length_repetition_score = repeated_lengths / (len(call_lengths) - 2)
indicators.append(length_repetition_score)
if response_times and len(response_times) >= 3:
try:
std_time = np.std(response_times)
mean_time = np.mean(response_times)
if mean_time > 0:
time_consistency = 1.0 - (std_time / mean_time)
indicators.append(max(0, time_consistency - 0.3) * 1.5)
except Exception:
pass
return np.mean(indicators) if indicators else 0.0
def _get_call_samples(self, llm_calls: List[Dict]) -> str:
samples = []
if len(llm_calls) <= 6:
sample_indices = list(range(len(llm_calls)))
else:
sample_indices = [0, 1, len(llm_calls) // 2 - 1, len(llm_calls) // 2,
len(llm_calls) - 2, len(llm_calls) - 1]
for idx in sample_indices:
call = llm_calls[idx]
content = call.get("response", "")
if isinstance(content, str):
sample = content
elif isinstance(content, list) and len(content) > 0:
sample_parts = []
for msg in content:
if isinstance(msg, dict) and "content" in msg:
sample_parts.append(msg["content"])
sample = "\n".join(sample_parts)
else:
sample = str(content)
truncated = sample[:200] + "..." if len(sample) > 200 else sample
samples.append(f"Call {idx + 1}:\n{truncated}\n")
return "\n".join(samples)

View File

@@ -1,65 +0,0 @@
from typing import Any, Dict
from crewai.agent import Agent
from crewai.task import Task
from crewai.evaluation.base_evaluator import BaseEvaluator, EvaluationScore, MetricCategory
from crewai.evaluation.json_parser import extract_json_from_llm_response
class SemanticQualityEvaluator(BaseEvaluator):
@property
def metric_category(self) -> MetricCategory:
return MetricCategory.SEMANTIC_QUALITY
def evaluate(
self,
agent: Agent,
task: Task,
execution_trace: Dict[str, Any],
final_output: Any,
) -> EvaluationScore:
prompt = [
{"role": "system", "content": """You are an expert evaluator assessing the semantic quality of an AI agent's output.
Score the semantic quality on a scale from 0-10 where:
- 0: Completely incoherent, confusing, or logically flawed output
- 5: Moderately clear and logical output with some issues
- 10: Exceptionally clear, coherent, and logically sound output
Consider:
1. Is the output well-structured and organized?
2. Is the reasoning logical and well-supported?
3. Is the language clear, precise, and appropriate for the task?
4. Are claims supported by evidence when appropriate?
5. Is the output free from contradictions and logical fallacies?
Return your evaluation as JSON with fields 'score' (number) and 'feedback' (string).
"""},
{"role": "user", "content": f"""
Agent role: {agent.role}
Task description: {task.description}
Agent's final output:
{final_output}
Evaluate the semantic quality and reasoning of this output.
"""}
]
assert self.llm is not None
response = self.llm.call(prompt)
try:
evaluation_data: dict[str, Any] = extract_json_from_llm_response(response)
assert evaluation_data is not None
return EvaluationScore(
score=float(evaluation_data["score"]) if evaluation_data.get("score") is not None else None,
feedback=evaluation_data.get("feedback", response),
raw_response=response
)
except Exception:
return EvaluationScore(
score=None,
feedback=f"Failed to parse evaluation. Raw response: {response}",
raw_response=response
)

View File

@@ -1,400 +0,0 @@
import json
from typing import Dict, Any
from crewai.evaluation.base_evaluator import BaseEvaluator, EvaluationScore, MetricCategory
from crewai.evaluation.json_parser import extract_json_from_llm_response
from crewai.agent import Agent
from crewai.task import Task
class ToolSelectionEvaluator(BaseEvaluator):
@property
def metric_category(self) -> MetricCategory:
return MetricCategory.TOOL_SELECTION
def evaluate(
self,
agent: Agent,
task: Task,
execution_trace: Dict[str, Any],
final_output: str,
) -> EvaluationScore:
tool_uses = execution_trace.get("tool_uses", [])
tool_count = len(tool_uses)
unique_tool_types = set([tool.get("tool", "Unknown tool") for tool in tool_uses])
if tool_count == 0:
if not agent.tools:
return EvaluationScore(
score=None,
feedback="Agent had no tools available to use."
)
else:
return EvaluationScore(
score=None,
feedback="Agent had tools available but didn't use any."
)
available_tools_info = ""
if agent.tools:
for tool in agent.tools:
available_tools_info += f"- {tool.name}: {tool.description}\n"
else:
available_tools_info = "No tools available"
tool_types_summary = "Tools selected by the agent:\n"
for tool_type in sorted(unique_tool_types):
tool_types_summary += f"- {tool_type}\n"
prompt = [
{"role": "system", "content": """You are an expert evaluator assessing if an AI agent selected the most appropriate tools for a given task.
You must evaluate based on these 2 criteria:
1. Relevance (0-10): Were the tools chosen directly aligned with the task's goals?
2. Coverage (0-10): Did the agent select ALL appropriate tools from the AVAILABLE tools?
IMPORTANT:
- ONLY consider tools that are listed as available to the agent
- DO NOT suggest tools that aren't in the 'Available tools' list
- DO NOT evaluate the quality or accuracy of tool outputs/results
- DO NOT evaluate how many times each tool was used
- DO NOT evaluate how the agent used the parameters
- DO NOT evaluate whether the agent interpreted the task correctly
Focus ONLY on whether the correct CATEGORIES of tools were selected from what was available.
Return your evaluation as JSON with these fields:
- scores: {"relevance": number, "coverage": number}
- overall_score: number (average of all scores, 0-10)
- feedback: string (focused ONLY on tool selection decisions from available tools)
- improvement_suggestions: string (ONLY suggest better selection from the AVAILABLE tools list, NOT new tools)
"""},
{"role": "user", "content": f"""
Agent role: {agent.role}
Task description: {task.description}
Available tools for this agent:
{available_tools_info}
{tool_types_summary}
Based ONLY on the task description and comparing the AVAILABLE tools with those that were selected (listed above), evaluate if the agent selected the appropriate tool types for this task.
IMPORTANT:
- ONLY evaluate selection from tools listed as available
- DO NOT suggest new tools that aren't in the available tools list
- DO NOT evaluate tool usage or results
"""}
]
assert self.llm is not None
response = self.llm.call(prompt)
try:
evaluation_data = extract_json_from_llm_response(response)
assert evaluation_data is not None
scores = evaluation_data.get("scores", {})
relevance = scores.get("relevance", 5.0)
coverage = scores.get("coverage", 5.0)
overall_score = float(evaluation_data.get("overall_score", 5.0))
feedback = "Tool Selection Evaluation:\n"
feedback += f"• Relevance: {relevance}/10 - Selection of appropriate tool types for the task\n"
feedback += f"• Coverage: {coverage}/10 - Selection of all necessary tool types\n"
if "improvement_suggestions" in evaluation_data:
feedback += f"Improvement Suggestions:\n{evaluation_data['improvement_suggestions']}"
else:
feedback += evaluation_data.get("feedback", "No detailed feedback available.")
return EvaluationScore(
score=overall_score,
feedback=feedback,
raw_response=response
)
except Exception as e:
return EvaluationScore(
score=None,
feedback=f"Error evaluating tool selection: {e}",
raw_response=response
)
class ParameterExtractionEvaluator(BaseEvaluator):
@property
def metric_category(self) -> MetricCategory:
return MetricCategory.PARAMETER_EXTRACTION
def evaluate(
self,
agent: Agent,
task: Task,
execution_trace: Dict[str, Any],
final_output: str,
) -> EvaluationScore:
tool_uses = execution_trace.get("tool_uses", [])
tool_count = len(tool_uses)
if tool_count == 0:
return EvaluationScore(
score=None,
feedback="No tool usage detected. Cannot evaluate parameter extraction."
)
validation_errors = []
for tool_use in tool_uses:
if not tool_use.get("success", True) and tool_use.get("error_type") == "validation_error":
validation_errors.append({
"tool": tool_use.get("tool", "Unknown tool"),
"error": tool_use.get("result"),
"args": tool_use.get("args", {})
})
validation_error_rate = len(validation_errors) / tool_count if tool_count > 0 else 0
param_samples = []
for i, tool_use in enumerate(tool_uses[:5]):
tool_name = tool_use.get("tool", "Unknown tool")
tool_args = tool_use.get("args", {})
success = tool_use.get("success", True) and not tool_use.get("error", False)
error_type = tool_use.get("error_type", "") if not success else ""
is_validation_error = error_type == "validation_error"
sample = f"Tool use #{i+1} - {tool_name}:\n"
sample += f"- Parameters: {json.dumps(tool_args, indent=2)}\n"
sample += f"- Success: {'No' if not success else 'Yes'}"
if is_validation_error:
sample += " (PARAMETER VALIDATION ERROR)\n"
sample += f"- Error: {tool_use.get('result', 'Unknown error')}"
elif not success:
sample += f" (Other error: {error_type})\n"
param_samples.append(sample)
validation_errors_info = ""
if validation_errors:
validation_errors_info = f"\nParameter validation errors detected: {len(validation_errors)} ({validation_error_rate:.1%} of tool uses)\n"
for i, err in enumerate(validation_errors[:3]):
tool_name = err.get("tool", "Unknown tool")
error_msg = err.get("error", "Unknown error")
args = err.get("args", {})
validation_errors_info += f"\nValidation Error #{i+1}:\n- Tool: {tool_name}\n- Args: {json.dumps(args, indent=2)}\n- Error: {error_msg}"
if len(validation_errors) > 3:
validation_errors_info += f"\n...and {len(validation_errors) - 3} more validation errors."
param_samples_text = "\n\n".join(param_samples)
prompt = [
{"role": "system", "content": """You are an expert evaluator assessing how well an AI agent extracts and formats PARAMETER VALUES for tool calls.
Your job is to evaluate ONLY whether the agent used the correct parameter VALUES, not whether the right tools were selected or how the tools were invoked.
Evaluate parameter extraction based on these criteria:
1. Accuracy (0-10): Are parameter values correctly identified from the context/task?
2. Formatting (0-10): Are values formatted correctly for each tool's requirements?
3. Completeness (0-10): Are all required parameter values provided, with no missing information?
IMPORTANT: DO NOT evaluate:
- Whether the right tool was chosen (that's the ToolSelectionEvaluator's job)
- How the tools were structurally invoked (that's the ToolInvocationEvaluator's job)
- The quality of results from tools
Focus ONLY on the PARAMETER VALUES - whether they were correctly extracted from the context, properly formatted, and complete.
Validation errors are important signals that parameter values weren't properly extracted or formatted.
Return your evaluation as JSON with these fields:
- scores: {"accuracy": number, "formatting": number, "completeness": number}
- overall_score: number (average of all scores, 0-10)
- feedback: string (focused ONLY on parameter value extraction quality)
- improvement_suggestions: string (concrete suggestions for better parameter VALUE extraction)
"""},
{"role": "user", "content": f"""
Agent role: {agent.role}
Task description: {task.description}
Parameter extraction examples:
{param_samples_text}
{validation_errors_info}
Evaluate the quality of the agent's parameter extraction for this task.
"""}
]
assert self.llm is not None
response = self.llm.call(prompt)
try:
evaluation_data = extract_json_from_llm_response(response)
assert evaluation_data is not None
scores = evaluation_data.get("scores", {})
accuracy = scores.get("accuracy", 5.0)
formatting = scores.get("formatting", 5.0)
completeness = scores.get("completeness", 5.0)
overall_score = float(evaluation_data.get("overall_score", 5.0))
feedback = "Parameter Extraction Evaluation:\n"
feedback += f"• Accuracy: {accuracy}/10 - Correctly identifying required parameters\n"
feedback += f"• Formatting: {formatting}/10 - Properly formatting parameters for tools\n"
feedback += f"• Completeness: {completeness}/10 - Including all necessary information\n\n"
if "improvement_suggestions" in evaluation_data:
feedback += f"Improvement Suggestions:\n{evaluation_data['improvement_suggestions']}"
else:
feedback += evaluation_data.get("feedback", "No detailed feedback available.")
return EvaluationScore(
score=overall_score,
feedback=feedback,
raw_response=response
)
except Exception as e:
return EvaluationScore(
score=None,
feedback=f"Error evaluating parameter extraction: {e}",
raw_response=response
)
class ToolInvocationEvaluator(BaseEvaluator):
@property
def metric_category(self) -> MetricCategory:
return MetricCategory.TOOL_INVOCATION
def evaluate(
self,
agent: Agent,
task: Task,
execution_trace: Dict[str, Any],
final_output: str,
) -> EvaluationScore:
tool_uses = execution_trace.get("tool_uses", [])
tool_errors = []
tool_count = len(tool_uses)
if tool_count == 0:
return EvaluationScore(
score=None,
feedback="No tool usage detected. Cannot evaluate tool invocation."
)
for tool_use in tool_uses:
if not tool_use.get("success", True) or tool_use.get("error", False):
error_info = {
"tool": tool_use.get("tool", "Unknown tool"),
"error": tool_use.get("result"),
"error_type": tool_use.get("error_type", "unknown_error")
}
tool_errors.append(error_info)
error_rate = len(tool_errors) / tool_count if tool_count > 0 else 0
error_types = {}
for error in tool_errors:
error_type = error.get("error_type", "unknown_error")
if error_type not in error_types:
error_types[error_type] = 0
error_types[error_type] += 1
invocation_samples = []
for i, tool_use in enumerate(tool_uses[:5]):
tool_name = tool_use.get("tool", "Unknown tool")
tool_args = tool_use.get("args", {})
success = tool_use.get("success", True) and not tool_use.get("error", False)
error_type = tool_use.get("error_type", "") if not success else ""
error_msg = tool_use.get("result", "No error") if not success else "No error"
sample = f"Tool invocation #{i+1}:\n"
sample += f"- Tool: {tool_name}\n"
sample += f"- Parameters: {json.dumps(tool_args, indent=2)}\n"
sample += f"- Success: {'No' if not success else 'Yes'}\n"
if not success:
sample += f"- Error type: {error_type}\n"
sample += f"- Error: {error_msg}"
invocation_samples.append(sample)
error_type_summary = ""
if error_types:
error_type_summary = "Error type breakdown:\n"
for error_type, count in error_types.items():
error_type_summary += f"- {error_type}: {count} occurrences ({(count/tool_count):.1%})\n"
invocation_samples_text = "\n\n".join(invocation_samples)
prompt = [
{"role": "system", "content": """You are an expert evaluator assessing how correctly an AI agent's tool invocations are STRUCTURED.
Your job is to evaluate ONLY the structural and syntactical aspects of how the agent called tools, NOT which tools were selected or what parameter values were used.
Evaluate the agent's tool invocation based on these criteria:
1. Structure (0-10): Does the tool call follow the expected syntax and format?
2. Error Handling (0-10): Does the agent handle tool errors appropriately?
3. Invocation Patterns (0-10): Are tool calls properly sequenced, batched, or managed?
Error types that indicate invocation issues:
- execution_error: The tool was called correctly but failed during execution
- usage_error: General errors in how the tool was used structurally
IMPORTANT: DO NOT evaluate:
- Whether the right tool was chosen (that's the ToolSelectionEvaluator's job)
- Whether the parameter values are correct (that's the ParameterExtractionEvaluator's job)
- The quality of results from tools
Focus ONLY on HOW tools were invoked - the structure, format, and handling of the invocation process.
Return your evaluation as JSON with these fields:
- scores: {"structure": number, "error_handling": number, "invocation_patterns": number}
- overall_score: number (average of all scores, 0-10)
- feedback: string (focused ONLY on structural aspects of tool invocation)
- improvement_suggestions: string (concrete suggestions for better structuring of tool calls)
"""},
{"role": "user", "content": f"""
Agent role: {agent.role}
Task description: {task.description}
Tool invocation examples:
{invocation_samples_text}
Tool error rate: {error_rate:.2%} ({len(tool_errors)} errors out of {tool_count} invocations)
{error_type_summary}
Evaluate the quality of the agent's tool invocation structure during this task.
"""}
]
assert self.llm is not None
response = self.llm.call(prompt)
try:
evaluation_data = extract_json_from_llm_response(response)
assert evaluation_data is not None
scores = evaluation_data.get("scores", {})
structure = scores.get("structure", 5.0)
error_handling = scores.get("error_handling", 5.0)
invocation_patterns = scores.get("invocation_patterns", 5.0)
overall_score = float(evaluation_data.get("overall_score", 5.0))
feedback = "Tool Invocation Evaluation:\n"
feedback += f"• Structure: {structure}/10 - Following proper syntax and format\n"
feedback += f"• Error Handling: {error_handling}/10 - Appropriately handling tool errors\n"
feedback += f"• Invocation Patterns: {invocation_patterns}/10 - Proper sequencing and management of calls\n\n"
if "improvement_suggestions" in evaluation_data:
feedback += f"Improvement Suggestions:\n{evaluation_data['improvement_suggestions']}"
else:
feedback += evaluation_data.get("feedback", "No detailed feedback available.")
return EvaluationScore(
score=overall_score,
feedback=feedback,
raw_response=response
)
except Exception as e:
return EvaluationScore(
score=None,
feedback=f"Error evaluating tool invocation: {e}",
raw_response=response
)
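
For orientation, here is a minimal sketch of how one of these evaluators could be driven by hand. It assumes the constructor accepts an llm keyword (as the GoalAlignmentEvaluator test later in this diff does), that my_llm, my_agent, and my_task are placeholders for real objects, and that the trace dict mirrors the keys evaluate() reads above ("tool_uses", "tool", "args", "success", "error_type", "result").

# Hypothetical driver for the evaluators above; my_llm/my_agent/my_task are
# placeholders, and the trace shape follows what evaluate() reads.
execution_trace = {
    "tool_uses": [
        {"tool": "search", "args": {"query": "latest revenue"}, "success": True},
        {
            "tool": "calculator",
            "args": {"expression": ""},
            "success": False,
            "error_type": "validation_error",
            "result": "expression must not be empty",
        },
    ]
}

evaluator = ParameterExtractionEvaluator(llm=my_llm)
score = evaluator.evaluate(
    agent=my_agent,
    task=my_task,
    execution_trace=execution_trace,
    final_output="Final report text...",
)
print(score.score, score.feedback)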

View File

@@ -537,7 +537,6 @@ class LiteAgent(FlowTrackable, BaseModel):
         crewai_event_bus.emit(
             self,
             event=LLMCallCompletedEvent(
-                messages=self._messages,
                 response=answer,
                 call_type=LLMCallType.LLM_CALL,
                 from_agent=self,

View File

@@ -508,6 +508,7 @@ class LLM(BaseLLM):
                     # Enable tool calls using streaming
                     if "tool_calls" in delta:
                         tool_calls = delta["tool_calls"]
+
                         if tool_calls:
                             result = self._handle_streaming_tool_calls(
                                 tool_calls=tool_calls,
@@ -516,7 +517,6 @@ class LLM(BaseLLM):
                                 from_task=from_task,
                                 from_agent=from_agent,
                             )
-
                             if result is not None:
                                 chunk_content = result
@@ -631,7 +631,7 @@ class LLM(BaseLLM):
             # Log token usage if available in streaming mode
             self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)

             # Emit completion event and return response
-            self._handle_emit_call_events(response=full_response, call_type=LLMCallType.LLM_CALL, from_task=from_task, from_agent=from_agent, messages=params["messages"])
+            self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL, from_task, from_agent)
             return full_response

         # --- 9) Handle tool calls if present
@@ -643,7 +643,7 @@ class LLM(BaseLLM):
             self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)

             # --- 11) Emit completion event and return response
-            self._handle_emit_call_events(response=full_response, call_type=LLMCallType.LLM_CALL, from_task=from_task, from_agent=from_agent, messages=params["messages"])
+            self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL, from_task, from_agent)
             return full_response

         except ContextWindowExceededError as e:
@@ -655,7 +655,7 @@ class LLM(BaseLLM):
             logging.error(f"Error in streaming response: {str(e)}")
             if full_response.strip():
                 logging.warning(f"Returning partial response despite error: {str(e)}")
-                self._handle_emit_call_events(response=full_response, call_type=LLMCallType.LLM_CALL, from_task=from_task, from_agent=from_agent, messages=params["messages"])
+                self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL, from_task, from_agent)
                 return full_response

             # Emit failed event and re-raise the exception
@@ -809,7 +809,7 @@ class LLM(BaseLLM):
         # --- 5) If no tool calls or no available functions, return the text response directly
         if not tool_calls or not available_functions:
-            self._handle_emit_call_events(response=text_response, call_type=LLMCallType.LLM_CALL, from_task=from_task, from_agent=from_agent, messages=params["messages"])
+            self._handle_emit_call_events(text_response, LLMCallType.LLM_CALL, from_task, from_agent)
             return text_response

         # --- 6) Handle tool calls if present
@@ -818,7 +818,7 @@ class LLM(BaseLLM):
             return tool_result

         # --- 7) If tool call handling didn't return a result, emit completion event and return text response
-        self._handle_emit_call_events(response=text_response, call_type=LLMCallType.LLM_CALL, from_task=from_task, from_agent=from_agent, messages=params["messages"])
+        self._handle_emit_call_events(text_response, LLMCallType.LLM_CALL, from_task, from_agent)
         return text_response

     def _handle_tool_call(
@@ -861,7 +861,6 @@ class LLM(BaseLLM):
                     tool_args=function_args,
                 ),
             )
-
             result = fn(**function_args)
             crewai_event_bus.emit(
                 self,
@@ -875,7 +874,7 @@ class LLM(BaseLLM):
             )

             # --- 3.3) Emit success event
-            self._handle_emit_call_events(response=result, call_type=LLMCallType.TOOL_CALL)
+            self._handle_emit_call_events(result, LLMCallType.TOOL_CALL)
             return result
         except Exception as e:
             # --- 3.4) Handle execution errors
@@ -992,20 +991,17 @@ class LLM(BaseLLM):
             logging.error(f"LiteLLM call failed: {str(e)}")
             raise

-    def _handle_emit_call_events(self, response: Any, call_type: LLMCallType, from_task: Optional[Any] = None, from_agent: Optional[Any] = None, messages: str | list[dict[str, Any]] | None = None):
+    def _handle_emit_call_events(self, response: Any, call_type: LLMCallType, from_task: Optional[Any] = None, from_agent: Optional[Any] = None):
         """Handle the events for the LLM call.

         Args:
             response (str): The response from the LLM call.
             call_type (str): The type of call, either "tool_call" or "llm_call".
-            from_task: Optional task object
-            from_agent: Optional agent object
-            messages: Optional messages object
         """
-        assert hasattr(crewai_event_bus, "emit")
         crewai_event_bus.emit(
             self,
-            event=LLMCallCompletedEvent(messages=messages, response=response, call_type=call_type, from_task=from_task, from_agent=from_agent),
+            event=LLMCallCompletedEvent(response=response, call_type=call_type, from_task=from_task, from_agent=from_agent),
         )

     def _format_messages_for_provider(
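
The net effect of these hunks is that LLMCallCompletedEvent no longer carries the prompt messages, only the response and call metadata. A hedged sketch of a listener written against the new shape, following the @crewai_event_bus.on(...) registration pattern used by the EventListener later in this diff:

from crewai.utilities.events.crewai_event_bus import crewai_event_bus
from crewai.utilities.events.llm_events import LLMCallCompletedEvent

@crewai_event_bus.on(LLMCallCompletedEvent)
def on_llm_call_completed(source, event: LLMCallCompletedEvent):
    # After this change the event exposes response/call_type/from_task/from_agent;
    # the messages payload must now be captured from LLMCallStartedEvent instead.
    print(event.call_type, str(event.response)[:80])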

View File

@@ -97,7 +97,7 @@ class Task(BaseModel):
     )
     context: Union[List["Task"], None, _NotSpecified] = Field(
         description="Other tasks that will have their output used as context for this task.",
-        default=NOT_SPECIFIED,
+        default=NOT_SPECIFIED
     )
     async_execution: Optional[bool] = Field(
         description="Whether the task should be executed asynchronously or not.",
@@ -158,7 +158,9 @@ class Task(BaseModel):
     end_time: Optional[datetime.datetime] = Field(
         default=None, description="End time of the task execution"
     )
-    model_config = {"arbitrary_types_allowed": True}
+    model_config = {
+        "arbitrary_types_allowed": True
+    }

     @field_validator("guardrail")
     @classmethod
@@ -202,6 +204,7 @@ class Task(BaseModel):
         # Check return annotation if present, but don't require it
         return_annotation = sig.return_annotation
         if return_annotation != inspect.Signature.empty:
+
             return_annotation_args = get_args(return_annotation)
             if not (
                 get_origin(return_annotation) is tuple
@@ -434,7 +437,7 @@ class Task(BaseModel):
         guardrail_result = process_guardrail(
             output=task_output,
             guardrail=self._guardrail,
-            retry_count=self.retry_count,
+            retry_count=self.retry_count
         )
         if not guardrail_result.success:
             if self.retry_count >= self.max_retries:
@@ -507,6 +510,8 @@ class Task(BaseModel):
         )
         from crewai.utilities.events.crewai_event_bus import crewai_event_bus

+        result = self._guardrail(task_output)
+
         crewai_event_bus.emit(
             self,
             LLMGuardrailStartedEvent(
@@ -514,13 +519,7 @@ class Task(BaseModel):
             ),
         )

-        try:
-            result = self._guardrail(task_output)
-            guardrail_result = GuardrailResult.from_tuple(result)
-        except Exception as e:
-            guardrail_result = GuardrailResult(
-                success=False, result=None, error=f"Guardrail execution error: {str(e)}"
-            )
+        guardrail_result = GuardrailResult.from_tuple(result)

         crewai_event_bus.emit(
             self,

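Note the behavioral consequence of the last two hunks: on the left-hand side the guardrail call is wrapped in try/except and converted into a failed GuardrailResult, while on the right it runs before the started event with no exception shield. A sketch of the tuple contract that GuardrailResult.from_tuple implies, assuming the usual (success, data-or-error) return shape for guardrail callables:

# Illustrative guardrail; the (bool, value) return shape is an assumption
# inferred from GuardrailResult.from_tuple(result) above.
def word_limit_guardrail(task_output):
    words = len(task_output.raw.split())
    if words <= 200:
        return (True, task_output.raw)
    return (False, f"Output too long: {words} words (max 200)")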
View File

@@ -1 +0,0 @@
-"""Crew-specific utilities."""

View File

@@ -1,16 +0,0 @@
-"""Context management utilities for tracking crew and task execution context using OpenTelemetry baggage."""
-
-from typing import Optional
-
-from opentelemetry import baggage
-
-from crewai.utilities.crew.models import CrewContext
-
-
-def get_crew_context() -> Optional[CrewContext]:
-    """Get the current crew context from OpenTelemetry baggage.
-
-    Returns:
-        CrewContext instance containing crew context information, or None if no context is set
-    """
-    return baggage.get_baggage("crew_context")

View File

@@ -1,16 +0,0 @@
-"""Models for crew-related data structures."""
-
-from typing import Optional
-
-from pydantic import BaseModel, Field
-
-
-class CrewContext(BaseModel):
-    """Model representing crew context information."""
-
-    id: Optional[str] = Field(
-        default=None, description="Unique identifier for the crew"
-    )
-    key: Optional[str] = Field(
-        default=None, description="Optional crew key/name for identification"
-    )

View File

@@ -155,7 +155,6 @@ class CrewEvaluator:
         )

         console = Console()
-        console.print("\n")
         console.print(table)

     def evaluate(self, task_output: TaskOutput):

View File

@@ -22,10 +22,6 @@ from crewai.utilities.events.llm_events import (
     LLMCallStartedEvent,
     LLMStreamChunkEvent,
 )
-from crewai.utilities.events.llm_guardrail_events import (
-    LLMGuardrailStartedEvent,
-    LLMGuardrailCompletedEvent,
-)
 from crewai.utilities.events.utils.console_formatter import ConsoleFormatter

 from .agent_events import (
@@ -374,23 +370,6 @@ class EventListener(BaseEventListener):
                     print(content, end="", flush=True)
                     self.next_chunk = self.text_stream.tell()

-        # ----------- LLM GUARDRAIL EVENTS -----------
-
-        @crewai_event_bus.on(LLMGuardrailStartedEvent)
-        def on_llm_guardrail_started(source, event: LLMGuardrailStartedEvent):
-            guardrail_str = str(event.guardrail)
-            guardrail_name = (
-                guardrail_str[:50] + "..." if len(guardrail_str) > 50 else guardrail_str
-            )
-            self.formatter.handle_guardrail_started(guardrail_name, event.retry_count)
-
-        @crewai_event_bus.on(LLMGuardrailCompletedEvent)
-        def on_llm_guardrail_completed(source, event: LLMGuardrailCompletedEvent):
-            self.formatter.handle_guardrail_completed(
-                event.success, event.error, event.retry_count
-            )
-
         @crewai_event_bus.on(CrewTestStartedEvent)
         def on_crew_test_started(source, event: CrewTestStartedEvent):
             cloned_crew = source.copy()

View File

@@ -48,8 +48,8 @@ class LLMCallStartedEvent(LLMEventBase):
     """

     type: str = "llm_call_started"
-    messages: Optional[Union[str, List[Dict[str, Any]]]] = None
-    tools: Optional[List[dict[str, Any]]] = None
+    messages: Union[str, List[Dict[str, Any]]]
+    tools: Optional[List[dict]] = None
     callbacks: Optional[List[Any]] = None
     available_functions: Optional[Dict[str, Any]] = None
@@ -58,10 +58,10 @@ class LLMCallCompletedEvent(LLMEventBase):
     """Event emitted when a LLM call completes"""

     type: str = "llm_call_completed"
-    messages: str | list[dict[str, Any]] | None = None
     response: Any
     call_type: LLMCallType


 class LLMCallFailedEvent(LLMEventBase):
     """Event emitted when a LLM call fails"""

View File

@@ -1,4 +1,3 @@
-from inspect import getsource
 from typing import Any, Callable, Optional, Union

 from crewai.utilities.events.base_events import BaseEvent
@@ -17,26 +16,23 @@ class LLMGuardrailStartedEvent(BaseEvent):
     retry_count: int

     def __init__(self, **data):
+        from inspect import getsource
+
         from crewai.tasks.llm_guardrail import LLMGuardrail
         from crewai.tasks.hallucination_guardrail import HallucinationGuardrail

         super().__init__(**data)
-        if isinstance(self.guardrail, (LLMGuardrail, HallucinationGuardrail)):
+        if isinstance(self.guardrail, LLMGuardrail) or isinstance(
+            self.guardrail, HallucinationGuardrail
+        ):
             self.guardrail = self.guardrail.description.strip()
         elif isinstance(self.guardrail, Callable):
             self.guardrail = getsource(self.guardrail).strip()


 class LLMGuardrailCompletedEvent(BaseEvent):
-    """Event emitted when a guardrail task completes
-
-    Attributes:
-        success: Whether the guardrail validation passed
-        result: The validation result
-        error: Error message if validation failed
-        retry_count: The number of times the guardrail has been retried
-    """
+    """Event emitted when a guardrail task completes"""

     type: str = "llm_guardrail_completed"
     success: bool

View File

@@ -1473,7 +1473,9 @@ class ConsoleFormatter:
             return None

         memory_branch = branch_to_use.add("")
-        self.update_tree_label(memory_branch, "🧠", "Memory Retrieval Started", "blue")
+        self.update_tree_label(
+            memory_branch, "🧠", "Memory Retrieval Started", "blue"
+        )

         self.print(tree_to_use)
         self.print()
@@ -1547,6 +1549,7 @@ class ConsoleFormatter:
         if memory_content:
             add_panel()

+
     def handle_memory_query_completed(
         self,
         agent_branch: Optional[Tree],
@@ -1613,8 +1616,11 @@ class ConsoleFormatter:
                     sources_branch.add(f"{memory_type} - Error: {error}")
                     break

     def handle_memory_save_started(
-        self, agent_branch: Optional[Tree], crew_tree: Optional[Tree]
+        self,
+        agent_branch: Optional[Tree],
+        crew_tree: Optional[Tree]
     ) -> None:
         if not self.verbose:
             return None
@@ -1627,7 +1633,7 @@ class ConsoleFormatter:
         for child in tree_to_use.children:
            if "Memory Update" in str(child.label):
                break
        else:
            memory_branch = tree_to_use.add("")
            self.update_tree_label(
@@ -1694,62 +1700,4 @@ class ConsoleFormatter:
             memory_branch.add(content)

         self.print(tree_to_use)
         self.print()
-
-    def handle_guardrail_started(
-        self,
-        guardrail_name: str,
-        retry_count: int,
-    ) -> None:
-        """Display guardrail evaluation started status.
-
-        Args:
-            guardrail_name: Name/description of the guardrail being evaluated.
-            retry_count: Zero-based retry count (0 = first attempt).
-        """
-        if not self.verbose:
-            return
-
-        content = self.create_status_content(
-            "Guardrail Evaluation Started",
-            guardrail_name,
-            "yellow",
-            Status="🔄 Evaluating",
-            Attempt=f"{retry_count + 1}",
-        )
-        self.print_panel(content, "🛡️ Guardrail Check", "yellow")
-
-    def handle_guardrail_completed(
-        self,
-        success: bool,
-        error: Optional[str],
-        retry_count: int,
-    ) -> None:
-        """Display guardrail evaluation result.
-
-        Args:
-            success: Whether validation passed.
-            error: Error message if validation failed.
-            retry_count: Zero-based retry count.
-        """
-        if not self.verbose:
-            return
-
-        if success:
-            content = self.create_status_content(
-                "Guardrail Passed",
-                "Validation Successful",
-                "green",
-                Status="✅ Validated",
-                Attempts=f"{retry_count + 1}",
-            )
-            self.print_panel(content, "🛡️ Guardrail Success", "green")
-        else:
-            content = self.create_status_content(
-                "Guardrail Failed",
-                "Validation Error",
-                "red",
-                Error=str(error) if error else "Unknown error",
-                Attempts=f"{retry_count + 1}",
-            )
-            self.print_panel(content, "🛡️ Guardrail Failed", "red")

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,419 +1,103 @@
-import pytest
-from datetime import datetime, timedelta
+import unittest
+from unittest.mock import MagicMock, patch

 import requests
-from unittest.mock import MagicMock, patch, call

 from crewai.cli.authentication.main import AuthenticationCommand
-from crewai.cli.authentication.constants import (
-    AUTH0_AUDIENCE,
-    AUTH0_CLIENT_ID,
-    AUTH0_DOMAIN,
-    WORKOS_DOMAIN,
-    WORKOS_CLI_CONNECT_APP_ID,
-    WORKOS_ENVIRONMENT_ID,
-)


-class TestAuthenticationCommand:
-    def setup_method(self):
+class TestAuthenticationCommand(unittest.TestCase):
+    def setUp(self):
         self.auth_command = AuthenticationCommand()

-    @pytest.mark.parametrize(
-        "user_provider,expected_urls",
-        [
-            (
-                "auth0",
-                {
-                    "device_code_url": f"https://{AUTH0_DOMAIN}/oauth/device/code",
-                    "token_url": f"https://{AUTH0_DOMAIN}/oauth/token",
-                    "client_id": AUTH0_CLIENT_ID,
-                    "audience": AUTH0_AUDIENCE,
-                },
-            ),
-            (
-                "workos",
-                {
-                    "device_code_url": f"https://{WORKOS_DOMAIN}/oauth2/device_authorization",
-                    "token_url": f"https://{WORKOS_DOMAIN}/oauth2/token",
-                    "client_id": WORKOS_CLI_CONNECT_APP_ID,
-                },
-            ),
-        ],
-    )
-    @patch(
-        "crewai.cli.authentication.main.AuthenticationCommand._determine_user_provider"
-    )
-    @patch("crewai.cli.authentication.main.AuthenticationCommand._get_device_code")
-    @patch(
-        "crewai.cli.authentication.main.AuthenticationCommand._display_auth_instructions"
-    )
-    @patch("crewai.cli.authentication.main.AuthenticationCommand._poll_for_token")
-    @patch("crewai.cli.authentication.main.console.print")
-    def test_login(
-        self,
-        mock_console_print,
-        mock_poll,
-        mock_display,
-        mock_get_device,
-        mock_determine_provider,
-        user_provider,
-        expected_urls,
-    ):
-        mock_determine_provider.return_value = user_provider
-        mock_get_device.return_value = {
-            "device_code": "test_code",
-            "user_code": "123456",
-        }
-
-        self.auth_command.login()
-
-        mock_console_print.assert_called_once_with(
-            "Signing in to CrewAI Enterprise...\n", style="bold blue"
-        )
-        mock_determine_provider.assert_called_once()
-        mock_get_device.assert_called_once_with(
-            expected_urls["client_id"],
-            expected_urls["device_code_url"],
-            expected_urls.get("audience", None),
-        )
-        mock_display.assert_called_once_with(
-            {"device_code": "test_code", "user_code": "123456"}
-        )
-        mock_poll.assert_called_once_with(
-            {"device_code": "test_code", "user_code": "123456"},
-            expected_urls["client_id"],
-            expected_urls["token_url"],
-        )
-
-    @patch("crewai.cli.authentication.main.webbrowser")
-    @patch("crewai.cli.authentication.main.console.print")
-    def test_display_auth_instructions(self, mock_console_print, mock_webbrowser):
-        device_code_data = {
-            "verification_uri_complete": "https://example.com/auth",
-            "user_code": "123456",
-        }
-
-        self.auth_command._display_auth_instructions(device_code_data)
-
-        expected_calls = [
-            call("1. Navigate to: ", "https://example.com/auth"),
-            call("2. Enter the following code: ", "123456"),
-        ]
-        mock_console_print.assert_has_calls(expected_calls)
-        mock_webbrowser.open.assert_called_once_with("https://example.com/auth")
-
-    @pytest.mark.parametrize(
-        "user_provider,jwt_config",
-        [
-            (
-                "auth0",
-                {
-                    "jwks_url": f"https://{AUTH0_DOMAIN}/.well-known/jwks.json",
-                    "issuer": f"https://{AUTH0_DOMAIN}/",
-                    "audience": AUTH0_AUDIENCE,
-                },
-            ),
-            (
-                "workos",
-                {
-                    "jwks_url": f"https://{WORKOS_DOMAIN}/oauth2/jwks",
-                    "issuer": f"https://{WORKOS_DOMAIN}",
-                    "audience": WORKOS_ENVIRONMENT_ID,
-                },
-            ),
-        ],
-    )
-    @pytest.mark.parametrize("has_expiration", [True, False])
-    @patch("crewai.cli.authentication.main.validate_jwt_token")
-    @patch("crewai.cli.authentication.main.TokenManager.save_tokens")
-    def test_validate_and_save_token(
-        self,
-        mock_save_tokens,
-        mock_validate_jwt,
-        user_provider,
-        jwt_config,
-        has_expiration,
-    ):
-        self.auth_command.user_provider = user_provider
-        token_data = {"access_token": "test_access_token", "id_token": "test_id_token"}
-        if has_expiration:
-            future_timestamp = int((datetime.now() + timedelta(days=100)).timestamp())
-            decoded_token = {"exp": future_timestamp}
-        else:
-            decoded_token = {}
-        mock_validate_jwt.return_value = decoded_token
-
-        self.auth_command._validate_and_save_token(token_data)
-
-        mock_validate_jwt.assert_called_once_with(
-            jwt_token="test_access_token",
-            jwks_url=jwt_config["jwks_url"],
-            issuer=jwt_config["issuer"],
-            audience=jwt_config["audience"],
-        )
-        if has_expiration:
-            mock_save_tokens.assert_called_once_with(
-                "test_access_token", future_timestamp
-            )
-        else:
-            mock_save_tokens.assert_called_once_with("test_access_token", 0)
-
-    @patch("crewai.cli.tools.main.ToolCommand")
-    @patch("crewai.cli.authentication.main.Settings")
-    @patch("crewai.cli.authentication.main.console.print")
-    def test_login_to_tool_repository_success(
-        self, mock_console_print, mock_settings, mock_tool_command
-    ):
-        mock_tool_instance = MagicMock()
-        mock_tool_command.return_value = mock_tool_instance
-        mock_settings_instance = MagicMock()
-        mock_settings_instance.org_name = "Test Org"
-        mock_settings_instance.org_uuid = "test-uuid-123"
-        mock_settings.return_value = mock_settings_instance
-
-        self.auth_command._login_to_tool_repository()
-
-        mock_tool_command.assert_called_once()
-        mock_tool_instance.login.assert_called_once()
-        expected_calls = [
-            call(
-                "Now logging you in to the Tool Repository... ",
-                style="bold blue",
-                end="",
-            ),
-            call("Success!\n", style="bold green"),
-            call(
-                "You are authenticated to the tool repository as [bold cyan]'Test Org'[/bold cyan] (test-uuid-123)",
-                style="green",
-            ),
-        ]
-        mock_console_print.assert_has_calls(expected_calls)
-
-    @patch("crewai.cli.tools.main.ToolCommand")
-    @patch("crewai.cli.authentication.main.console.print")
-    def test_login_to_tool_repository_error(
-        self, mock_console_print, mock_tool_command
-    ):
-        mock_tool_instance = MagicMock()
-        mock_tool_instance.login.side_effect = Exception("Tool repository error")
-        mock_tool_command.return_value = mock_tool_instance
-
-        self.auth_command._login_to_tool_repository()
-
-        mock_tool_command.assert_called_once()
-        mock_tool_instance.login.assert_called_once()
-        expected_calls = [
-            call(
-                "Now logging you in to the Tool Repository... ",
-                style="bold blue",
-                end="",
-            ),
-            call(
-                "\n[bold yellow]Warning:[/bold yellow] Authentication with the Tool Repository failed.",
-                style="yellow",
-            ),
-            call(
-                "Other features will work normally, but you may experience limitations with downloading and publishing tools.\nRun [bold]crewai login[/bold] to try logging in again.\n",
-                style="yellow",
-            ),
-        ]
-        mock_console_print.assert_has_calls(expected_calls)
-
-    @pytest.mark.parametrize(
-        "api_response,expected_provider",
-        [
-            ({"provider": "auth0"}, "auth0"),
-            ({"provider": "workos"}, "workos"),
-            ({"provider": "none"}, "workos"),  # Default to workos for any other value
-            (
-                {},
-                "workos",
-            ),  # Default to workos if no provider key is sent in the response
-        ],
-    )
-    @patch("crewai.cli.authentication.main.PlusAPI")
-    @patch("crewai.cli.authentication.main.console.print")
-    @patch("builtins.input", return_value="test@example.com")
-    def test_determine_user_provider_success(
-        self,
-        mock_input,
-        mock_console_print,
-        mock_plus_api,
-        api_response,
-        expected_provider,
-    ):
-        mock_api_instance = MagicMock()
-        mock_response = MagicMock()
-        mock_response.status_code = 200
-        mock_response.json.return_value = api_response
-        mock_api_instance._make_request.return_value = mock_response
-        mock_plus_api.return_value = mock_api_instance
-
-        result = self.auth_command._determine_user_provider()
-
-        mock_input.assert_called_once()
-        mock_plus_api.assert_called_once_with("")
-        mock_api_instance._make_request.assert_called_once_with(
-            "GET", "/crewai_plus/api/v1/me/provider?email=test%40example.com"
-        )
-        assert result == expected_provider
-
-    @patch("crewai.cli.authentication.main.PlusAPI")
-    @patch("crewai.cli.authentication.main.console.print")
-    @patch("builtins.input", return_value="test@example.com")
-    def test_determine_user_provider_error(
-        self, mock_input, mock_console_print, mock_plus_api
-    ):
-        mock_api_instance = MagicMock()
-        mock_response = MagicMock()
-        mock_response.status_code = 500
-        mock_api_instance._make_request.return_value = mock_response
-        mock_plus_api.return_value = mock_api_instance
-
-        with pytest.raises(SystemExit):
-            self.auth_command._determine_user_provider()
-
-        mock_input.assert_called_once()
-        mock_plus_api.assert_called_once_with("")
-        mock_api_instance._make_request.assert_called_once_with(
-            "GET", "/crewai_plus/api/v1/me/provider?email=test%40example.com"
-        )
-        mock_console_print.assert_has_calls(
-            [
-                call(
-                    "Enter your CrewAI Enterprise account email: ",
-                    style="bold blue",
-                    end="",
-                ),
-                call(
-                    "Error: Failed to authenticate with crewai enterprise. Ensure that you are using the latest crewai version and please try again. If the problem persists, contact support@crewai.com.",
-                    style="red",
-                ),
-            ]
-        )
-
-    @patch("requests.post")
-    def test_get_device_code(self, mock_post):
-        mock_response = MagicMock()
-        mock_response.json.return_value = {
-            "device_code": "test_device_code",
-            "user_code": "123456",
-            "verification_uri_complete": "https://example.com/auth",
-        }
-        mock_post.return_value = mock_response
-
-        result = self.auth_command._get_device_code(
-            client_id="test_client",
-            device_code_url="https://example.com/device",
-            audience="test_audience",
-        )
-
-        mock_post.assert_called_once_with(
-            url="https://example.com/device",
-            data={
-                "client_id": "test_client",
-                "scope": "openid",
-                "audience": "test_audience",
-            },
-            timeout=20,
-        )
-        assert result == {
-            "device_code": "test_device_code",
-            "user_code": "123456",
-            "verification_uri_complete": "https://example.com/auth",
-        }
-
-    @patch("requests.post")
-    @patch("crewai.cli.authentication.main.console.print")
-    def test_poll_for_token_success(self, mock_console_print, mock_post):
-        mock_response_success = MagicMock()
-        mock_response_success.status_code = 200
-        mock_response_success.json.return_value = {
-            "access_token": "test_access_token",
-            "id_token": "test_id_token",
-        }
-        mock_post.return_value = mock_response_success
-
-        device_code_data = {"device_code": "test_device_code", "interval": 1}
-
-        with (
-            patch.object(
-                self.auth_command, "_validate_and_save_token"
-            ) as mock_validate,
-            patch.object(
-                self.auth_command, "_login_to_tool_repository"
-            ) as mock_tool_login,
-        ):
-            self.auth_command._poll_for_token(
-                device_code_data, "test_client", "https://example.com/token"
-            )
-
-        mock_post.assert_called_once_with(
-            "https://example.com/token",
-            data={
-                "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
-                "device_code": "test_device_code",
-                "client_id": "test_client",
-            },
-            timeout=30,
-        )
-        mock_validate.assert_called_once()
-        mock_tool_login.assert_called_once()
-        expected_calls = [
-            call("\nWaiting for authentication... ", style="bold blue", end=""),
-            call("Success!", style="bold green"),
-            call("\n[bold green]Welcome to CrewAI Enterprise![/bold green]\n"),
-        ]
-        mock_console_print.assert_has_calls(expected_calls)
-
-    @patch("requests.post")
-    @patch("crewai.cli.authentication.main.console.print")
-    def test_poll_for_token_timeout(self, mock_console_print, mock_post):
-        mock_response_pending = MagicMock()
-        mock_response_pending.status_code = 400
-        mock_response_pending.json.return_value = {"error": "authorization_pending"}
-        mock_post.return_value = mock_response_pending
-
-        device_code_data = {
-            "device_code": "test_device_code",
-            "interval": 0.1,  # Short interval for testing
-        }
-
-        self.auth_command._poll_for_token(
-            device_code_data, "test_client", "https://example.com/token"
-        )
-
-        mock_console_print.assert_any_call(
-            "Timeout: Failed to get the token. Please try again.", style="bold red"
-        )
-
-    @patch("requests.post")
-    def test_poll_for_token_error(self, mock_post):
-        """Test the method to poll for token (error path)."""
-        # Setup mock to return error
-        mock_response_error = MagicMock()
-        mock_response_error.status_code = 400
-        mock_response_error.json.return_value = {
-            "error": "access_denied",
-            "error_description": "User denied access",
-        }
-        mock_post.return_value = mock_response_error
-
-        device_code_data = {"device_code": "test_device_code", "interval": 1}
-
-        with pytest.raises(requests.HTTPError):
-            self.auth_command._poll_for_token(
-                device_code_data, "test_client", "https://example.com/token"
-            )
+    @patch("crewai.cli.authentication.main.requests.post")
+    def test_get_device_code(self, mock_post):
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "device_code": "123456",
+            "user_code": "ABCDEF",
+            "verification_uri_complete": "https://example.com",
+            "interval": 5,
+        }
+        mock_post.return_value = mock_response
+
+        device_code_data = self.auth_command._get_device_code()
+
+        self.assertEqual(device_code_data["device_code"], "123456")
+        self.assertEqual(device_code_data["user_code"], "ABCDEF")
+        self.assertEqual(
+            device_code_data["verification_uri_complete"], "https://example.com"
+        )
+        self.assertEqual(device_code_data["interval"], 5)
+
+    @patch("crewai.cli.authentication.main.console.print")
+    @patch("crewai.cli.authentication.main.webbrowser.open")
+    def test_display_auth_instructions(self, mock_open, mock_print):
+        device_code_data = {
+            "verification_uri_complete": "https://example.com",
+            "user_code": "ABCDEF",
+        }
+
+        self.auth_command._display_auth_instructions(device_code_data)
+
+        mock_print.assert_any_call("1. Navigate to: ", "https://example.com")
+        mock_print.assert_any_call("2. Enter the following code: ", "ABCDEF")
+        mock_open.assert_called_once_with("https://example.com")
+
+    @patch("crewai.cli.tools.main.ToolCommand")
+    @patch("crewai.cli.authentication.main.requests.post")
+    @patch("crewai.cli.authentication.main.validate_token")
+    @patch("crewai.cli.authentication.main.console.print")
+    def test_poll_for_token_success(
+        self, mock_print, mock_validate_token, mock_post, mock_tool
+    ):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "id_token": "TOKEN",
+            "access_token": "ACCESS_TOKEN",
+        }
+        mock_post.return_value = mock_response
+        mock_instance = mock_tool.return_value
+        mock_instance.login.return_value = None
+
+        self.auth_command._poll_for_token({"device_code": "123456"})
+
+        mock_validate_token.assert_called_once_with("TOKEN")
+        mock_print.assert_called_once_with(
+            "\n[bold green]Welcome to CrewAI Enterprise![/bold green]\n"
+        )
+
+    @patch("crewai.cli.authentication.main.requests.post")
+    @patch("crewai.cli.authentication.main.console.print")
+    def test_poll_for_token_error(self, mock_print, mock_post):
+        mock_response = MagicMock()
+        mock_response.status_code = 400
+        mock_response.json.return_value = {
+            "error": "invalid_request",
+            "error_description": "Invalid request",
+        }
+        mock_post.return_value = mock_response
+
+        with self.assertRaises(requests.HTTPError):
+            self.auth_command._poll_for_token({"device_code": "123456"})
+
+        mock_print.assert_not_called()
+
+    @patch("crewai.cli.authentication.main.requests.post")
+    @patch("crewai.cli.authentication.main.console.print")
+    def test_poll_for_token_timeout(self, mock_print, mock_post):
+        mock_response = MagicMock()
+        mock_response.status_code = 400
+        mock_response.json.return_value = {
+            "error": "authorization_pending",
+            "error_description": "Authorization pending",
+        }
+        mock_post.return_value = mock_response
+
+        self.auth_command._poll_for_token({"device_code": "123456", "interval": 0.01})
+
+        mock_print.assert_called_once_with(
+            "Timeout: Failed to get the token. Please try again.", style="bold red"
+        )
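
Both versions of this suite pin down the same device-code polling contract: POST the urn:ietf:params:oauth:grant-type:device_code grant, keep waiting on authorization_pending, and surface any other 4xx as requests.HTTPError. A standalone sketch of that loop, using the left-hand-side argument shape (device_code_data, client_id, token_url); the retry bound is an illustrative assumption:

import time
import requests

def poll_for_token(device_code_data, client_id, token_url, max_attempts=5):
    for _ in range(max_attempts):
        response = requests.post(
            token_url,
            data={
                "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
                "device_code": device_code_data["device_code"],
                "client_id": client_id,
            },
            timeout=30,
        )
        if response.status_code == 200:
            return response.json()
        if response.json().get("error") == "authorization_pending":
            time.sleep(device_code_data.get("interval", 5))
            continue
        response.raise_for_status()  # any other error -> requests.HTTPError
    return None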

View File

@@ -1,110 +1,31 @@
 import json
-import jwt
 import unittest
 from datetime import datetime, timedelta
 from unittest.mock import MagicMock, patch

 from cryptography.fernet import Fernet

-from crewai.cli.authentication.utils import TokenManager, validate_jwt_token
+from crewai.cli.authentication.utils import TokenManager, validate_token


-@patch("crewai.cli.authentication.utils.PyJWKClient", return_value=MagicMock())
-@patch("crewai.cli.authentication.utils.jwt")
 class TestValidateToken(unittest.TestCase):
-    def test_validate_jwt_token(self, mock_jwt, mock_pyjwkclient):
-        mock_jwt.decode.return_value = {"exp": 1719859200}
-
-        # Create signing key object mock with a .key attribute
-        mock_pyjwkclient.return_value.get_signing_key_from_jwt.return_value = MagicMock(
-            key="mock_signing_key"
-        )
-
-        decoded_token = validate_jwt_token(
-            jwt_token="aaaaa.bbbbbb.cccccc",
-            jwks_url="https://mock_jwks_url",
-            issuer="https://mock_issuer",
-            audience="app_id_xxxx",
-        )
-
-        mock_jwt.decode.assert_called_with(
-            "aaaaa.bbbbbb.cccccc",
-            "mock_signing_key",
-            algorithms=["RS256"],
-            audience="app_id_xxxx",
-            issuer="https://mock_issuer",
-            options={
-                "verify_signature": True,
-                "verify_exp": True,
-                "verify_nbf": True,
-                "verify_iat": True,
-                "require": ["exp", "iat", "iss", "aud", "sub"],
-            },
-        )
-        mock_pyjwkclient.assert_called_once_with("https://mock_jwks_url")
-        self.assertEqual(decoded_token, {"exp": 1719859200})
-
-    def test_validate_jwt_token_expired(self, mock_jwt, mock_pyjwkclient):
-        mock_jwt.decode.side_effect = jwt.ExpiredSignatureError
-        with self.assertRaises(Exception):
-            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",
-                jwks_url="https://mock_jwks_url",
-                issuer="https://mock_issuer",
-                audience="app_id_xxxx",
-            )
-
-    def test_validate_jwt_token_invalid_audience(self, mock_jwt, mock_pyjwkclient):
-        mock_jwt.decode.side_effect = jwt.InvalidAudienceError
-        with self.assertRaises(Exception):
-            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",
-                jwks_url="https://mock_jwks_url",
-                issuer="https://mock_issuer",
-                audience="app_id_xxxx",
-            )
-
-    def test_validate_jwt_token_invalid_issuer(self, mock_jwt, mock_pyjwkclient):
-        mock_jwt.decode.side_effect = jwt.InvalidIssuerError
-        with self.assertRaises(Exception):
-            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",
-                jwks_url="https://mock_jwks_url",
-                issuer="https://mock_issuer",
-                audience="app_id_xxxx",
-            )
-
-    def test_validate_jwt_token_missing_required_claims(
-        self, mock_jwt, mock_pyjwkclient
-    ):
-        mock_jwt.decode.side_effect = jwt.MissingRequiredClaimError
-        with self.assertRaises(Exception):
-            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",
-                jwks_url="https://mock_jwks_url",
-                issuer="https://mock_issuer",
-                audience="app_id_xxxx",
-            )
-
-    def test_validate_jwt_token_jwks_error(self, mock_jwt, mock_pyjwkclient):
-        mock_jwt.decode.side_effect = jwt.exceptions.PyJWKClientError
-        with self.assertRaises(Exception):
-            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",
-                jwks_url="https://mock_jwks_url",
-                issuer="https://mock_issuer",
-                audience="app_id_xxxx",
-            )
-
-    def test_validate_jwt_token_invalid_token(self, mock_jwt, mock_pyjwkclient):
-        mock_jwt.decode.side_effect = jwt.InvalidTokenError
-        with self.assertRaises(Exception):
-            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",
-                jwks_url="https://mock_jwks_url",
-                issuer="https://mock_issuer",
-                audience="app_id_xxxx",
-            )
+    @patch("crewai.cli.authentication.utils.AsymmetricSignatureVerifier")
+    @patch("crewai.cli.authentication.utils.TokenVerifier")
+    def test_validate_token(self, mock_token_verifier, mock_asymmetric_verifier):
+        mock_verifier_instance = mock_token_verifier.return_value
+        mock_id_token = "mock_id_token"
+
+        validate_token(mock_id_token)
+
+        mock_asymmetric_verifier.assert_called_once_with(
+            "https://crewai.us.auth0.com/.well-known/jwks.json"
+        )
+        mock_token_verifier.assert_called_once_with(
+            signature_verifier=mock_asymmetric_verifier.return_value,
+            issuer="https://crewai.us.auth0.com/",
+            audience="DEVC5Fw6NlRoSzmDCcOhVq85EfLBjKa8",
+        )
+        mock_verifier_instance.verify.assert_called_once_with(mock_id_token)
@@ -141,9 +62,9 @@ class TestTokenManager(unittest.TestCase):
     @patch("crewai.cli.authentication.utils.TokenManager.save_secure_file")
     def test_save_tokens(self, mock_save):
         access_token = "test_token"
-        expires_at = int((datetime.now() + timedelta(seconds=3600)).timestamp())
-        self.token_manager.save_tokens(access_token, expires_at)
+        expires_in = 3600
+        self.token_manager.save_tokens(access_token, expires_in)

         mock_save.assert_called_once()
         args = mock_save.call_args[0]
@@ -152,7 +73,11 @@ class TestTokenManager(unittest.TestCase):
         data = json.loads(decrypted_data)
         self.assertEqual(data["access_token"], access_token)
         expiration = datetime.fromisoformat(data["expiration"])
-        self.assertEqual(expiration, datetime.fromtimestamp(expires_at))
+        self.assertAlmostEqual(
+            expiration,
+            datetime.now() + timedelta(seconds=expires_in),
+            delta=timedelta(seconds=1),
+        )

     @patch("crewai.cli.authentication.utils.TokenManager.read_secure_file")
     def test_get_token_valid(self, mock_read):
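
The change in test_save_tokens documents a semantic shift in TokenManager.save_tokens: the left-hand side passes an absolute expiration timestamp, the right-hand side a lifetime in seconds. Purely for contrast (each call only matches its own side's implementation, so the two cannot coexist against one version):

from datetime import datetime, timedelta
from crewai.cli.authentication.utils import TokenManager

manager = TokenManager()

# Left-hand side: absolute unix timestamp
manager.save_tokens("token", int((datetime.now() + timedelta(hours=1)).timestamp()))

# Right-hand side: seconds until expiry
manager.save_tokens("token", 3600)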

View File

@@ -13,7 +13,7 @@ from crewai.cli.cli import (
     deply_status,
     flow_add_crew,
     reset_memories,
-    login,
+    signup,
     test,
     train,
     version,
@@ -261,12 +261,12 @@ def test_test_invalid_string_iterations(evaluate_crew, runner):
 @mock.patch("crewai.cli.cli.AuthenticationCommand")
-def test_login(command, runner):
+def test_signup(command, runner):
     mock_auth = command.return_value
-    result = runner.invoke(login)
+    result = runner.invoke(signup)

     assert result.exit_code == 0
-    mock_auth.login.assert_called_once()
+    mock_auth.signup.assert_called_once()


 @mock.patch("crewai.cli.cli.DeployCommand")

View File

@@ -2,7 +2,6 @@ import os
 import tempfile
 import unittest
 import unittest.mock
-from datetime import datetime, timedelta
 from contextlib import contextmanager
 from unittest import mock
 from unittest.mock import MagicMock, patch
@@ -27,9 +26,7 @@ def in_temp_dir():
 @pytest.fixture
 def tool_command():
-    TokenManager().save_tokens(
-        "test-token", (datetime.now() + timedelta(seconds=36000)).timestamp()
-    )
+    TokenManager().save_tokens("test-token", 36000)
     tool_command = ToolCommand()
     with patch.object(tool_command, "login"):
         yield tool_command
@@ -60,9 +57,7 @@ def test_create_success(mock_subprocess, capsys, tool_command):
 @patch("crewai.cli.tools.main.subprocess.run")
 @patch("crewai.cli.plus_api.PlusAPI.get_tool")
 @patch("crewai.cli.tools.main.ToolCommand._print_current_organization")
-def test_install_success(
-    mock_print_org, mock_get, mock_subprocess_run, capsys, tool_command
-):
+def test_install_success(mock_print_org, mock_get, mock_subprocess_run, capsys, tool_command):
     mock_get_response = MagicMock()
     mock_get_response.status_code = 200
     mock_get_response.json.return_value = {
@@ -94,7 +89,6 @@ def test_install_success(
     # Verify _print_current_organization was called
     mock_print_org.assert_called_once()

-
 @patch("crewai.cli.tools.main.subprocess.run")
 @patch("crewai.cli.plus_api.PlusAPI.get_tool")
 def test_install_success_from_pypi(mock_get, mock_subprocess_run, capsys, tool_command):
@@ -175,10 +169,7 @@ def test_publish_when_not_in_sync(mock_is_synced, capsys, tool_command):
 )
 @patch("crewai.cli.plus_api.PlusAPI.publish_tool")
 @patch("crewai.cli.tools.main.git.Repository.is_synced", return_value=False)
-@patch(
-    "crewai.cli.tools.main.extract_available_exports",
-    return_value=[{"name": "SampleTool"}],
-)
+@patch("crewai.cli.tools.main.extract_available_exports", return_value=[{"name": "SampleTool"}])
 @patch("crewai.cli.tools.main.ToolCommand._print_current_organization")
 def test_publish_when_not_in_sync_and_force(
     mock_print_org,
@@ -232,10 +223,7 @@ def test_publish_when_not_in_sync_and_force(
 )
 @patch("crewai.cli.plus_api.PlusAPI.publish_tool")
 @patch("crewai.cli.tools.main.git.Repository.is_synced", return_value=True)
-@patch(
-    "crewai.cli.tools.main.extract_available_exports",
-    return_value=[{"name": "SampleTool"}],
-)
+@patch("crewai.cli.tools.main.extract_available_exports", return_value=[{"name": "SampleTool"}])
 def test_publish_success(
     mock_available_exports,
     mock_is_synced,
@@ -285,10 +273,7 @@ def test_publish_success(
     read_data=b"sample tarball content",
 )
 @patch("crewai.cli.plus_api.PlusAPI.publish_tool")
-@patch(
-    "crewai.cli.tools.main.extract_available_exports",
-    return_value=[{"name": "SampleTool"}],
-)
+@patch("crewai.cli.tools.main.extract_available_exports", return_value=[{"name": "SampleTool"}])
 def test_publish_failure(
     mock_available_exports,
     mock_publish,
@@ -326,10 +311,7 @@ def test_publish_failure(
     read_data=b"sample tarball content",
 )
 @patch("crewai.cli.plus_api.PlusAPI.publish_tool")
-@patch(
-    "crewai.cli.tools.main.extract_available_exports",
-    return_value=[{"name": "SampleTool"}],
-)
+@patch("crewai.cli.tools.main.extract_available_exports", return_value=[{"name": "SampleTool"}])
 def test_publish_api_error(
     mock_available_exports,
     mock_publish,
@@ -356,6 +338,7 @@ def test_publish_api_error(
     mock_publish.assert_called_once()

+
 @patch("crewai.cli.tools.main.Settings")
 def test_print_current_organization_with_org(mock_settings, capsys, tool_command):
     mock_settings_instance = MagicMock()

View File

@@ -52,7 +52,6 @@ from crewai.utilities.events.memory_events import (
MemoryRetrievalCompletedEvent, MemoryRetrievalCompletedEvent,
) )
@pytest.fixture @pytest.fixture
def ceo(): def ceo():
return Agent( return Agent(
@@ -936,27 +935,12 @@ def test_cache_hitting_between_agents(researcher, writer, ceo):
read.return_value = "12" read.return_value = "12"
crew.kickoff() crew.kickoff()
assert read.call_count == 2, "read was not called exactly twice" assert read.call_count == 2, "read was not called exactly twice"
# Check if read was called with the expected arguments
# Filter the mock calls to only include the ones with 'tool' and 'input' keywords expected_calls = [
cache_calls = [ call(tool="multiplier", input={"first_number": 2, "second_number": 6}),
call call(tool="multiplier", input={"first_number": 2, "second_number": 6}),
for call in read.call_args_list
if len(call.kwargs) == 2
and "tool" in call.kwargs
and "input" in call.kwargs
] ]
read.assert_has_calls(expected_calls, any_order=False)
# Check if we have the expected number of cache calls
assert len(cache_calls) == 2, f"Expected 2 cache calls, got {len(cache_calls)}"
# Check if both calls were made with the expected arguments
expected_call = call(
tool="multiplier", input={"first_number": 2, "second_number": 6}
)
assert cache_calls[0] == expected_call, f"First call mismatch: {cache_calls[0]}"
assert (
cache_calls[1] == expected_call
), f"Second call mismatch: {cache_calls[1]}"
@pytest.mark.vcr(filter_headers=["authorization"]) @pytest.mark.vcr(filter_headers=["authorization"])
@@ -1813,7 +1797,7 @@ def test_hierarchical_kickoff_usage_metrics_include_manager(researcher):
agent=researcher, # *regular* agent agent=researcher, # *regular* agent
) )
# ── 2. Stub out each agent's _token_process.get_summary() ─────────────────── # ── 2. Stub out each agents _token_process.get_summary() ───────────────────
researcher_metrics = UsageMetrics( researcher_metrics = UsageMetrics(
total_tokens=120, prompt_tokens=80, completion_tokens=40, successful_requests=2 total_tokens=120, prompt_tokens=80, completion_tokens=40, successful_requests=2
) )
@@ -1837,7 +1821,7 @@ def test_hierarchical_kickoff_usage_metrics_include_manager(researcher):
process=Process.hierarchical, process=Process.hierarchical,
) )
# We don't care about LLM output here; patch execute_sync to avoid network # We dont care about LLM output here; patch execute_sync to avoid network
with patch.object( with patch.object(
Task, Task,
"execute_sync", "execute_sync",
@@ -2505,19 +2489,17 @@ def test_using_contextual_memory():
        memory=True,
    )
-    with patch.object(
-        ContextualMemory, "build_context_for_task", return_value=""
-    ) as contextual_mem:
+    with patch.object(ContextualMemory, "build_context_for_task", return_value="") as contextual_mem:
        crew.kickoff()
        contextual_mem.assert_called_once()

@pytest.mark.vcr(filter_headers=["authorization"])
def test_memory_events_are_emitted():
    events = defaultdict(list)
    with crewai_event_bus.scoped_handlers():

        @crewai_event_bus.on(MemorySaveStartedEvent)
        def handle_memory_save_started(source, event):
            events["MemorySaveStartedEvent"].append(event)
@@ -2580,7 +2562,6 @@ def test_memory_events_are_emitted():
    assert len(events["MemoryRetrievalStartedEvent"]) == 1
    assert len(events["MemoryRetrievalCompletedEvent"]) == 1

@pytest.mark.vcr(filter_headers=["authorization"])
def test_using_contextual_memory_with_long_term_memory():
    from unittest.mock import patch
@@ -2604,9 +2585,7 @@ def test_using_contextual_memory_with_long_term_memory():
        long_term_memory=LongTermMemory(),
    )
-    with patch.object(
-        ContextualMemory, "build_context_for_task", return_value=""
-    ) as contextual_mem:
+    with patch.object(ContextualMemory, "build_context_for_task", return_value="") as contextual_mem:
        crew.kickoff()
        contextual_mem.assert_called_once()
    assert crew.memory is False
@@ -2707,9 +2686,7 @@ def test_using_contextual_memory_with_short_term_memory():
        short_term_memory=ShortTermMemory(),
    )
-    with patch.object(
-        ContextualMemory, "build_context_for_task", return_value=""
-    ) as contextual_mem:
+    with patch.object(ContextualMemory, "build_context_for_task", return_value="") as contextual_mem:
        crew.kickoff()
        contextual_mem.assert_called_once()
    assert crew.memory is False
@@ -2738,9 +2715,7 @@ def test_disabled_memory_using_contextual_memory():
        memory=False,
    )
-    with patch.object(
-        ContextualMemory, "build_context_for_task", return_value=""
-    ) as contextual_mem:
+    with patch.object(ContextualMemory, "build_context_for_task", return_value="") as contextual_mem:
        crew.kickoff()
        contextual_mem.assert_not_called()
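The four hunks above apply the same mechanical change: collapsing a multi-line patch.object context manager onto a single line. A minimal sketch of the pattern, where ContextStub is a hypothetical stand-in for ContextualMemory:

from unittest.mock import patch

class ContextStub:
    def build_context_for_task(self, task, context):
        return "real context"

# Inside the with-block the method is a MagicMock returning ""; on exit the
# original attribute is restored automatically.
with patch.object(ContextStub, "build_context_for_task", return_value="") as mocked:
    assert ContextStub().build_context_for_task(None, None) == ""
mocked.assert_called_once()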

View File

@@ -1,28 +0,0 @@
import pytest
from unittest.mock import MagicMock

from crewai.agent import Agent
from crewai.task import Task


class BaseEvaluationMetricsTest:
    @pytest.fixture
    def mock_agent(self):
        agent = MagicMock(spec=Agent)
        agent.id = "test_agent_id"
        agent.role = "Test Agent"
        agent.goal = "Test goal"
        agent.tools = []
        return agent

    @pytest.fixture
    def mock_task(self):
        task = MagicMock(spec=Task)
        task.description = "Test task description"
        task.expected_output = "Test expected output"
        return task

    @pytest.fixture
    def execution_trace(self):
        return {
            "thinking": ["I need to analyze this data carefully"],
            "actions": ["Gathered information", "Analyzed data"]
        }
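This removed base class leaned on a standard pytest behavior: fixtures declared on a base class are collected for every test class that inherits from it. A minimal sketch of that mechanism, with hypothetical names:

import pytest

class BaseFixtures:
    @pytest.fixture
    def payload(self):
        return {"value": 42}

class TestUsesInheritedFixture(BaseFixtures):
    def test_payload(self, payload):
        # pytest resolves `payload` from the inherited base-class fixture.
        assert payload["value"] == 42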

View File

@@ -1,59 +0,0 @@
from unittest.mock import patch, MagicMock

from tests.evaluation.metrics.base_evaluation_metrics_test import BaseEvaluationMetricsTest
from crewai.evaluation.base_evaluator import EvaluationScore
from crewai.evaluation.metrics.goal_metrics import GoalAlignmentEvaluator
from crewai.utilities.llm_utils import LLM


class TestGoalAlignmentEvaluator(BaseEvaluationMetricsTest):
    @patch("crewai.utilities.llm_utils.create_llm")
    def test_evaluate_success(self, mock_create_llm, mock_agent, mock_task, execution_trace):
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = """
        {
            "score": 8.5,
            "feedback": "The agent correctly understood the task and produced relevant output."
        }
        """
        mock_create_llm.return_value = mock_llm

        evaluator = GoalAlignmentEvaluator(llm=mock_llm)
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="This is the final output"
        )

        assert isinstance(result, EvaluationScore)
        assert result.score == 8.5
        assert "correctly understood the task" in result.feedback

        mock_llm.call.assert_called_once()
        prompt = mock_llm.call.call_args[0][0]
        assert len(prompt) >= 2
        assert "system" in prompt[0]["role"]
        assert "user" in prompt[1]["role"]
        assert mock_agent.role in prompt[1]["content"]
        assert mock_task.description in prompt[1]["content"]

    @patch("crewai.utilities.llm_utils.create_llm")
    def test_evaluate_error_handling(self, mock_create_llm, mock_agent, mock_task, execution_trace):
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = "Invalid JSON response"
        mock_create_llm.return_value = mock_llm

        evaluator = GoalAlignmentEvaluator(llm=mock_llm)
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="This is the final output"
        )

        assert isinstance(result, EvaluationScore)
        assert result.score is None
        assert "Failed to parse" in result.feedback

View File

@@ -1,166 +0,0 @@
import pytest
from unittest.mock import patch, MagicMock
from typing import List, Dict, Any

from crewai.tasks.task_output import TaskOutput
from crewai.evaluation.metrics.reasoning_metrics import (
    ReasoningEfficiencyEvaluator,
)
from tests.evaluation.metrics.base_evaluation_metrics_test import BaseEvaluationMetricsTest
from crewai.utilities.llm_utils import LLM
from crewai.evaluation.base_evaluator import EvaluationScore


class TestReasoningEfficiencyEvaluator(BaseEvaluationMetricsTest):
    @pytest.fixture
    def mock_output(self):
        output = MagicMock(spec=TaskOutput)
        output.raw = "This is the test output"
        return output

    @pytest.fixture
    def llm_calls(self) -> List[Dict[str, Any]]:
        return [
            {
                "prompt": "How should I approach this task?",
                "response": "I'll first research the topic, then compile findings.",
                "timestamp": 1626987654
            },
            {
                "prompt": "What resources should I use?",
                "response": "I'll use relevant academic papers and reliable websites.",
                "timestamp": 1626987754
            },
            {
                "prompt": "How should I structure the output?",
                "response": "I'll organize information clearly with headings and bullet points.",
                "timestamp": 1626987854
            }
        ]

    def test_insufficient_llm_calls(self, mock_agent, mock_task, mock_output):
        execution_trace = {"llm_calls": []}

        evaluator = ReasoningEfficiencyEvaluator()
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output=mock_output
        )

        assert isinstance(result, EvaluationScore)
        assert result.score is None
        assert "Insufficient LLM calls" in result.feedback

    @patch("crewai.utilities.llm_utils.create_llm")
    def test_successful_evaluation(self, mock_create_llm, mock_agent, mock_task, mock_output, llm_calls):
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = """
        {
            "scores": {
                "focus": 8.0,
                "progression": 7.0,
                "decision_quality": 7.5,
                "conciseness": 8.0,
                "loop_avoidance": 9.0
            },
            "overall_score": 7.9,
            "feedback": "The agent demonstrated good reasoning efficiency.",
            "optimization_suggestions": "The agent could improve by being more concise."
        }
        """
        mock_create_llm.return_value = mock_llm

        # Setup execution trace with sufficient LLM calls
        execution_trace = {"llm_calls": llm_calls}

        # Mock the _detect_loops method to return a simple result
        evaluator = ReasoningEfficiencyEvaluator(llm=mock_llm)
        evaluator._detect_loops = MagicMock(return_value=(False, []))

        # Evaluate
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output=mock_output
        )

        # Assertions
        assert isinstance(result, EvaluationScore)
        assert result.score == 7.9
        assert "The agent demonstrated good reasoning efficiency" in result.feedback
        assert "Reasoning Efficiency Evaluation:" in result.feedback
        assert "• Focus: 8.0/10" in result.feedback

        # Verify LLM was called
        mock_llm.call.assert_called_once()

    @patch("crewai.utilities.llm_utils.create_llm")
    def test_parse_error_handling(self, mock_create_llm, mock_agent, mock_task, mock_output, llm_calls):
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = "Invalid JSON response"
        mock_create_llm.return_value = mock_llm

        # Setup execution trace
        execution_trace = {"llm_calls": llm_calls}

        # Mock the _detect_loops method
        evaluator = ReasoningEfficiencyEvaluator(llm=mock_llm)
        evaluator._detect_loops = MagicMock(return_value=(False, []))

        # Evaluate
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output=mock_output
        )

        # Assertions for error handling
        assert isinstance(result, EvaluationScore)
        assert result.score is None
        assert "Failed to parse reasoning efficiency evaluation" in result.feedback

    @patch("crewai.utilities.llm_utils.create_llm")
    def test_loop_detection(self, mock_create_llm, mock_agent, mock_task, mock_output):
        # Setup LLM calls with a repeating pattern
        repetitive_llm_calls = [
            {"prompt": "How to solve?", "response": "I'll try method A", "timestamp": 1000},
            {"prompt": "Let me try method A", "response": "It didn't work", "timestamp": 1100},
            {"prompt": "How to solve?", "response": "I'll try method A again", "timestamp": 1200},
            {"prompt": "Let me try method A", "response": "It didn't work", "timestamp": 1300},
            {"prompt": "How to solve?", "response": "I'll try method A one more time", "timestamp": 1400}
        ]

        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = """
        {
            "scores": {
                "focus": 6.0,
                "progression": 3.0,
                "decision_quality": 4.0,
                "conciseness": 6.0,
                "loop_avoidance": 2.0
            },
            "overall_score": 4.2,
            "feedback": "The agent is stuck in a reasoning loop.",
            "optimization_suggestions": "The agent should try different approaches when one fails."
        }
        """
        mock_create_llm.return_value = mock_llm

        execution_trace = {"llm_calls": repetitive_llm_calls}

        evaluator = ReasoningEfficiencyEvaluator(llm=mock_llm)
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output=mock_output
        )

        assert isinstance(result, EvaluationScore)
        assert result.score == 4.2
        assert "• Loop Avoidance: 2.0/10" in result.feedback

View File

@@ -1,82 +0,0 @@
from unittest.mock import patch, MagicMock

from crewai.evaluation.base_evaluator import EvaluationScore
from crewai.evaluation.metrics.semantic_quality_metrics import SemanticQualityEvaluator
from tests.evaluation.metrics.base_evaluation_metrics_test import BaseEvaluationMetricsTest
from crewai.utilities.llm_utils import LLM


class TestSemanticQualityEvaluator(BaseEvaluationMetricsTest):
    @patch("crewai.utilities.llm_utils.create_llm")
    def test_evaluate_success(self, mock_create_llm, mock_agent, mock_task, execution_trace):
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = """
        {
            "score": 8.5,
            "feedback": "The output is clear, coherent, and logically structured."
        }
        """
        mock_create_llm.return_value = mock_llm

        evaluator = SemanticQualityEvaluator(llm=mock_llm)
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="This is a well-structured analysis of the data."
        )

        assert isinstance(result, EvaluationScore)
        assert result.score == 8.5
        assert "clear, coherent" in result.feedback

        mock_llm.call.assert_called_once()
        prompt = mock_llm.call.call_args[0][0]
        assert len(prompt) >= 2
        assert "system" in prompt[0]["role"]
        assert "user" in prompt[1]["role"]
        assert mock_agent.role in prompt[1]["content"]
        assert mock_task.description in prompt[1]["content"]

    @patch("crewai.utilities.llm_utils.create_llm")
    def test_evaluate_with_empty_output(self, mock_create_llm, mock_agent, mock_task, execution_trace):
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = """
        {
            "score": 2.0,
            "feedback": "The output is empty or minimal, lacking substance."
        }
        """
        mock_create_llm.return_value = mock_llm

        evaluator = SemanticQualityEvaluator(llm=mock_llm)
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output=""
        )

        assert isinstance(result, EvaluationScore)
        assert result.score == 2.0
        assert "empty or minimal" in result.feedback

    @patch("crewai.utilities.llm_utils.create_llm")
    def test_evaluate_error_handling(self, mock_create_llm, mock_agent, mock_task, execution_trace):
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = "Invalid JSON response"
        mock_create_llm.return_value = mock_llm

        evaluator = SemanticQualityEvaluator(llm=mock_llm)
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="This is the output."
        )

        assert isinstance(result, EvaluationScore)
        assert result.score is None
        assert "Failed to parse" in result.feedback

View File

@@ -1,230 +0,0 @@
from unittest.mock import patch, MagicMock

from crewai.evaluation.metrics.tools_metrics import (
    ToolSelectionEvaluator,
    ParameterExtractionEvaluator,
    ToolInvocationEvaluator
)
from crewai.utilities.llm_utils import LLM
from tests.evaluation.metrics.base_evaluation_metrics_test import BaseEvaluationMetricsTest


class TestToolSelectionEvaluator(BaseEvaluationMetricsTest):
    def test_no_tools_available(self, mock_task, mock_agent):
        # Create agent with no tools
        mock_agent.tools = []
        execution_trace = {"tool_uses": []}

        evaluator = ToolSelectionEvaluator()
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="Final output"
        )

        assert result.score is None
        assert "no tools available" in result.feedback.lower()

    def test_tools_available_but_none_used(self, mock_agent, mock_task):
        mock_agent.tools = ["tool1", "tool2"]
        execution_trace = {"tool_uses": []}

        evaluator = ToolSelectionEvaluator()
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="Final output"
        )

        assert result.score is None
        assert "had tools available but didn't use any" in result.feedback.lower()

    @patch("crewai.utilities.llm_utils.create_llm")
    def test_successful_evaluation(self, mock_create_llm, mock_agent, mock_task):
        # Setup mock LLM response
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = """
        {
            "overall_score": 8.5,
            "feedback": "The agent made good tool selections."
        }
        """
        mock_create_llm.return_value = mock_llm

        # Setup execution trace with tool uses
        execution_trace = {
            "tool_uses": [
                {"tool": "search_tool", "input": {"query": "test query"}, "output": "search results"},
                {"tool": "calculator", "input": {"expression": "2+2"}, "output": "4"}
            ]
        }

        evaluator = ToolSelectionEvaluator(llm=mock_llm)
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="Final output"
        )

        assert result.score == 8.5
        assert "The agent made good tool selections" in result.feedback

        # Verify LLM was called with correct prompt
        mock_llm.call.assert_called_once()
        prompt = mock_llm.call.call_args[0][0]
        assert isinstance(prompt, list)
        assert len(prompt) >= 2
        assert "system" in prompt[0]["role"]
        assert "user" in prompt[1]["role"]


class TestParameterExtractionEvaluator(BaseEvaluationMetricsTest):
    def test_no_tool_uses(self, mock_agent, mock_task):
        execution_trace = {"tool_uses": []}

        evaluator = ParameterExtractionEvaluator()
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="Final output"
        )

        assert result.score is None
        assert "no tool usage" in result.feedback.lower()

    @patch("crewai.utilities.llm_utils.create_llm")
    def test_successful_evaluation(self, mock_create_llm, mock_agent, mock_task):
        mock_agent.tools = ["tool1", "tool2"]

        # Setup mock LLM response
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = """
        {
            "overall_score": 9.0,
            "feedback": "The agent extracted parameters correctly."
        }
        """
        mock_create_llm.return_value = mock_llm

        # Setup execution trace with tool uses
        execution_trace = {
            "tool_uses": [
                {
                    "tool": "search_tool",
                    "input": {"query": "test query"},
                    "output": "search results",
                    "error": None
                },
                {
                    "tool": "calculator",
                    "input": {"expression": "2+2"},
                    "output": "4",
                    "error": None
                }
            ]
        }

        evaluator = ParameterExtractionEvaluator(llm=mock_llm)
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="Final output"
        )

        assert result.score == 9.0
        assert "The agent extracted parameters correctly" in result.feedback


class TestToolInvocationEvaluator(BaseEvaluationMetricsTest):
    def test_no_tool_uses(self, mock_agent, mock_task):
        execution_trace = {"tool_uses": []}

        evaluator = ToolInvocationEvaluator()
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="Final output"
        )

        assert result.score is None
        assert "no tool usage" in result.feedback.lower()

    @patch("crewai.utilities.llm_utils.create_llm")
    def test_successful_evaluation(self, mock_create_llm, mock_agent, mock_task):
        mock_agent.tools = ["tool1", "tool2"]

        # Setup mock LLM response
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = """
        {
            "overall_score": 8.0,
            "feedback": "The agent invoked tools correctly."
        }
        """
        mock_create_llm.return_value = mock_llm

        # Setup execution trace with tool uses
        execution_trace = {
            "tool_uses": [
                {"tool": "search_tool", "input": {"query": "test query"}, "output": "search results"},
                {"tool": "calculator", "input": {"expression": "2+2"}, "output": "4"}
            ]
        }

        evaluator = ToolInvocationEvaluator(llm=mock_llm)
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="Final output"
        )

        assert result.score == 8.0
        assert "The agent invoked tools correctly" in result.feedback

    @patch("crewai.utilities.llm_utils.create_llm")
    def test_evaluation_with_errors(self, mock_create_llm, mock_agent, mock_task):
        mock_agent.tools = ["tool1", "tool2"]

        # Setup mock LLM response
        mock_llm = MagicMock(spec=LLM)
        mock_llm.call.return_value = """
        {
            "overall_score": 5.5,
            "feedback": "The agent had some errors in tool invocation."
        }
        """
        mock_create_llm.return_value = mock_llm

        # Setup execution trace with tool uses including errors
        execution_trace = {
            "tool_uses": [
                {
                    "tool": "search_tool",
                    "input": {"query": "test query"},
                    "output": "search results",
                    "error": None
                },
                {
                    "tool": "calculator",
                    "input": {"expression": "2+"},
                    "output": None,
                    "error": "Invalid expression"
                }
            ]
        }

        evaluator = ToolInvocationEvaluator(llm=mock_llm)
        result = evaluator.evaluate(
            agent=mock_agent,
            task=mock_task,
            execution_trace=execution_trace,
            final_output="Final output"
        )

        assert result.score == 5.5
        assert "The agent had some errors in tool invocation" in result.feedback

View File

@@ -1,95 +0,0 @@
import pytest

from crewai.agent import Agent
from crewai.task import Task
from crewai.crew import Crew
from crewai.evaluation.agent_evaluator import AgentEvaluator
from crewai.evaluation.base_evaluator import AgentEvaluationResult
from crewai.evaluation import (
    GoalAlignmentEvaluator,
    SemanticQualityEvaluator,
    ToolSelectionEvaluator,
    ParameterExtractionEvaluator,
    ToolInvocationEvaluator,
    ReasoningEfficiencyEvaluator
)
from crewai.evaluation import create_default_evaluator


class TestAgentEvaluator:
    @pytest.fixture
    def mock_crew(self):
        agent = Agent(
            role="Test Agent",
            goal="Complete test tasks successfully",
            backstory="An agent created for testing purposes",
            allow_delegation=False,
            verbose=False
        )
        task = Task(
            description="Test task description",
            agent=agent,
            expected_output="Expected test output"
        )
        crew = Crew(
            agents=[agent],
            tasks=[task]
        )
        return crew

    def test_set_iteration(self):
        agent_evaluator = AgentEvaluator()
        agent_evaluator.set_iteration(3)
        assert agent_evaluator.iteration == 3

    @pytest.mark.vcr(filter_headers=["authorization"])
    def test_evaluate_current_iteration(self, mock_crew):
        agent_evaluator = AgentEvaluator(crew=mock_crew, evaluators=[GoalAlignmentEvaluator()])
        mock_crew.kickoff()

        results = agent_evaluator.evaluate_current_iteration()
        assert isinstance(results, dict)

        agent, = mock_crew.agents
        task, = mock_crew.tasks
        assert len(mock_crew.agents) == 1
        assert agent.role in results
        assert len(results[agent.role]) == 1

        result, = results[agent.role]
        assert isinstance(result, AgentEvaluationResult)
        assert result.agent_id == str(agent.id)
        assert result.task_id == str(task.id)

        goal_alignment, = result.metrics.values()
        assert goal_alignment.score == 5.0
        expected_feedback = "The agent's output demonstrates an understanding of the need for a comprehensive document"
        assert expected_feedback in goal_alignment.feedback
        assert goal_alignment.raw_response is not None
        assert '"score": 5' in goal_alignment.raw_response

    def test_create_default_evaluator(self, mock_crew):
        agent_evaluator = create_default_evaluator(crew=mock_crew)
        assert isinstance(agent_evaluator, AgentEvaluator)
        assert agent_evaluator.crew == mock_crew

        expected_types = [
            GoalAlignmentEvaluator,
            SemanticQualityEvaluator,
            ToolSelectionEvaluator,
            ParameterExtractionEvaluator,
            ToolInvocationEvaluator,
            ReasoningEfficiencyEvaluator
        ]
        assert len(agent_evaluator.evaluators) == len(expected_types)
        for evaluator, expected_type in zip(agent_evaluator.evaluators, expected_types):
            assert isinstance(evaluator, expected_type)

View File

@@ -601,7 +601,7 @@ def test_handle_streaming_tool_calls(get_weather_tool_schema, mock_emit):
def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_emit):
    def get_weather_error(location):
        raise Exception("Error")

    llm = LLM(model="openai/gpt-4o", stream=True)
    response = llm.call(
        messages=[
@@ -619,7 +619,7 @@ def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_emit):
        expected_stream_chunk=9,
        expected_completed_llm_call=1,
        expected_tool_usage_started=1,
        expected_tool_usage_error=1,
        expected_final_chunk_result=expected_final_chunk_result,
    )

View File

@@ -1,226 +0,0 @@
import asyncio
import threading
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, Any, Callable
from unittest.mock import patch

import pytest

from crewai import Agent, Crew, Task
from crewai.utilities.crew.crew_context import get_crew_context


@pytest.fixture
def simple_agent_factory():
    def create_agent(name: str) -> Agent:
        return Agent(
            role=f"{name} Agent",
            goal=f"Complete {name} task",
            backstory=f"I am agent for {name}",
        )

    return create_agent


@pytest.fixture
def simple_task_factory():
    def create_task(name: str, callback: Callable = None) -> Task:
        return Task(
            description=f"Task for {name}", expected_output="Done", callback=callback
        )

    return create_task


@pytest.fixture
def crew_factory(simple_agent_factory, simple_task_factory):
    def create_crew(name: str, task_callback: Callable = None) -> Crew:
        agent = simple_agent_factory(name)
        task = simple_task_factory(name, callback=task_callback)
        task.agent = agent
        return Crew(agents=[agent], tasks=[task], verbose=False)

    return create_crew


class TestCrewThreadSafety:
    @patch("crewai.Agent.execute_task")
    def test_parallel_crews_thread_safety(self, mock_execute_task, crew_factory):
        mock_execute_task.return_value = "Task completed"
        num_crews = 5

        def run_crew_with_context_check(crew_id: str) -> Dict[str, Any]:
            results = {"crew_id": crew_id, "contexts": []}

            def check_context_task(output):
                context = get_crew_context()
                results["contexts"].append(
                    {
                        "stage": "task_callback",
                        "crew_id": context.id if context else None,
                        "crew_key": context.key if context else None,
                        "thread": threading.current_thread().name,
                    }
                )
                return output

            context_before = get_crew_context()
            results["contexts"].append(
                {
                    "stage": "before_kickoff",
                    "crew_id": context_before.id if context_before else None,
                    "thread": threading.current_thread().name,
                }
            )

            crew = crew_factory(crew_id, task_callback=check_context_task)
            output = crew.kickoff()

            context_after = get_crew_context()
            results["contexts"].append(
                {
                    "stage": "after_kickoff",
                    "crew_id": context_after.id if context_after else None,
                    "thread": threading.current_thread().name,
                }
            )

            results["crew_uuid"] = str(crew.id)
            results["output"] = output.raw
            return results

        with ThreadPoolExecutor(max_workers=num_crews) as executor:
            futures = []
            for i in range(num_crews):
                future = executor.submit(run_crew_with_context_check, f"crew_{i}")
                futures.append(future)
            results = [f.result() for f in futures]

        for result in results:
            crew_uuid = result["crew_uuid"]
            before_ctx = next(
                ctx for ctx in result["contexts"] if ctx["stage"] == "before_kickoff"
            )
            assert (
                before_ctx["crew_id"] is None
            ), f"Context should be None before kickoff for {result['crew_id']}"
            task_ctx = next(
                ctx for ctx in result["contexts"] if ctx["stage"] == "task_callback"
            )
            assert (
                task_ctx["crew_id"] == crew_uuid
            ), f"Context mismatch during task for {result['crew_id']}"
            after_ctx = next(
                ctx for ctx in result["contexts"] if ctx["stage"] == "after_kickoff"
            )
            assert (
                after_ctx["crew_id"] is None
            ), f"Context should be None after kickoff for {result['crew_id']}"
            thread_name = before_ctx["thread"]
            assert (
                "ThreadPoolExecutor" in thread_name
            ), f"Should run in thread pool for {result['crew_id']}"

    @pytest.mark.asyncio
    @patch("crewai.Agent.execute_task")
    async def test_async_crews_thread_safety(self, mock_execute_task, crew_factory):
        mock_execute_task.return_value = "Task completed"
        num_crews = 5

        async def run_crew_async(crew_id: str) -> Dict[str, Any]:
            task_context = {"crew_id": crew_id, "context": None}

            def capture_context(output):
                ctx = get_crew_context()
                task_context["context"] = {
                    "crew_id": ctx.id if ctx else None,
                    "crew_key": ctx.key if ctx else None,
                }
                return output

            crew = crew_factory(crew_id, task_callback=capture_context)
            output = await crew.kickoff_async()
            return {
                "crew_id": crew_id,
                "crew_uuid": str(crew.id),
                "output": output.raw,
                "task_context": task_context,
            }

        tasks = [run_crew_async(f"async_crew_{i}") for i in range(num_crews)]
        results = await asyncio.gather(*tasks)

        for result in results:
            crew_uuid = result["crew_uuid"]
            task_ctx = result["task_context"]["context"]
            assert (
                task_ctx is not None
            ), f"Context should exist during task for {result['crew_id']}"
            assert (
                task_ctx["crew_id"] == crew_uuid
            ), f"Context mismatch for {result['crew_id']}"

    @patch("crewai.Agent.execute_task")
    def test_concurrent_kickoff_for_each(self, mock_execute_task, crew_factory):
        mock_execute_task.return_value = "Task completed"
        contexts_captured = []

        def capture_context(output):
            ctx = get_crew_context()
            contexts_captured.append(
                {
                    "context_id": ctx.id if ctx else None,
                    "thread": threading.current_thread().name,
                }
            )
            return output

        crew = crew_factory("for_each_test", task_callback=capture_context)
        inputs = [{"item": f"input_{i}"} for i in range(3)]
        results = crew.kickoff_for_each(inputs=inputs)

        assert len(results) == len(inputs)
        assert len(contexts_captured) == len(inputs)
        context_ids = [ctx["context_id"] for ctx in contexts_captured]
        assert len(set(context_ids)) == len(
            inputs
        ), "Each execution should have unique context"

    @patch("crewai.Agent.execute_task")
    def test_no_context_leakage_between_crews(self, mock_execute_task, crew_factory):
        mock_execute_task.return_value = "Task completed"
        contexts = []

        def check_context(output):
            ctx = get_crew_context()
            contexts.append(
                {
                    "context_id": ctx.id if ctx else None,
                    "context_key": ctx.key if ctx else None,
                }
            )
            return output

        def run_crew(name: str):
            crew = crew_factory(name, task_callback=check_context)
            crew.kickoff()
            return str(crew.id)

        crew1_id = run_crew("First")
        crew2_id = run_crew("Second")

        assert len(contexts) == 2
        assert contexts[0]["context_id"] == crew1_id
        assert contexts[1]["context_id"] == crew2_id
        assert contexts[0]["context_id"] != contexts[1]["context_id"]

View File

@@ -1,88 +0,0 @@
import uuid

import pytest
from opentelemetry import baggage
from opentelemetry.context import attach, detach

from crewai.utilities.crew.crew_context import get_crew_context
from crewai.utilities.crew.models import CrewContext


def test_crew_context_creation():
    crew_id = str(uuid.uuid4())
    context = CrewContext(id=crew_id, key="test-crew")
    assert context.id == crew_id
    assert context.key == "test-crew"


def test_get_crew_context_with_baggage():
    crew_id = str(uuid.uuid4())
    assert get_crew_context() is None

    crew_ctx = CrewContext(id=crew_id, key="test-key")
    ctx = baggage.set_baggage("crew_context", crew_ctx)
    token = attach(ctx)
    try:
        context = get_crew_context()
        assert context is not None
        assert context.id == crew_id
        assert context.key == "test-key"
    finally:
        detach(token)

    assert get_crew_context() is None


def test_get_crew_context_empty():
    assert get_crew_context() is None


def test_baggage_nested_contexts():
    crew_id1 = str(uuid.uuid4())
    crew_id2 = str(uuid.uuid4())

    crew_ctx1 = CrewContext(id=crew_id1, key="outer")
    ctx1 = baggage.set_baggage("crew_context", crew_ctx1)
    token1 = attach(ctx1)
    try:
        outer_context = get_crew_context()
        assert outer_context.id == crew_id1
        assert outer_context.key == "outer"

        crew_ctx2 = CrewContext(id=crew_id2, key="inner")
        ctx2 = baggage.set_baggage("crew_context", crew_ctx2)
        token2 = attach(ctx2)
        try:
            inner_context = get_crew_context()
            assert inner_context.id == crew_id2
            assert inner_context.key == "inner"
        finally:
            detach(token2)

        restored_context = get_crew_context()
        assert restored_context.id == crew_id1
        assert restored_context.key == "outer"
    finally:
        detach(token1)

    assert get_crew_context() is None


def test_baggage_exception_handling():
    crew_id = str(uuid.uuid4())
    crew_ctx = CrewContext(id=crew_id, key="test")
    ctx = baggage.set_baggage("crew_context", crew_ctx)
    token = attach(ctx)

    with pytest.raises(ValueError):
        try:
            assert get_crew_context() is not None
            raise ValueError("Test exception")
        finally:
            detach(token)

    assert get_crew_context() is None
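test_baggage_exception_handling above encodes the discipline the whole file depends on: detach the token in a finally block so the previous context is restored even when the guarded code raises. A minimal standalone sketch using only the opentelemetry-api surface already shown above:

from opentelemetry import baggage
from opentelemetry.context import attach, detach

token = attach(baggage.set_baggage("crew_context", {"id": "demo"}))
try:
    # The value is visible while the context is attached...
    assert baggage.get_baggage("crew_context") == {"id": "demo"}
finally:
    # ...and detaching in finally restores the prior context even on error.
    detach(token)

assert baggage.get_baggage("crew_context") is None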

uv.lock (generated): 5458 lines changed; file diff suppressed because it is too large.