Files
crewAI/src/crewai/agent.py
Devin AI 0fd0b5c74f fix: Improve LLM parameter handling and fix test timeouts
- Add proper model name extraction in LLM class
- Handle optional parameters correctly in litellm calls
- Fix Agent constructor compatibility with BaseAgent
- Add token process utility for better tracking
- Clean up parameter handling in LLM class

Co-Authored-By: Joe Moura <joao@crewai.com>
2024-12-31 23:12:13 +00:00

685 lines
29 KiB
Python

import os
import shutil
import subprocess
from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import Field, InstanceOf, PrivateAttr, model_validator
from crewai.agents import CacheHandler
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.cli.constants import ENV_VARS, LITELLM_PARAMS
from crewai.knowledge.knowledge import Knowledge
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.knowledge.utils.knowledge_utils import extract_knowledge_context
from crewai.llm import LLM
from crewai.memory.contextual.contextual_memory import ContextualMemory
from crewai.task import Task
from crewai.tools import BaseTool
from crewai.tools.agent_tools.agent_tools import AgentTools
from crewai.tools.base_tool import Tool
from crewai.utilities import Converter, Prompts
from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
from crewai.utilities.converter import generate_model_description
from crewai.utilities.token_counter_callback import TokenCalcHandler
from crewai.utilities.training_handler import CrewTrainingHandler
from crewai.utilities.logger import Logger
from crewai.utilities.rpm_controller import RPMController
from crewai.utilities.token_process import TokenProcess
agentops = None
try:
import agentops # type: ignore # Name "agentops" is already defined
from agentops import track_agent # type: ignore
except ImportError:
def track_agent():
def noop(f):
return f
return noop
@track_agent()
class Agent(BaseAgent):
"""Represents an agent in a system.
Each agent has a role, a goal, a backstory, and an optional language model (llm).
The agent can also have memory, can operate in verbose mode, and can delegate tasks to other agents.
Args:
role (Optional[str]): The role of the agent
goal (Optional[str]): The objective of the agent
backstory (Optional[str]): The backstory of the agent
allow_delegation (bool): Whether the agent can delegate tasks
config (Optional[Dict[str, Any]]): Configuration for the agent
verbose (bool): Whether to enable verbose output
max_rpm (Optional[int]): Maximum requests per minute
tools (Optional[List[Any]]): Tools available to the agent
llm (Optional[Union[str, Any]]): Language model to use
function_calling_llm (Optional[Any]): Language model for tool calling
max_iter (Optional[int]): Maximum iterations for task execution
memory (bool): Whether the agent should have memory
step_callback (Optional[Any]): Callback after each execution step
knowledge_sources (Optional[List[BaseKnowledgeSource]]): Knowledge sources
"""
model_config = {
"arbitrary_types_allowed": True,
"extra": "allow",
}
def __init__(
self,
role: Optional[str] = None,
goal: Optional[str] = None,
backstory: Optional[str] = None,
allow_delegation: bool = False,
config: Optional[Dict[str, Any]] = None,
verbose: bool = False,
max_rpm: Optional[int] = None,
tools: Optional[List[Any]] = None,
llm: Optional[Union[str, LLM, Any]] = None,
function_calling_llm: Optional[Any] = None,
max_iter: Optional[int] = None,
memory: bool = True,
step_callback: Optional[Any] = None,
knowledge_sources: Optional[List[BaseKnowledgeSource]] = None,
**kwargs
) -> None:
"""Initialize an Agent with the given parameters."""
# Process tools before passing to parent
processed_tools = []
if tools:
from crewai.tools import BaseTool
for tool in tools:
if isinstance(tool, BaseTool):
processed_tools.append(tool)
elif callable(tool):
# Convert function to BaseTool
processed_tools.append(Tool.from_function(tool))
else:
raise ValueError(f"Tool {tool} must be either a BaseTool instance or a callable")
# Process LLM before passing to parent
processed_llm = None
if isinstance(llm, str):
processed_llm = LLM(model=llm)
elif isinstance(llm, LLM):
processed_llm = llm
elif llm is not None and hasattr(llm, 'model') and hasattr(llm, 'temperature'):
# Handle ChatOpenAI and similar objects
model_name = getattr(llm, 'model', None)
if model_name is not None:
if not isinstance(model_name, str):
model_name = str(model_name)
processed_llm = LLM(
model=model_name,
temperature=getattr(llm, 'temperature', None),
api_key=getattr(llm, 'api_key', None),
base_url=getattr(llm, 'base_url', None)
)
# If no valid LLM configuration found, leave as None for post_init_setup
# Initialize all fields in a dict
init_dict = {
"role": role,
"goal": goal,
"backstory": backstory,
"allow_delegation": allow_delegation,
"config": config,
"verbose": verbose,
"max_rpm": max_rpm,
"tools": processed_tools,
"llm": processed_llm,
"max_iter": max_iter if max_iter is not None else 25,
"function_calling_llm": function_calling_llm,
"step_callback": step_callback,
"knowledge_sources": knowledge_sources,
**kwargs
}
# Initialize base model with all fields
super().__init__(**init_dict)
# Store original values for interpolation
self._original_role = role
self._original_goal = goal
self._original_backstory = backstory
# Initialize private attributes
self._logger = Logger(verbose=self.verbose)
if self.max_rpm:
self._rpm_controller = RPMController(max_rpm=self.max_rpm, logger=self._logger)
self._token_process = TokenProcess()
_times_executed: int = PrivateAttr(default=0)
max_execution_time: Optional[int] = Field(
default=None,
description="Maximum execution time for an agent to execute a task",
)
agent_ops_agent_name: str = None # type: ignore # Incompatible types in assignment (expression has type "None", variable has type "str")
agent_ops_agent_id: str = None # type: ignore # Incompatible types in assignment (expression has type "None", variable has type "str")
cache_handler: InstanceOf[CacheHandler] = Field(
default=None, description="An instance of the CacheHandler class."
)
step_callback: Optional[Any] = Field(
default=None,
description="Callback to be executed after each step of the agent execution.",
)
use_system_prompt: Optional[bool] = Field(
default=True,
description="Use system prompt for the agent.",
)
llm: Union[str, InstanceOf[LLM], Any] = Field(
description="Language model that will run the agent.", default=None
)
function_calling_llm: Optional[Any] = Field(
description="Language model that will run the agent.", default=None
)
system_template: Optional[str] = Field(
default=None, description="System format for the agent."
)
prompt_template: Optional[str] = Field(
default=None, description="Prompt format for the agent."
)
response_template: Optional[str] = Field(
default=None, description="Response format for the agent."
)
tools_results: Optional[List[Any]] = Field(
default=[], description="Results of the tools used by the agent."
)
allow_code_execution: Optional[bool] = Field(
default=False, description="Enable code execution for the agent."
)
respect_context_window: bool = Field(
default=True,
description="Keep messages under the context window size by summarizing content.",
)
max_iter: int = Field(
default=20,
description="Maximum number of iterations for an agent to execute a task before giving it's best answer",
)
max_retry_limit: int = Field(
default=2,
description="Maximum number of retries for an agent to execute a task when an error occurs.",
)
multimodal: bool = Field(
default=False,
description="Whether the agent is multimodal.",
)
code_execution_mode: Literal["safe", "unsafe"] = Field(
default="safe",
description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).",
)
embedder_config: Optional[Dict[str, Any]] = Field(
default=None,
description="Embedder configuration for the agent.",
)
knowledge_sources: Optional[List[BaseKnowledgeSource]] = Field(
default=None,
description="Knowledge sources for the agent.",
)
_knowledge: Optional[Knowledge] = PrivateAttr(
default=None,
)
@model_validator(mode="after")
def post_init_setup(self):
self._set_knowledge()
self.agent_ops_agent_name = self.role or "agent"
unaccepted_attributes = [
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY",
"AWS_REGION_NAME",
]
# Handle LLM initialization if not already done
if self.llm is None:
# Determine the model name from environment variables or use default
model_name = (
os.environ.get("OPENAI_MODEL_NAME")
or os.environ.get("MODEL")
or "gpt-4o-mini"
)
llm_params = {"model": model_name}
api_base = os.environ.get("OPENAI_API_BASE") or os.environ.get(
"OPENAI_BASE_URL"
)
if api_base:
llm_params["base_url"] = api_base
set_provider = model_name.split("/")[0] if "/" in model_name else "openai"
# Iterate over all environment variables to find matching API keys or use defaults
for provider, env_vars in ENV_VARS.items():
if provider == set_provider:
for env_var in env_vars:
# Check if the environment variable is set
key_name = env_var.get("key_name")
if key_name and key_name not in unaccepted_attributes:
env_value = os.environ.get(key_name)
if env_value:
key_name = key_name.lower()
for pattern in LITELLM_PARAMS:
if pattern in key_name:
key_name = pattern
break
llm_params[key_name] = env_value
# Check for default values if the environment variable is not set
elif env_var.get("default", False):
for key, value in env_var.items():
if key not in ["prompt", "key_name", "default"]:
# Only add default if the key is already set in os.environ
if key in os.environ:
try:
# Create a new dictionary for properly typed parameters
typed_params = {}
# Convert and validate values based on parameter type
if key in ['temperature', 'top_p', 'presence_penalty', 'frequency_penalty']:
if value is not None:
try:
typed_params[key] = float(value)
except (ValueError, TypeError):
pass
elif key in ['n', 'max_tokens', 'max_completion_tokens', 'seed']:
if value is not None:
try:
typed_params[key] = int(value)
except (ValueError, TypeError):
pass
elif key == 'logit_bias' and isinstance(value, str):
try:
bias_dict = {}
for pair in value.split(','):
token_id, bias = pair.split(':')
bias_dict[int(token_id.strip())] = float(bias.strip())
typed_params[key] = bias_dict
except (ValueError, AttributeError):
pass
elif key == 'response_format' and isinstance(value, str):
try:
import json
typed_params[key] = json.loads(value)
except json.JSONDecodeError:
pass
elif key == 'logprobs':
if value is not None:
typed_params[key] = bool(value.lower() == 'true') if isinstance(value, str) else bool(value)
elif key == 'callbacks':
typed_params[key] = [] if value is None else [value] if isinstance(value, str) else value
elif key == 'stop':
typed_params[key] = [value] if isinstance(value, str) else value
elif key in ['model', 'base_url', 'api_version', 'api_key']:
typed_params[key] = value
# Update llm_params with properly typed values
if typed_params:
llm_params.update(typed_params)
except (ValueError, AttributeError, json.JSONDecodeError):
continue
# Create LLM instance with properly typed parameters
valid_params = {
'model', 'timeout', 'temperature', 'top_p', 'n', 'stop',
'max_completion_tokens', 'max_tokens', 'presence_penalty',
'frequency_penalty', 'logit_bias', 'response_format',
'seed', 'logprobs', 'top_logprobs', 'base_url',
'api_version', 'api_key', 'callbacks'
}
# Filter out None values and invalid parameters
filtered_params = {}
for k, v in llm_params.items():
if k in valid_params and v is not None:
filtered_params[k] = v
# Create LLM instance with properly typed parameters
self.llm = LLM(**filtered_params)
else:
# For any other type, attempt to extract relevant attributes
llm_params = {
"model": getattr(self.llm, "model_name", None)
or getattr(self.llm, "deployment_name", None)
or str(self.llm),
"temperature": getattr(self.llm, "temperature", None),
"max_tokens": getattr(self.llm, "max_tokens", None),
"logprobs": getattr(self.llm, "logprobs", None),
"timeout": getattr(self.llm, "timeout", None),
"max_retries": getattr(self.llm, "max_retries", None),
"api_key": getattr(self.llm, "api_key", None),
"base_url": getattr(self.llm, "base_url", None),
"organization": getattr(self.llm, "organization", None),
}
# Remove None values to avoid passing unnecessary parameters
llm_params = {k: v for k, v in llm_params.items() if v is not None}
self.llm = LLM(**llm_params)
# Similar handling for function_calling_llm
if self.function_calling_llm:
if isinstance(self.function_calling_llm, str):
self.function_calling_llm = LLM(model=self.function_calling_llm)
elif not isinstance(self.function_calling_llm, LLM):
self.function_calling_llm = LLM(
model=getattr(self.function_calling_llm, "model_name", None)
or getattr(self.function_calling_llm, "deployment_name", None)
or str(self.function_calling_llm)
)
if not self.agent_executor:
self._setup_agent_executor()
if self.allow_code_execution:
self._validate_docker_installation()
return self
def _setup_agent_executor(self):
if not self.cache_handler:
self.cache_handler = CacheHandler()
self.set_cache_handler(self.cache_handler)
def _set_knowledge(self):
try:
if self.knowledge_sources:
knowledge_agent_name = f"{(self.role or 'agent').replace(' ', '_')}"
if isinstance(self.knowledge_sources, list) and all(
isinstance(k, BaseKnowledgeSource) for k in self.knowledge_sources
):
self._knowledge = Knowledge(
sources=self.knowledge_sources,
embedder_config=self.embedder_config,
collection_name=knowledge_agent_name,
)
except (TypeError, ValueError) as e:
raise ValueError(f"Invalid Knowledge Configuration: {str(e)}")
def execute_task(
self,
task: Task,
context: Optional[str] = None,
tools: Optional[List[BaseTool]] = None,
) -> str:
"""Execute a task with the agent.
Args:
task: Task to execute.
context: Context to execute the task in.
tools: Tools to use for the task.
Returns:
Output of the agent
"""
if self.tools_handler:
self.tools_handler.last_used_tool = {} # type: ignore # Incompatible types in assignment (expression has type "dict[Never, Never]", variable has type "ToolCalling")
task_prompt = task.prompt()
# If the task requires output in JSON or Pydantic format,
# append specific instructions to the task prompt to ensure
# that the final answer does not include any code block markers
if task.output_json or task.output_pydantic:
# Generate the schema based on the output format
if task.output_json:
# schema = json.dumps(task.output_json, indent=2)
schema = generate_model_description(task.output_json)
elif task.output_pydantic:
schema = generate_model_description(task.output_pydantic)
task_prompt += "\n" + self.i18n.slice("formatted_task_instructions").format(
output_format=schema
)
if context:
task_prompt = self.i18n.slice("task_with_context").format(
task=task_prompt, context=context
)
if self.crew and self.crew.memory:
contextual_memory = ContextualMemory(
self.crew.memory_config,
self.crew._short_term_memory,
self.crew._long_term_memory,
self.crew._entity_memory,
self.crew._user_memory,
)
memory = contextual_memory.build_context_for_task(task, context)
if memory.strip() != "":
task_prompt += self.i18n.slice("memory").format(memory=memory)
if self._knowledge:
agent_knowledge_snippets = self._knowledge.query([task.prompt()])
if agent_knowledge_snippets:
agent_knowledge_context = extract_knowledge_context(
agent_knowledge_snippets
)
if agent_knowledge_context:
task_prompt += agent_knowledge_context
if self.crew:
knowledge_snippets = self.crew.query_knowledge([task.prompt()])
if knowledge_snippets:
crew_knowledge_context = extract_knowledge_context(knowledge_snippets)
if crew_knowledge_context:
task_prompt += crew_knowledge_context
tools = tools or self.tools or []
self.create_agent_executor(tools=tools, task=task)
if self.crew and self.crew._train:
task_prompt = self._training_handler(task_prompt=task_prompt)
else:
task_prompt = self._use_trained_data(task_prompt=task_prompt)
try:
result = self.agent_executor.invoke(
{
"input": task_prompt,
"tool_names": self.agent_executor.tools_names,
"tools": self.agent_executor.tools_description,
"ask_for_human_input": task.human_input,
}
)["output"]
except Exception as e:
self._times_executed += 1
if self._times_executed > self.max_retry_limit:
raise e
result = self.execute_task(task, context, tools)
if self.max_rpm and self._rpm_controller:
self._rpm_controller.stop_rpm_counter()
# If there was any tool in self.tools_results that had result_as_answer
# set to True, return the results of the last tool that had
# result_as_answer set to True
for tool_result in self.tools_results: # type: ignore # Item "None" of "list[Any] | None" has no attribute "__iter__" (not iterable)
if tool_result.get("result_as_answer", False):
result = tool_result["result"]
return result
def create_agent_executor(
self, tools: Optional[List[BaseTool]] = None, task=None
) -> None:
"""Create an agent executor for the agent.
Returns:
An instance of the CrewAgentExecutor class.
"""
tools = tools or self.tools or []
parsed_tools = self._parse_tools(tools)
prompt = Prompts(
agent=self,
tools=tools,
i18n=self.i18n,
use_system_prompt=self.use_system_prompt,
system_template=self.system_template,
prompt_template=self.prompt_template,
response_template=self.response_template,
).task_execution()
stop_words = [self.i18n.slice("observation")]
if self.response_template:
stop_words.append(
self.response_template.split("{{ .Response }}")[1].strip()
)
self.agent_executor = CrewAgentExecutor(
llm=self.llm,
task=task,
agent=self,
crew=self.crew,
tools=parsed_tools,
prompt=prompt,
original_tools=tools,
stop_words=stop_words,
max_iter=self.max_iter,
tools_handler=self.tools_handler,
tools_names=self.__tools_names(parsed_tools),
tools_description=self._render_text_description_and_args(parsed_tools),
step_callback=self.step_callback,
function_calling_llm=self.function_calling_llm,
respect_context_window=self.respect_context_window,
request_within_rpm_limit=(
self._rpm_controller.check_or_wait if self._rpm_controller else None
),
callbacks=[TokenCalcHandler(self._token_process)],
)
def get_delegation_tools(self, agents: List[BaseAgent]):
agent_tools = AgentTools(agents=agents)
tools = agent_tools.tools()
return tools
def get_multimodal_tools(self) -> List[Tool]:
from crewai.tools.agent_tools.add_image_tool import AddImageTool
return [AddImageTool()]
def get_code_execution_tools(self):
try:
from crewai_tools import CodeInterpreterTool
# Set the unsafe_mode based on the code_execution_mode attribute
unsafe_mode = self.code_execution_mode == "unsafe"
return [CodeInterpreterTool(unsafe_mode=unsafe_mode)]
except ModuleNotFoundError:
self._logger.log(
"info", "Coding tools not available. Install crewai_tools. "
)
def get_output_converter(self, llm, text, model, instructions):
return Converter(llm=llm, text=text, model=model, instructions=instructions)
def _parse_tools(self, tools: List[Any]) -> List[Any]: # type: ignore
"""Parse tools to be used for the task."""
tools_list = []
try:
# tentatively try to import from crewai_tools import BaseTool as CrewAITool
from crewai.tools import BaseTool as CrewAITool
for tool in tools:
if isinstance(tool, CrewAITool):
tools_list.append(tool.to_structured_tool())
else:
tools_list.append(tool)
except ModuleNotFoundError:
tools_list = []
for tool in tools:
tools_list.append(tool)
return tools_list
def _training_handler(self, task_prompt: str) -> str:
"""Handle training data for the agent task prompt to improve output on Training."""
if data := CrewTrainingHandler(TRAINING_DATA_FILE).load():
agent_id = str(self.id)
if data.get(agent_id):
human_feedbacks = [
i["human_feedback"] for i in data.get(agent_id, {}).values()
]
task_prompt += (
"\n\nYou MUST follow these instructions: \n "
+ "\n - ".join(human_feedbacks)
)
return task_prompt
def _use_trained_data(self, task_prompt: str) -> str:
"""Use trained data for the agent task prompt to improve output."""
if data := CrewTrainingHandler(TRAINED_AGENTS_DATA_FILE).load():
if trained_data_output := data.get(self.role):
task_prompt += (
"\n\nYou MUST follow these instructions: \n - "
+ "\n - ".join(trained_data_output["suggestions"])
)
return task_prompt
def _render_text_description(self, tools: List[Any]) -> str:
"""Render the tool name and description in plain text.
Output will be in the format of:
.. code-block:: markdown
search: This tool is used for search
calculator: This tool is used for math
"""
description = "\n".join(
[
f"Tool name: {tool.name}\nTool description:\n{tool.description}"
for tool in tools
]
)
return description
def _render_text_description_and_args(self, tools: List[BaseTool]) -> str:
"""Render the tool name, description, and args in plain text.
Output will be in the format of:
.. code-block:: markdown
search: This tool is used for search, args: {"query": {"type": "string"}}
calculator: This tool is used for math, \
args: {"expression": {"type": "string"}}
"""
tool_strings = []
for tool in tools:
tool_strings.append(tool.description)
return "\n".join(tool_strings)
def _validate_docker_installation(self) -> None:
"""Check if Docker is installed and running."""
if not shutil.which("docker"):
raise RuntimeError(
f"Docker is not installed. Please install Docker to use code execution with agent: {self.role}"
)
try:
subprocess.run(
["docker", "info"],
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
except subprocess.CalledProcessError:
raise RuntimeError(
f"Docker is not running. Please start Docker to use code execution with agent: {self.role}"
)
@staticmethod
def __tools_names(tools) -> str:
return ", ".join([t.name for t in tools])
def __repr__(self):
return f"Agent(role={self.role}, goal={self.goal}, backstory={self.backstory})"