Bugfix/kickoff hangs when llm call fails (#1943)

* Wip to address https://github.com/crewAIInc/crewAI/issues/1934

* implement proper try / except

* clean up PR

* add tests

* Fix tests and code that was broken

* more clean up

* Fixing tests

* fix stop type errors

* more fixes
This commit is contained in:
Brandon Hancock (bhancock_ai)
2025-01-22 14:24:00 -05:00
committed by GitHub
parent c642ebf97e
commit 67f0de1f90
6 changed files with 273 additions and 95 deletions

View File

@@ -3,6 +3,7 @@ import shutil
import subprocess
from typing import Any, Dict, List, Literal, Optional, Union
from litellm import AuthenticationError as LiteLLMAuthenticationError
from pydantic import Field, InstanceOf, PrivateAttr, model_validator
from crewai.agents import CacheHandler
@@ -261,6 +262,9 @@ class Agent(BaseAgent):
}
)["output"]
except Exception as e:
if isinstance(e, LiteLLMAuthenticationError):
# Do not retry on authentication errors
raise e
self._times_executed += 1
if self._times_executed > self.max_retry_limit:
raise e

View File

@@ -3,6 +3,8 @@ import re
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Optional, Union
from litellm.exceptions import AuthenticationError as LiteLLMAuthenticationError
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.agents.agent_builder.base_agent_executor_mixin import CrewAgentExecutorMixin
from crewai.agents.parser import (
@@ -13,6 +15,7 @@ from crewai.agents.parser import (
OutputParserException,
)
from crewai.agents.tools_handler import ToolsHandler
from crewai.llm import LLM
from crewai.tools.base_tool import BaseTool
from crewai.tools.tool_usage import ToolUsage, ToolUsageErrorException
from crewai.utilities import I18N, Printer
@@ -54,7 +57,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
callbacks: List[Any] = [],
):
self._i18n: I18N = I18N()
self.llm = llm
self.llm: LLM = llm
self.task = task
self.agent = agent
self.crew = crew
@@ -80,10 +83,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.tool_name_to_tool_map: Dict[str, BaseTool] = {
tool.name: tool for tool in self.tools
}
if self.llm.stop:
self.llm.stop = list(set(self.llm.stop + self.stop))
else:
self.llm.stop = self.stop
self.stop = stop_words
self.llm.stop = list(set(self.llm.stop + self.stop))
def invoke(self, inputs: Dict[str, str]) -> Dict[str, Any]:
if "system" in self.prompt:
@@ -98,7 +99,11 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self._show_start_logs()
self.ask_for_human_input = bool(inputs.get("ask_for_human_input", False))
formatted_answer = self._invoke_loop()
try:
formatted_answer = self._invoke_loop()
except Exception as e:
raise e
if self.ask_for_human_input:
formatted_answer = self._handle_human_feedback(formatted_answer)
@@ -124,7 +129,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self._enforce_rpm_limit()
answer = self._get_llm_response()
formatted_answer = self._process_llm_response(answer)
if isinstance(formatted_answer, AgentAction):
@@ -145,10 +149,40 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if self._is_context_length_exceeded(e):
self._handle_context_length()
continue
elif self._is_litellm_authentication_error(e):
self._handle_litellm_auth_error(e)
raise e
else:
self._printer.print(
content=f"Unhandled exception: {e}",
color="red",
)
finally:
self.iterations += 1
self._show_logs(formatted_answer)
return formatted_answer
def _is_litellm_authentication_error(self, exception: Exception) -> bool:
    """Return True if *exception* is a litellm ``AuthenticationError``.

    Auth failures are deliberately not retried by the executor loop, so
    classifying them precisely matters.

    Args:
        exception: The exception raised during an LLM call.

    Returns:
        True when the exception is (a subclass of) LiteLLMAuthenticationError.
    """
    # LiteLLMAuthenticationError is imported unconditionally at module top,
    # so the previous truthiness guard (`if LiteLLMAuthenticationError and
    # isinstance(...)`) could never be False and was redundant; a single
    # isinstance check is sufficient and equivalent.
    return isinstance(exception, LiteLLMAuthenticationError)
def _handle_litellm_auth_error(self, exception: Exception) -> None:
"""Handle litellm authentication error by informing the user and exiting."""
self._printer.print(
content="Authentication error with litellm occurred. Please check your API key and configuration.",
color="red",
)
self._printer.print(
content=f"Error details: {exception}",
color="red",
)
def _has_reached_max_iterations(self) -> bool:
"""Check if the maximum number of iterations has been reached."""
return self.iterations >= self.max_iter
@@ -160,10 +194,17 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
def _get_llm_response(self) -> str:
"""Call the LLM and return the response, handling any invalid responses."""
answer = self.llm.call(
self.messages,
callbacks=self.callbacks,
)
try:
answer = self.llm.call(
self.messages,
callbacks=self.callbacks,
)
except Exception as e:
self._printer.print(
content=f"Error during LLM call: {e}",
color="red",
)
raise e
if not answer:
self._printer.print(
@@ -184,7 +225,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE in e.error:
answer = answer.split("Observation:")[0].strip()
self.iterations += 1
return self._format_answer(answer)
def _handle_agent_action(

View File

@@ -142,7 +142,6 @@ class LLM:
self.temperature = temperature
self.top_p = top_p
self.n = n
self.stop = stop
self.max_completion_tokens = max_completion_tokens
self.max_tokens = max_tokens
self.presence_penalty = presence_penalty
@@ -160,6 +159,14 @@ class LLM:
litellm.drop_params = True
# Normalize self.stop to always be a List[str]
if stop is None:
self.stop: List[str] = []
elif isinstance(stop, str):
self.stop = [stop]
else:
self.stop = stop
self.set_callbacks(callbacks)
self.set_env_callbacks()
@@ -222,7 +229,7 @@ class LLM:
].message
text_response = response_message.content or ""
tool_calls = getattr(response_message, "tool_calls", [])
# Ensure callbacks get the full response object with usage info
if callbacks and len(callbacks) > 0:
for callback in callbacks:

View File

@@ -24,12 +24,10 @@ def create_llm(
# 1) If llm_value is already an LLM object, return it directly
if isinstance(llm_value, LLM):
print("LLM value is already an LLM object")
return llm_value
# 2) If llm_value is a string (model name)
if isinstance(llm_value, str):
print("LLM value is a string")
try:
created_llm = LLM(model=llm_value)
return created_llm
@@ -39,12 +37,10 @@ def create_llm(
# 3) If llm_value is None, parse environment variables or use default
if llm_value is None:
print("LLM value is None")
return _llm_via_environment_or_fallback()
# 4) Otherwise, attempt to extract relevant attributes from an unknown object
try:
print("LLM value is an unknown object")
# Extract attributes with explicit types
model = (
getattr(llm_value, "model_name", None)