Bugfix/kickoff hangs when llm call fails (#1943)

* Wip to address https://github.com/crewAIInc/crewAI/issues/1934

* implement proper try / except

* clean up PR

* add tests

* Fix tests and code that was broken

* more clean up

* Fixing tests

* fix stop type errors

* more fixes
This commit is contained in:
Brandon Hancock (bhancock_ai)
2025-01-22 14:24:00 -05:00
committed by GitHub
parent c642ebf97e
commit 67f0de1f90
6 changed files with 273 additions and 95 deletions

View File

@@ -3,6 +3,7 @@ import shutil
import subprocess
from typing import Any, Dict, List, Literal, Optional, Union
from litellm import AuthenticationError as LiteLLMAuthenticationError
from pydantic import Field, InstanceOf, PrivateAttr, model_validator
from crewai.agents import CacheHandler
@@ -261,6 +262,9 @@ class Agent(BaseAgent):
}
)["output"]
except Exception as e:
if isinstance(e, LiteLLMAuthenticationError):
# Do not retry on authentication errors
raise e
self._times_executed += 1
if self._times_executed > self.max_retry_limit:
raise e

View File

@@ -3,6 +3,8 @@ import re
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Optional, Union
from litellm.exceptions import AuthenticationError as LiteLLMAuthenticationError
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.agents.agent_builder.base_agent_executor_mixin import CrewAgentExecutorMixin
from crewai.agents.parser import (
@@ -13,6 +15,7 @@ from crewai.agents.parser import (
OutputParserException,
)
from crewai.agents.tools_handler import ToolsHandler
from crewai.llm import LLM
from crewai.tools.base_tool import BaseTool
from crewai.tools.tool_usage import ToolUsage, ToolUsageErrorException
from crewai.utilities import I18N, Printer
@@ -54,7 +57,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
callbacks: List[Any] = [],
):
self._i18n: I18N = I18N()
self.llm = llm
self.llm: LLM = llm
self.task = task
self.agent = agent
self.crew = crew
@@ -80,10 +83,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.tool_name_to_tool_map: Dict[str, BaseTool] = {
tool.name: tool for tool in self.tools
}
if self.llm.stop:
self.llm.stop = list(set(self.llm.stop + self.stop))
else:
self.llm.stop = self.stop
self.stop = stop_words
self.llm.stop = list(set(self.llm.stop + self.stop))
def invoke(self, inputs: Dict[str, str]) -> Dict[str, Any]:
if "system" in self.prompt:
@@ -98,7 +99,11 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self._show_start_logs()
self.ask_for_human_input = bool(inputs.get("ask_for_human_input", False))
formatted_answer = self._invoke_loop()
try:
formatted_answer = self._invoke_loop()
except Exception as e:
raise e
if self.ask_for_human_input:
formatted_answer = self._handle_human_feedback(formatted_answer)
@@ -124,7 +129,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self._enforce_rpm_limit()
answer = self._get_llm_response()
formatted_answer = self._process_llm_response(answer)
if isinstance(formatted_answer, AgentAction):
@@ -145,10 +149,40 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if self._is_context_length_exceeded(e):
self._handle_context_length()
continue
elif self._is_litellm_authentication_error(e):
self._handle_litellm_auth_error(e)
raise e
else:
self._printer.print(
content=f"Unhandled exception: {e}",
color="red",
)
finally:
self.iterations += 1
self._show_logs(formatted_answer)
return formatted_answer
def _is_litellm_authentication_error(self, exception: Exception) -> bool:
    """Return True if *exception* is a litellm ``AuthenticationError``.

    Auth failures are deliberately not retried by the executor loop, so
    classifying them precisely matters.

    Args:
        exception: The exception raised during an LLM call.

    Returns:
        True when the exception is (a subclass of) LiteLLMAuthenticationError.
    """
    # LiteLLMAuthenticationError is imported unconditionally at module top,
    # so the previous truthiness guard (`if LiteLLMAuthenticationError and
    # isinstance(...)`) could never be False and was redundant; a single
    # isinstance check is sufficient and equivalent.
    return isinstance(exception, LiteLLMAuthenticationError)
def _handle_litellm_auth_error(self, exception: Exception) -> None:
"""Handle litellm authentication error by informing the user and exiting."""
self._printer.print(
content="Authentication error with litellm occurred. Please check your API key and configuration.",
color="red",
)
self._printer.print(
content=f"Error details: {exception}",
color="red",
)
def _has_reached_max_iterations(self) -> bool:
"""Check if the maximum number of iterations has been reached."""
return self.iterations >= self.max_iter
@@ -160,10 +194,17 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
def _get_llm_response(self) -> str:
"""Call the LLM and return the response, handling any invalid responses."""
answer = self.llm.call(
self.messages,
callbacks=self.callbacks,
)
try:
answer = self.llm.call(
self.messages,
callbacks=self.callbacks,
)
except Exception as e:
self._printer.print(
content=f"Error during LLM call: {e}",
color="red",
)
raise e
if not answer:
self._printer.print(
@@ -184,7 +225,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE in e.error:
answer = answer.split("Observation:")[0].strip()
self.iterations += 1
return self._format_answer(answer)
def _handle_agent_action(

View File

@@ -142,7 +142,6 @@ class LLM:
self.temperature = temperature
self.top_p = top_p
self.n = n
self.stop = stop
self.max_completion_tokens = max_completion_tokens
self.max_tokens = max_tokens
self.presence_penalty = presence_penalty
@@ -160,6 +159,14 @@ class LLM:
litellm.drop_params = True
# Normalize self.stop to always be a List[str]
if stop is None:
self.stop: List[str] = []
elif isinstance(stop, str):
self.stop = [stop]
else:
self.stop = stop
self.set_callbacks(callbacks)
self.set_env_callbacks()
@@ -222,7 +229,7 @@ class LLM:
].message
text_response = response_message.content or ""
tool_calls = getattr(response_message, "tool_calls", [])
# Ensure callbacks get the full response object with usage info
if callbacks and len(callbacks) > 0:
for callback in callbacks:

View File

@@ -24,12 +24,10 @@ def create_llm(
# 1) If llm_value is already an LLM object, return it directly
if isinstance(llm_value, LLM):
print("LLM value is already an LLM object")
return llm_value
# 2) If llm_value is a string (model name)
if isinstance(llm_value, str):
print("LLM value is a string")
try:
created_llm = LLM(model=llm_value)
return created_llm
@@ -39,12 +37,10 @@ def create_llm(
# 3) If llm_value is None, parse environment variables or use default
if llm_value is None:
print("LLM value is None")
return _llm_via_environment_or_fallback()
# 4) Otherwise, attempt to extract relevant attributes from an unknown object
try:
print("LLM value is an unknown object")
# Extract attributes with explicit types
model = (
getattr(llm_value, "model_name", None)