diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index e15ea61ea..f61b5b0c3 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -33,8 +33,11 @@ with warnings.catch_warnings():
     warnings.simplefilter("ignore", UserWarning)
     import litellm
     from litellm import Choices
+    from litellm.litellm_core_utils.get_supported_openai_params import (
+        get_supported_openai_params,
+    )
     from litellm.types.utils import ModelResponse
-    from litellm.utils import get_supported_openai_params, supports_response_schema
+    from litellm.utils import supports_response_schema


 from crewai.utilities.events import crewai_event_bus
@@ -296,6 +299,7 @@ class LLM:
         full_response = ""
         last_chunk = None
         chunk_count = 0
+        usage_info = None

         # --- 2) Make sure stream is set to True and include usage metrics
         params["stream"] = True
@@ -310,39 +314,55 @@ class LLM:
                 # Extract content from the chunk
                 chunk_content = None

-                # Handle ModelResponse objects
-                if isinstance(chunk, ModelResponse):
-                    # Get usage information from the chunk (if any)
-                    usage_info = getattr(chunk, "usage", None)
+                # Safely extract content from various chunk formats
+                try:
+                    # Try to access choices safely
+                    choices = None
+                    if isinstance(chunk, dict) and "choices" in chunk:
+                        choices = chunk["choices"]
+                    elif hasattr(chunk, "choices"):
+                        # Check if choices is not a type but an actual attribute with value
+                        if not isinstance(getattr(chunk, "choices"), type):
+                            choices = getattr(chunk, "choices")
+
+                    # Try to extract usage information if available
+                    if isinstance(chunk, dict) and "usage" in chunk:
+                        usage_info = chunk["usage"]
+                    elif hasattr(chunk, "usage"):
+                        # Check if usage is not a type but an actual attribute with value
+                        if not isinstance(getattr(chunk, "usage"), type):
+                            usage_info = getattr(chunk, "usage")

-                    choices = getattr(chunk, "choices", [])
                     if choices and len(choices) > 0:
                         choice = choices[0]

-                        # Handle dictionary-style choices
-                        if isinstance(choice, dict):
-                            delta = choice.get("delta", {})
-                            if (
-                                isinstance(delta, dict)
-                                and "content" in delta
-                                and delta["content"] is not None
-                            ):
-                                chunk_content = delta["content"]
+                        # Handle different delta formats
+                        delta = None
+                        if isinstance(choice, dict) and "delta" in choice:
+                            delta = choice["delta"]
+                        elif hasattr(choice, "delta"):
+                            delta = getattr(choice, "delta")

-                        # Handle object-style choices
-                        else:
-                            delta = getattr(choice, "delta", None)
+                        # Extract content from delta
+                        if delta:
+                            # Handle dict format
+                            if isinstance(delta, dict):
+                                if "content" in delta and delta["content"] is not None:
+                                    chunk_content = delta["content"]
+                            # Handle object format
+                            elif hasattr(delta, "content"):
+                                chunk_content = getattr(delta, "content")

-                            if delta is not None:
-                                if (
-                                    hasattr(delta, "content")
-                                    and getattr(delta, "content", None) is not None
-                                ):
-                                    chunk_content = getattr(delta, "content")
-                                elif isinstance(delta, str):
-                                    chunk_content = delta
+                        # Handle case where content might be None or empty
+                        if chunk_content is None and isinstance(delta, dict):
+                            # Some models might send empty content chunks
+                            chunk_content = ""
+                except Exception as e:
+                    logging.debug(f"Error extracting content from chunk: {e}")
+                    logging.debug(f"Chunk format: {type(chunk)}, content: {chunk}")

-                if chunk_content:
+                # Only add non-None content to the response
+                if chunk_content is not None:
                     # Add the chunk content to the full response
                     full_response += chunk_content
@@ -368,47 +388,110 @@ class LLM:

             # --- 5) Handle empty response with chunks
             if not full_response.strip() and chunk_count > 0:
-                if last_chunk is not None and isinstance(last_chunk, ModelResponse):
-                    usage_info = getattr(last_chunk, "usage", None)
+                logging.warning(
+                    f"Received {chunk_count} chunks but no content was extracted"
+                )
+                if last_chunk is not None:
+                    try:
+                        # Try to extract content from the last chunk's message
+                        choices = None
+                        if isinstance(last_chunk, dict) and "choices" in last_chunk:
+                            choices = last_chunk["choices"]
+                        elif hasattr(last_chunk, "choices"):
+                            if not isinstance(getattr(last_chunk, "choices"), type):
+                                choices = getattr(last_chunk, "choices")
+
+                        if choices and len(choices) > 0:
+                            choice = choices[0]
+
+                            # Try to get content from message
+                            message = None
+                            if isinstance(choice, dict) and "message" in choice:
+                                message = choice["message"]
+                            elif hasattr(choice, "message"):
+                                message = getattr(choice, "message")
+
+                            if message:
+                                content = None
+                                if isinstance(message, dict) and "content" in message:
+                                    content = message["content"]
+                                elif hasattr(message, "content"):
+                                    content = getattr(message, "content")
+
+                                if content:
+                                    full_response = content
+                                    logging.info(
+                                        f"Extracted content from last chunk message: {full_response}"
+                                    )
+                    except Exception as e:
+                        logging.debug(f"Error extracting content from last chunk: {e}")
+                        logging.debug(
+                            f"Last chunk format: {type(last_chunk)}, content: {last_chunk}"
+                        )
+
+            # --- 6) If still empty, use a default response
+            if not full_response.strip():
+                logging.warning("Using default response as fallback")
+                full_response = "I apologize, but I couldn't generate a proper response. Please try again or rephrase your request."
+
+            # --- 7) Check for tool calls in the final response
+            try:
+                if last_chunk:
+                    choices = None
+                    if isinstance(last_chunk, dict) and "choices" in last_chunk:
+                        choices = last_chunk["choices"]
+                    elif hasattr(last_chunk, "choices"):
+                        if not isinstance(getattr(last_chunk, "choices"), type):
+                            choices = getattr(last_chunk, "choices")

-                    choices = getattr(last_chunk, "choices", [])
                     if choices and len(choices) > 0:
                         choice = choices[0]
-                        message = getattr(choice, "message", None)
-                        if message is not None and getattr(message, "content", None):
-                            full_response = getattr(message, "content")
-                            logging.info(
-                                f"Extracted content from last chunk message: {full_response}"
-                            )
-                        elif getattr(choice, "text", None):
-                            full_response = getattr(choice, "text")
-                            logging.info(
-                                f"Extracted text from last chunk: {full_response}"
-                            )
-
-            # --- 6) Check for tool calls in the final response
-            if isinstance(last_chunk, ModelResponse):
-                usage_info = getattr(last_chunk, "usage", None)
-                choices = getattr(last_chunk, "choices", [])
-                if choices and len(choices) > 0:
-                    choice = choices[0]
-                    message = getattr(choice, "message", None)
-                    if message is not None:
-                        tool_calls = getattr(message, "tool_calls", [])
-                        tool_result = self._handle_tool_call(
-                            tool_calls, available_functions
-                        )
-                        if tool_result is not None:
-                            return tool_result
+                        message = None
+                        if isinstance(choice, dict) and "message" in choice:
+                            message = choice["message"]
+                        elif hasattr(choice, "message"):
+                            message = getattr(choice, "message")

-            # --- 7) Log token usage if available in streaming mode
+                        if message:
+                            tool_calls = None
+                            if isinstance(message, dict) and "tool_calls" in message:
+                                tool_calls = message["tool_calls"]
+                            elif hasattr(message, "tool_calls"):
+                                tool_calls = getattr(message, "tool_calls")
+
+                            if tool_calls:
+                                tool_result = self._handle_tool_call(
+                                    tool_calls, available_functions
+                                )
+                                if tool_result is not None:
+                                    return tool_result
+            except Exception as e:
+                logging.debug(f"Error checking for tool calls: {e}")
+
+            # --- 8) Log token usage if available in streaming mode
             # Safely handle callbacks with usage info
             if callbacks and len(callbacks) > 0:
                 for callback in callbacks:
                     if hasattr(callback, "log_success_event"):
-                        usage_info = (
-                            getattr(last_chunk, "usage", None) if last_chunk else None
-                        )
+                        # Use the usage_info we've been tracking
+                        if not usage_info:
+                            # Try to get usage from the last chunk if we haven't already
+                            try:
+                                if last_chunk:
+                                    if (
+                                        isinstance(last_chunk, dict)
+                                        and "usage" in last_chunk
+                                    ):
+                                        usage_info = last_chunk["usage"]
+                                    elif hasattr(last_chunk, "usage"):
+                                        if not isinstance(
+                                            getattr(last_chunk, "usage"), type
+                                        ):
+                                            usage_info = getattr(last_chunk, "usage")
+                            except Exception as e:
+                                logging.debug(f"Error extracting usage info: {e}")
+
                         if usage_info:
                             callback.log_success_event(
                                 kwargs=params,
@@ -417,7 +500,7 @@ class LLM:
                                 end_time=0,
                             )

-            # --- 8) Emit completion event and return response
+            # --- 9) Emit completion event and return response
             self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL)
             return full_response
@@ -614,6 +697,8 @@ class LLM:
             # --- 6) Prepare parameters for the completion call
             params = self._prepare_completion_params(messages, tools)

+            print("IS STREAMING", self.stream)
+
             # --- 7) Make the completion call and handle response
             if self.stream:
                 return self._handle_streaming_response(
@@ -697,7 +782,7 @@ class LLM:

         return messages

-    def _get_custom_llm_provider(self) -> str:
+    def _get_custom_llm_provider(self) -> Optional[str]:
         """
         Derives the custom_llm_provider from the model string.
         - For example, if the model is "openrouter/deepseek/deepseek-chat", returns "openrouter".
@@ -706,7 +791,7 @@ class LLM:
         """
         if "/" in self.model:
             return self.model.split("/")[0]
-        return "openai"
+        return None

     def _validate_call_params(self) -> None:
         """
@@ -729,10 +814,12 @@ class LLM:

     def supports_function_calling(self) -> bool:
         try:
-            params = get_supported_openai_params(model=self.model)
-            return params is not None and "tools" in params
+            provider = self._get_custom_llm_provider()
+            return litellm.utils.supports_function_calling(
+                self.model, custom_llm_provider=provider
+            )
         except Exception as e:
-            logging.error(f"Failed to get supported params: {str(e)}")
+            logging.error(f"Failed to check function calling support: {str(e)}")
             return False

     def supports_stop_words(self) -> bool:
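
For reference, the dict-vs-object access pattern that the streaming changes apply to every chunk can be exercised in isolation. Below is a minimal standalone sketch; the helper name and the sample chunk shapes are illustrative assumptions, not code from llm.py or the patch above:

    from types import SimpleNamespace
    from typing import Any, Optional

    def extract_chunk_content(chunk: Any) -> Optional[str]:
        """Return the delta content of a streaming chunk, or None if absent."""
        # Illustrative helper (not part of the patch): chunks may arrive as plain
        # dicts or as objects exposing .choices[0].delta.content.
        if isinstance(chunk, dict):
            choices = chunk.get("choices") or []
        else:
            choices = getattr(chunk, "choices", None) or []
        if not choices:
            return None
        choice = choices[0]
        delta = choice.get("delta") if isinstance(choice, dict) else getattr(choice, "delta", None)
        if isinstance(delta, dict):
            return delta.get("content")
        return getattr(delta, "content", None)

    # Both chunk shapes yield the same content.
    dict_chunk = {"choices": [{"delta": {"content": "Hello"}}]}
    obj_chunk = SimpleNamespace(choices=[SimpleNamespace(delta=SimpleNamespace(content="Hello"))])
    assert extract_chunk_content(dict_chunk) == extract_chunk_content(obj_chunk) == "Hello"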