Brandon Hancock
2025-03-06 10:31:54 -05:00
parent 9c4a03edd6
commit cdb8f68aa6

@@ -33,8 +33,11 @@ with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
import litellm
from litellm import Choices
from litellm.litellm_core_utils.get_supported_openai_params import (
get_supported_openai_params,
)
from litellm.types.utils import ModelResponse
from litellm.utils import get_supported_openai_params, supports_response_schema
from litellm.utils import supports_response_schema
from crewai.utilities.events import crewai_event_bus
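For context, the hunk above moves get_supported_openai_params out of litellm.utils and into litellm.litellm_core_utils. A minimal compatibility sketch, assuming the older litellm.utils path still exists in earlier litellm releases:

try:
    from litellm.litellm_core_utils.get_supported_openai_params import (
        get_supported_openai_params,
    )
except ImportError:  # assumption: older litellm releases exported it from litellm.utils
    from litellm.utils import get_supported_openai_params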
@@ -296,6 +299,7 @@ class LLM:
full_response = ""
last_chunk = None
chunk_count = 0
usage_info = None
# --- 2) Make sure stream is set to True and include usage metrics
params["stream"] = True
@@ -310,39 +314,55 @@ class LLM:
# Extract content from the chunk
chunk_content = None
# Handle ModelResponse objects
if isinstance(chunk, ModelResponse):
# Get usage information from the chunk (if any)
usage_info = getattr(chunk, "usage", None)
# Safely extract content from various chunk formats
try:
# Try to access choices safely
choices = None
if isinstance(chunk, dict) and "choices" in chunk:
choices = chunk["choices"]
elif hasattr(chunk, "choices"):
# Check if choices is not a type but an actual attribute with value
if not isinstance(getattr(chunk, "choices"), type):
choices = getattr(chunk, "choices")
# Try to extract usage information if available
if isinstance(chunk, dict) and "usage" in chunk:
usage_info = chunk["usage"]
elif hasattr(chunk, "usage"):
# Check if usage is not a type but an actual attribute with value
if not isinstance(getattr(chunk, "usage"), type):
usage_info = getattr(chunk, "usage")
choices = getattr(chunk, "choices", [])
if choices and len(choices) > 0:
choice = choices[0]
# Handle dictionary-style choices
if isinstance(choice, dict):
delta = choice.get("delta", {})
if (
isinstance(delta, dict)
and "content" in delta
and delta["content"] is not None
):
chunk_content = delta["content"]
# Handle different delta formats
delta = None
if isinstance(choice, dict) and "delta" in choice:
delta = choice["delta"]
elif hasattr(choice, "delta"):
delta = getattr(choice, "delta")
# Handle object-style choices
else:
delta = getattr(choice, "delta", None)
# Extract content from delta
if delta:
# Handle dict format
if isinstance(delta, dict):
if "content" in delta and delta["content"] is not None:
chunk_content = delta["content"]
# Handle object format
elif hasattr(delta, "content"):
chunk_content = getattr(delta, "content")
if delta is not None:
if (
hasattr(delta, "content")
and getattr(delta, "content", None) is not None
):
chunk_content = getattr(delta, "content")
elif isinstance(delta, str):
chunk_content = delta
# Handle case where content might be None or empty
if chunk_content is None and isinstance(delta, dict):
# Some models might send empty content chunks
chunk_content = ""
except Exception as e:
logging.debug(f"Error extracting content from chunk: {e}")
logging.debug(f"Chunk format: {type(chunk)}, content: {chunk}")
if chunk_content:
# Only add non-None content to the response
if chunk_content is not None:
# Add the chunk content to the full response
full_response += chunk_content
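The loop above accepts both dict-style and object-style chunks. A condensed, self-contained sketch of the same extraction pattern (the helper name is illustrative, not part of the commit):

from typing import Any, Optional

def extract_chunk_content(chunk: Any) -> Optional[str]:
    """Best-effort extraction of delta content from one streaming chunk."""
    # Chunks may be plain dicts or ModelResponse-like objects, so read
    # "choices" and "delta" defensively in both styles.
    if isinstance(chunk, dict):
        choices = chunk.get("choices")
    else:
        choices = getattr(chunk, "choices", None)
    if not choices:
        return None
    choice = choices[0]
    delta = choice.get("delta") if isinstance(choice, dict) else getattr(choice, "delta", None)
    if isinstance(delta, dict):
        return delta.get("content")
    if isinstance(delta, str):
        return delta
    return getattr(delta, "content", None) if delta is not None else None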
@@ -368,47 +388,110 @@ class LLM:
# --- 5) Handle empty response with chunks
if not full_response.strip() and chunk_count > 0:
if last_chunk is not None and isinstance(last_chunk, ModelResponse):
usage_info = getattr(last_chunk, "usage", None)
logging.warning(
f"Received {chunk_count} chunks but no content was extracted"
)
if last_chunk is not None:
try:
# Try to extract content from the last chunk's message
choices = None
if isinstance(last_chunk, dict) and "choices" in last_chunk:
choices = last_chunk["choices"]
elif hasattr(last_chunk, "choices"):
if not isinstance(getattr(last_chunk, "choices"), type):
choices = getattr(last_chunk, "choices")
if choices and len(choices) > 0:
choice = choices[0]
# Try to get content from message
message = None
if isinstance(choice, dict) and "message" in choice:
message = choice["message"]
elif hasattr(choice, "message"):
message = getattr(choice, "message")
if message:
content = None
if isinstance(message, dict) and "content" in message:
content = message["content"]
elif hasattr(message, "content"):
content = getattr(message, "content")
if content:
full_response = content
logging.info(
f"Extracted content from last chunk message: {full_response}"
)
except Exception as e:
logging.debug(f"Error extracting content from last chunk: {e}")
logging.debug(
f"Last chunk format: {type(last_chunk)}, content: {last_chunk}"
)
# --- 6) If still empty, use a default response
if not full_response.strip():
logging.warning("Using default response as fallback")
full_response = "I apologize, but I couldn't generate a proper response. Please try again or rephrase your request."
# --- 7) Check for tool calls in the final response
try:
if last_chunk:
choices = None
if isinstance(last_chunk, dict) and "choices" in last_chunk:
choices = last_chunk["choices"]
elif hasattr(last_chunk, "choices"):
if not isinstance(getattr(last_chunk, "choices"), type):
choices = getattr(last_chunk, "choices")
choices = getattr(last_chunk, "choices", [])
if choices and len(choices) > 0:
choice = choices[0]
message = getattr(choice, "message", None)
if message is not None and getattr(message, "content", None):
full_response = getattr(message, "content")
logging.info(
f"Extracted content from last chunk message: {full_response}"
)
elif getattr(choice, "text", None):
full_response = getattr(choice, "text")
logging.info(
f"Extracted text from last chunk: {full_response}"
)
# --- 6) Check for tool calls in the final response
if isinstance(last_chunk, ModelResponse):
usage_info = getattr(last_chunk, "usage", None)
choices = getattr(last_chunk, "choices", [])
if choices and len(choices) > 0:
choice = choices[0]
message = getattr(choice, "message", None)
if message is not None:
tool_calls = getattr(message, "tool_calls", [])
tool_result = self._handle_tool_call(
tool_calls, available_functions
)
if tool_result is not None:
return tool_result
message = None
if isinstance(choice, dict) and "message" in choice:
message = choice["message"]
elif hasattr(choice, "message"):
message = getattr(choice, "message")
# --- 7) Log token usage if available in streaming mode
if message:
tool_calls = None
if isinstance(message, dict) and "tool_calls" in message:
tool_calls = message["tool_calls"]
elif hasattr(message, "tool_calls"):
tool_calls = getattr(message, "tool_calls")
if tool_calls:
tool_result = self._handle_tool_call(
tool_calls, available_functions
)
if tool_result is not None:
return tool_result
except Exception as e:
logging.debug(f"Error checking for tool calls: {e}")
# --- 8) Log token usage if available in streaming mode
# Safely handle callbacks with usage info
if callbacks and len(callbacks) > 0:
for callback in callbacks:
if hasattr(callback, "log_success_event"):
usage_info = (
getattr(last_chunk, "usage", None) if last_chunk else None
)
# Use the usage_info we've been tracking
if not usage_info:
# Try to get usage from the last chunk if we haven't already
try:
if last_chunk:
if (
isinstance(last_chunk, dict)
and "usage" in last_chunk
):
usage_info = last_chunk["usage"]
elif hasattr(last_chunk, "usage"):
if not isinstance(
getattr(last_chunk, "usage"), type
):
usage_info = getattr(last_chunk, "usage")
except Exception as e:
logging.debug(f"Error extracting usage info: {e}")
if usage_info:
callback.log_success_event(
kwargs=params,
@@ -417,7 +500,7 @@ class LLM:
end_time=0,
)
# --- 8) Emit completion event and return response
# --- 9) Emit completion event and return response
self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL)
return full_response
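A self-contained sketch of the usage-logging step above; the response_obj payload is an assumption, since the argument actually passed sits in lines elided by the hunk boundary:

from typing import Any, Dict, List, Optional

def log_stream_usage(
    callbacks: List[Any],
    params: Dict[str, Any],
    last_chunk: Any,
    usage_info: Optional[Any],
) -> None:
    """Forward usage metrics collected during streaming to any capable callback."""
    if not usage_info and last_chunk is not None:
        if isinstance(last_chunk, dict):
            usage_info = last_chunk.get("usage")
        else:
            usage_info = getattr(last_chunk, "usage", None)
    if not usage_info:
        return
    for callback in callbacks or []:
        if hasattr(callback, "log_success_event"):
            callback.log_success_event(
                kwargs=params,
                response_obj={"usage": usage_info},  # assumption: real payload not shown in this diff
                start_time=0,
                end_time=0,
            )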
@@ -614,6 +697,8 @@ class LLM:
# --- 6) Prepare parameters for the completion call
params = self._prepare_completion_params(messages, tools)
print("IS STREAMING", self.stream)
# --- 7) Make the completion call and handle response
if self.stream:
return self._handle_streaming_response(
@@ -697,7 +782,7 @@ class LLM:
return messages
def _get_custom_llm_provider(self) -> str:
def _get_custom_llm_provider(self) -> Optional[str]:
"""
Derives the custom_llm_provider from the model string.
- For example, if the model is "openrouter/deepseek/deepseek-chat", returns "openrouter".
@@ -706,7 +791,7 @@ class LLM:
"""
if "/" in self.model:
return self.model.split("/")[0]
return "openai"
return None
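A short sketch of the revised provider derivation, using the example from the docstring; "gpt-4o" is only an illustrative unprefixed model name:

from typing import Optional

def get_custom_llm_provider(model: str) -> Optional[str]:
    # A "provider/..." prefix is returned as-is; an unprefixed model
    # now yields None instead of defaulting to "openai".
    if "/" in model:
        return model.split("/")[0]
    return None

assert get_custom_llm_provider("openrouter/deepseek/deepseek-chat") == "openrouter"
assert get_custom_llm_provider("gpt-4o") is None  # illustrative model name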
def _validate_call_params(self) -> None:
"""
@@ -729,10 +814,12 @@ class LLM:
def supports_function_calling(self) -> bool:
try:
params = get_supported_openai_params(model=self.model)
return params is not None and "tools" in params
provider = self._get_custom_llm_provider()
return litellm.utils.supports_function_calling(
self.model, custom_llm_provider=provider
)
except Exception as e:
logging.error(f"Failed to get supported params: {str(e)}")
logging.error(f"Failed to check function calling support: {str(e)}")
return False
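A standalone sketch of the new capability check via litellm.utils.supports_function_calling; the wrapper name and the optional provider argument handling here are illustrative:

import logging
from typing import Optional

import litellm

def model_supports_function_calling(model: str, provider: Optional[str] = None) -> bool:
    """Ask litellm whether a model supports tool/function calling."""
    try:
        return litellm.utils.supports_function_calling(model, custom_llm_provider=provider)
    except Exception as exc:
        logging.error(f"Failed to check function calling support: {exc}")
        return False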
def supports_stop_words(self) -> bool: