fix context window

João Moura
2025-04-24 23:07:47 -07:00
parent 685d20f46c
commit 5b9606e8b6
2 changed files with 60 additions and 5 deletions

View File

@@ -37,6 +37,7 @@ with warnings.catch_warnings():
     warnings.simplefilter("ignore", UserWarning)
     import litellm
     from litellm import Choices
+    from litellm.exceptions import ContextWindowExceededError
     from litellm.litellm_core_utils.get_supported_openai_params import (
         get_supported_openai_params,
     )
@@ -597,6 +598,11 @@ class LLM(BaseLLM):
             self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL)
             return full_response
+        except ContextWindowExceededError as e:
+            # Catch context window errors from litellm and convert them to our own exception type.
+            # This exception is handled by CrewAgentExecutor._invoke_loop() which can then
+            # decide whether to summarize the content or abort based on the respect_context_window flag.
+            raise LLMContextLengthExceededException(str(e))
         except Exception as e:
             logging.error(f"Error in streaming response: {str(e)}")
             if full_response.strip():
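The comments above defer the summarize-or-abort decision to CrewAgentExecutor._invoke_loop() and its respect_context_window flag. The executor itself is not part of this diff, so the following is only a rough sketch of that decision under stated assumptions (the function name and the summarize callable are illustrative, not the actual executor code):

# Illustrative sketch only -- not the real CrewAgentExecutor implementation.
# It shows the decision the comments above describe: when the LLM raises
# LLMContextLengthExceededException, either summarize and retry or abort,
# depending on respect_context_window.
from crewai.utilities.exceptions.context_window_exceeding_exception import (
    LLMContextLengthExceededException,
)

def invoke_with_context_guard(llm, messages, respect_context_window, summarize):
    # `summarize` is a hypothetical callable that shortens the message history.
    try:
        return llm.call(messages)
    except LLMContextLengthExceededException:
        if respect_context_window:
            # Shrink the conversation and retry once with the smaller prompt.
            return llm.call(summarize(messages))
        # Caller opted out of automatic summarization, so surface the error.
        raise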
@@ -711,7 +717,16 @@ class LLM(BaseLLM):
             str: The response text
         """

         # --- 1) Make the completion call
-        response = litellm.completion(**params)
+        try:
+            # Attempt to make the completion call, but catch context window errors
+            # and convert them to our own exception type for consistent handling
+            # across the codebase. This allows CrewAgentExecutor to handle context
+            # length issues appropriately.
+            response = litellm.completion(**params)
+        except ContextWindowExceededError as e:
+            # Convert litellm's context window error to our own exception type
+            # for consistent handling in the rest of the codebase
+            raise LLMContextLengthExceededException(str(e))

         # --- 2) Extract response message and content
         response_message = cast(Choices, cast(ModelResponse, response).choices)[
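With both the streaming and non-streaming paths now raising the same exception, callers see one error type regardless of mode. A minimal usage sketch (import path for the exception taken from the test below; the top-level LLM import is assumed from crewai's public API):

from crewai import LLM
from crewai.utilities.exceptions.context_window_exceeding_exception import (
    LLMContextLengthExceededException,
)

llm = LLM(model="gpt-4")
try:
    answer = llm.call("some very long prompt ...")
except LLMContextLengthExceededException as e:
    # Same exception type whether stream=True or stream=False was used.
    print(f"Prompt too large for the model's context window: {e}")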
@@ -870,15 +885,17 @@ class LLM(BaseLLM):
                     params, callbacks, available_functions
                 )
+        except LLMContextLengthExceededException:
+            # Re-raise LLMContextLengthExceededException as it should be handled
+            # by the CrewAgentExecutor._invoke_loop method, which can then decide
+            # whether to summarize the content or abort based on the respect_context_window flag
+            raise
         except Exception as e:
             crewai_event_bus.emit(
                 self,
                 event=LLMCallFailedEvent(error=str(e)),
             )
-            if not LLMContextLengthExceededException(
-                str(e)
-            )._is_context_limit_error(str(e)):
-                logging.error(f"LiteLLM call failed: {str(e)}")
+            logging.error(f"LiteLLM call failed: {str(e)}")
             raise

     def _handle_emit_call_events(self, response: Any, call_type: LLMCallType):
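The deleted branch above used _is_context_limit_error() to pattern-match litellm's message text; with the explicit except clauses, that string check is no longer needed on this path. For orientation, here is a rough sketch of the shape of LLMContextLengthExceededException as inferred from its usage in this diff and in the test below; the real implementation in crewai.utilities.exceptions.context_window_exceeding_exception may differ:

# Illustrative sketch of the exception's interface, inferred from how it is
# used in this commit; the actual class may differ in detail.
class LLMContextLengthExceededException(Exception):
    CONTEXT_LIMIT_PHRASES = [
        "maximum context length",
        "context length exceeded",
        "context window",
    ]

    def __init__(self, error_message: str):
        # Keep the original provider text so callers (and the test below)
        # can still see details such as the token counts.
        super().__init__(
            "LLM context length exceeded. Original error: " + error_message
        )

    def _is_context_limit_error(self, error_message: str) -> bool:
        # Heuristic substring match against known provider error messages.
        lowered = error_message.lower()
        return any(phrase in lowered for phrase in self.CONTEXT_LIMIT_PHRASES)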

View File

@@ -373,6 +373,44 @@ def get_weather_tool_schema():
         },
     }


+def test_context_window_exceeded_error_handling():
+    """Test that litellm.ContextWindowExceededError is converted to LLMContextLengthExceededException."""
+    from litellm.exceptions import ContextWindowExceededError
+
+    from crewai.utilities.exceptions.context_window_exceeding_exception import (
+        LLMContextLengthExceededException,
+    )
+
+    llm = LLM(model="gpt-4")
+
+    # Test non-streaming response
+    with patch("litellm.completion") as mock_completion:
+        mock_completion.side_effect = ContextWindowExceededError(
+            "This model's maximum context length is 8192 tokens. However, your messages resulted in 10000 tokens.",
+            model="gpt-4",
+            llm_provider="openai"
+        )
+
+        with pytest.raises(LLMContextLengthExceededException) as excinfo:
+            llm.call("This is a test message")
+
+        assert "context length exceeded" in str(excinfo.value).lower()
+        assert "8192 tokens" in str(excinfo.value)
+
+    # Test streaming response
+    llm = LLM(model="gpt-4", stream=True)
+
+    with patch("litellm.completion") as mock_completion:
+        mock_completion.side_effect = ContextWindowExceededError(
+            "This model's maximum context length is 8192 tokens. However, your messages resulted in 10000 tokens.",
+            model="gpt-4",
+            llm_provider="openai"
+        )
+
+        with pytest.raises(LLMContextLengthExceededException) as excinfo:
+            llm.call("This is a test message")
+
+        assert "context length exceeded" in str(excinfo.value).lower()
+        assert "8192 tokens" in str(excinfo.value)
+
+
 @pytest.mark.vcr(filter_headers=["authorization"])
 @pytest.fixture
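From a user's perspective, the behavior hinges on the respect_context_window flag referenced in the comments above. Assuming that flag is exposed on the Agent (as those comments imply), a typical configuration might look like this:

from crewai import Agent

# With respect_context_window=True, the executor can summarize the conversation
# and retry when LLMContextLengthExceededException is raised; with False, the
# error is surfaced to the caller instead. (Flag placement assumed from the
# comments in this commit.)
researcher = Agent(
    role="Researcher",
    goal="Answer questions from long documents",
    backstory="Works with large source texts.",
    respect_context_window=True,
)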