diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py
index ffb1905ef..6bf7c0942 100644
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -62,18 +62,6 @@ except ImportError:
 
 
 if TYPE_CHECKING:
-    from litellm.exceptions import ContextWindowExceededError
-    from litellm.litellm_core_utils.get_supported_openai_params import (
-        get_supported_openai_params,
-    )
-    from litellm.types.utils import (
-        ChatCompletionDeltaToolCall,
-        Choices,
-        Function,
-        ModelResponse,
-    )
-    from litellm.utils import supports_response_schema
-
     from crewai.agent.core import Agent
     from crewai.llms.hooks.base import BaseInterceptor
     from crewai.llms.providers.anthropic.completion import AnthropicThinkingConfig
@@ -83,8 +71,6 @@ if TYPE_CHECKING:
 
 try:
     import litellm
-    from litellm.exceptions import ContextWindowExceededError
-    from litellm.integrations.custom_logger import CustomLogger
     from litellm.litellm_core_utils.get_supported_openai_params import (
         get_supported_openai_params,
     )
@@ -99,15 +85,13 @@ try:
     LITELLM_AVAILABLE = True
 except ImportError:
     LITELLM_AVAILABLE = False
-    litellm = None  # type: ignore
-    Choices = None  # type: ignore
-    ContextWindowExceededError = Exception  # type: ignore
-    get_supported_openai_params = None  # type: ignore
-    ChatCompletionDeltaToolCall = None  # type: ignore
-    Function = None  # type: ignore
-    ModelResponse = None  # type: ignore
-    supports_response_schema = None  # type: ignore
-    CustomLogger = None  # type: ignore
+    litellm = None  # type: ignore[assignment]
+    Choices = None  # type: ignore[assignment, misc]
+    get_supported_openai_params = None  # type: ignore[assignment]
+    ChatCompletionDeltaToolCall = None  # type: ignore[assignment, misc]
+    Function = None  # type: ignore[assignment, misc]
+    ModelResponse = None  # type: ignore[assignment, misc]
+    supports_response_schema = None  # type: ignore[assignment]
 
 
 load_dotenv()
@@ -1009,12 +993,15 @@ class LLM(BaseLLM):
             )
             return full_response
 
-        except ContextWindowExceededError as e:
-            # Catch context window errors from litellm and convert them to our own exception type.
-            # This exception is handled by CrewAgentExecutor._invoke_loop() which can then
-            # decide whether to summarize the content or abort based on the respect_context_window flag.
-            raise LLMContextLengthExceededError(str(e)) from e
+        except LLMContextLengthExceededError:
+            # Re-raise our own context length error
+            raise
         except Exception as e:
+            # Check if this is a context window error and convert to our exception type
+            error_msg = str(e)
+            if LLMContextLengthExceededError._is_context_limit_error(error_msg):
+                raise LLMContextLengthExceededError(error_msg) from e
+
             logging.error(f"Error in streaming response: {e!s}")
             if full_response.strip():
                 logging.warning(f"Returning partial response despite error: {e!s}")
@@ -1195,10 +1182,15 @@ class LLM(BaseLLM):
                 usage_info = response.usage
                 self._track_token_usage_internal(usage_info)
 
-        except ContextWindowExceededError as e:
-            # Convert litellm's context window error to our own exception type
-            # for consistent handling in the rest of the codebase
-            raise LLMContextLengthExceededError(str(e)) from e
+        except LLMContextLengthExceededError:
+            # Re-raise our own context length error
+            raise
+        except Exception as e:
+            # Check if this is a context window error and convert to our exception type
+            error_msg = str(e)
+            if LLMContextLengthExceededError._is_context_limit_error(error_msg):
+                raise LLMContextLengthExceededError(error_msg) from e
+            raise
 
         # --- 2) Handle structured output response (when response_model is provided)
         if response_model is not None:
@@ -1330,8 +1322,15 @@ class LLM(BaseLLM):
                 usage_info = response.usage
                 self._track_token_usage_internal(usage_info)
 
-        except ContextWindowExceededError as e:
-            raise LLMContextLengthExceededError(str(e)) from e
+        except LLMContextLengthExceededError:
+            # Re-raise our own context length error
+            raise
+        except Exception as e:
+            # Check if this is a context window error and convert to our exception type
+            error_msg = str(e)
+            if LLMContextLengthExceededError._is_context_limit_error(error_msg):
+                raise LLMContextLengthExceededError(error_msg) from e
+            raise
 
         if response_model is not None:
             if isinstance(response, BaseModel):
@@ -1548,9 +1547,15 @@ class LLM(BaseLLM):
             )
             return full_response
 
-        except ContextWindowExceededError as e:
-            raise LLMContextLengthExceededError(str(e)) from e
-        except Exception:
+        except LLMContextLengthExceededError:
+            # Re-raise our own context length error
+            raise
+        except Exception as e:
+            # Check if this is a context window error and convert to our exception type
+            error_msg = str(e)
+            if LLMContextLengthExceededError._is_context_limit_error(error_msg):
+                raise LLMContextLengthExceededError(error_msg) from e
+
             if chunk_count == 0:
                 raise
             if full_response:
@@ -2157,7 +2162,15 @@ class LLM(BaseLLM):
           - E.g., "openrouter/deepseek/deepseek-chat" yields "openrouter"
           - "gemini/gemini-1.5-pro" yields "gemini"
           - If no slash is present, "openai" is assumed.
+
+        Note: This validation only applies to the litellm fallback path.
+        Native providers have their own validation.
         """
+        if not LITELLM_AVAILABLE or supports_response_schema is None:
+            # When litellm is not available, skip validation
+            # (this path should only be reached for litellm fallback models)
+            return
+
         provider = self._get_custom_llm_provider()
         if self.response_format is not None and not supports_response_schema(
             model=self.model,
@@ -2169,6 +2182,16 @@ class LLM(BaseLLM):
             )
 
     def supports_function_calling(self) -> bool:
+        """Check if the model supports function calling.
+
+        Note: This method is only used by the litellm fallback path.
+        Native providers override this method with their own implementation.
+        """
+        if not LITELLM_AVAILABLE:
+            # When litellm is not available, assume function calling is supported
+            # (optimistic default: support cannot be checked without litellm)
+            return True
+
         try:
             provider = self._get_custom_llm_provider()
             return litellm.utils.supports_function_calling(
@@ -2176,15 +2199,24 @@ class LLM(BaseLLM):
             )
         except Exception as e:
             logging.error(f"Failed to check function calling support: {e!s}")
-            return False
+            return True  # Default to True for modern models
 
     def supports_stop_words(self) -> bool:
+        """Check if the model supports stop words.
+
+        Note: This method is only used by the litellm fallback path.
+        Native providers override this method with their own implementation.
+        """
+        if not LITELLM_AVAILABLE or get_supported_openai_params is None:
+            # When litellm is not available, assume stop words are supported
+            return True
+
         try:
             params = get_supported_openai_params(model=self.model)
             return params is not None and "stop" in params
         except Exception as e:
             logging.error(f"Failed to get supported params: {e!s}")
-            return False
+            return True  # Default to True
 
     def get_context_window_size(self) -> int:
         """
@@ -2220,7 +2252,15 @@ class LLM(BaseLLM):
         """
         Attempt to keep a single set of callbacks in litellm by removing old
         duplicates and adding new ones.
+
+        Note: This only affects the litellm fallback path. Native providers
+        don't use litellm callbacks - they emit events via base_llm.py.
         """
+        if not LITELLM_AVAILABLE:
+            # When litellm is not available, callbacks are still stored
+            # but not registered with litellm globals
+            return
+
         with suppress_warnings():
             callback_types = [type(callback) for callback in callbacks]
             for callback in litellm.success_callback[:]:
@@ -2245,6 +2285,9 @@ class LLM(BaseLLM):
         If the environment variables are not set or are empty, the corresponding callback lists
         will be set to empty lists.
 
+        Note: This only affects the litellm fallback path. Native providers
+        don't use litellm callbacks - they emit events via base_llm.py.
+
         Examples:
             LITELLM_SUCCESS_CALLBACKS="langfuse,langsmith"
             LITELLM_FAILURE_CALLBACKS="langfuse"
@@ -2252,9 +2295,13 @@ class LLM(BaseLLM):
         This will set `litellm.success_callback` to ["langfuse", "langsmith"] and
         `litellm.failure_callback` to ["langfuse"].
         """
+        if not LITELLM_AVAILABLE:
+            # When litellm is not available, env callbacks have no effect
+            return
+
         with suppress_warnings():
             success_callbacks_str = os.environ.get("LITELLM_SUCCESS_CALLBACKS", "")
-            success_callbacks: list[str | Callable[..., Any] | CustomLogger] = []
+            success_callbacks: list[str | Callable[..., Any]] = []
             if success_callbacks_str:
                 success_callbacks = [
                     cb.strip() for cb in success_callbacks_str.split(",") if cb.strip()
@@ -2262,7 +2309,7 @@ class LLM(BaseLLM):
 
             failure_callbacks_str = os.environ.get("LITELLM_FAILURE_CALLBACKS", "")
             if failure_callbacks_str:
-                failure_callbacks: list[str | Callable[..., Any] | CustomLogger] = [
+                failure_callbacks: list[str | Callable[..., Any]] = [
                     cb.strip() for cb in failure_callbacks_str.split(",") if cb.strip()
                 ]
 
diff --git a/lib/crewai/src/crewai/utilities/token_counter_callback.py b/lib/crewai/src/crewai/utilities/token_counter_callback.py
index 07c27727a..9c3a5cc5f 100644
--- a/lib/crewai/src/crewai/utilities/token_counter_callback.py
+++ b/lib/crewai/src/crewai/utilities/token_counter_callback.py
@@ -1,37 +1,40 @@
 """Token counting callback handler for LLM interactions.
 
 This module provides a callback handler that tracks token usage
-for LLM API calls through the litellm library.
+for LLM API calls. Works standalone and also integrates with litellm
+when available (for the litellm fallback path).
 """
 
-from typing import TYPE_CHECKING, Any
-
-
-if TYPE_CHECKING:
-    from litellm.integrations.custom_logger import CustomLogger
-    from litellm.types.utils import Usage
-else:
-    try:
-        from litellm.integrations.custom_logger import CustomLogger
-        from litellm.types.utils import Usage
-    except ImportError:
-
-        class CustomLogger:
-            """Fallback CustomLogger when litellm is not available."""
-
-        class Usage:
-            """Fallback Usage when litellm is not available."""
-
+from typing import Any
 
 from crewai.agents.agent_builder.utilities.base_token_process import TokenProcess
 from crewai.utilities.logger_utils import suppress_warnings
 
 
-class TokenCalcHandler(CustomLogger):
+# Check if litellm is available for callback integration
+try:
+    from litellm.integrations.custom_logger import CustomLogger as LiteLLMCustomLogger
+
+    LITELLM_AVAILABLE = True
+except ImportError:
+    LiteLLMCustomLogger = None  # type: ignore[misc, assignment]
+    LITELLM_AVAILABLE = False
+
+
+# Select the base class for TokenCalcHandler: litellm's CustomLogger when it
+# can be imported, otherwise plain object
+if LITELLM_AVAILABLE and LiteLLMCustomLogger is not None:
+    _BaseClass: type = LiteLLMCustomLogger
+else:
+    _BaseClass = object
+
+
+class TokenCalcHandler(_BaseClass):  # type: ignore[misc]
     """Handler for calculating and tracking token usage in LLM calls.
 
-    This handler integrates with litellm's logging system to track
-    prompt tokens, completion tokens, and cached tokens across requests.
+    This handler tracks prompt tokens, completion tokens, and cached tokens
+    across requests. It works standalone and also integrates with litellm's
+    logging system when litellm is installed (for the fallback path).
 
     Attributes:
         token_cost_process: The token process tracker to accumulate usage metrics.
@@ -43,7 +46,9 @@ class TokenCalcHandler(CustomLogger):
         Args:
             token_cost_process: Optional token process tracker for accumulating metrics.
         """
-        super().__init__(**kwargs)
+        # Forward kwargs only to litellm's CustomLogger; object.__init__() takes none
+        if LITELLM_AVAILABLE and LiteLLMCustomLogger is not None:
+            super().__init__(**kwargs)
         self.token_cost_process = token_cost_process
 
     def log_success_event(
@@ -55,6 +60,10 @@ class TokenCalcHandler(CustomLogger):
     ) -> None:
         """Log successful LLM API call and track token usage.
 
+        This method has the same interface as litellm's CustomLogger.log_success_event()
+        so it can be used as a litellm callback when litellm is installed, or called
+        directly when litellm is not installed.
+
         Args:
             kwargs: The arguments passed to the LLM call.
             response_obj: The response object from the LLM API.
@@ -66,7 +75,7 @@ class TokenCalcHandler(CustomLogger):
 
         with suppress_warnings():
             if isinstance(response_obj, dict) and "usage" in response_obj:
-                usage: Usage = response_obj["usage"]
+                usage = response_obj["usage"]
                 if usage:
                     self.token_cost_process.sum_successful_requests(1)
                     if hasattr(usage, "prompt_tokens"):