mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-03 00:02:36 +00:00
Adding usage info in llm.py (#4172)
* Adding usage info everywhere * Changing the check * Changing the logic * Adding tests * Adding casellets * Minor change * Fixing testcase * remove the duplicated test case, thanks to cursor * Adding async test cases * Updating test case --------- Co-authored-by: Lorenze Jay <63378463+lorenzejay@users.noreply.github.com>
This commit is contained in:
@@ -925,11 +925,12 @@ class LLM(BaseLLM):
|
||||
except Exception as e:
|
||||
logging.debug(f"Error checking for tool calls: {e}")
|
||||
|
||||
# Track token usage and log callbacks if available in streaming mode
|
||||
if usage_info:
|
||||
self._track_token_usage_internal(usage_info)
|
||||
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
||||
|
||||
if not tool_calls or not available_functions:
|
||||
# Track token usage and log callbacks if available in streaming mode
|
||||
if usage_info:
|
||||
self._track_token_usage_internal(usage_info)
|
||||
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
||||
|
||||
if response_model and self.is_litellm:
|
||||
instructor_instance = InternalInstructor(
|
||||
@@ -962,12 +963,7 @@ class LLM(BaseLLM):
|
||||
if tool_result is not None:
|
||||
return tool_result
|
||||
|
||||
# --- 10) Track token usage and log callbacks if available in streaming mode
|
||||
if usage_info:
|
||||
self._track_token_usage_internal(usage_info)
|
||||
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
||||
|
||||
# --- 11) Emit completion event and return response
|
||||
# --- 10) Emit completion event and return response
|
||||
self._handle_emit_call_events(
|
||||
response=full_response,
|
||||
call_type=LLMCallType.LLM_CALL,
|
||||
@@ -1148,6 +1144,10 @@ class LLM(BaseLLM):
|
||||
if response_model:
|
||||
params["response_model"] = response_model
|
||||
response = litellm.completion(**params)
|
||||
|
||||
if hasattr(response,"usage") and not isinstance(response.usage, type) and response.usage:
|
||||
usage_info = response.usage
|
||||
self._track_token_usage_internal(usage_info)
|
||||
|
||||
except ContextWindowExceededError as e:
|
||||
# Convert litellm's context window error to our own exception type
|
||||
@@ -1273,6 +1273,10 @@ class LLM(BaseLLM):
|
||||
params["response_model"] = response_model
|
||||
response = await litellm.acompletion(**params)
|
||||
|
||||
if hasattr(response,"usage") and not isinstance(response.usage, type) and response.usage:
|
||||
usage_info = response.usage
|
||||
self._track_token_usage_internal(usage_info)
|
||||
|
||||
except ContextWindowExceededError as e:
|
||||
raise LLMContextLengthExceededError(str(e)) from e
|
||||
|
||||
@@ -1359,6 +1363,7 @@ class LLM(BaseLLM):
|
||||
"""
|
||||
full_response = ""
|
||||
chunk_count = 0
|
||||
|
||||
usage_info = None
|
||||
|
||||
accumulated_tool_args: defaultdict[int, AccumulatedToolArgs] = defaultdict(
|
||||
@@ -1444,6 +1449,9 @@ class LLM(BaseLLM):
|
||||
end_time=0,
|
||||
)
|
||||
|
||||
if usage_info:
|
||||
self._track_token_usage_internal(usage_info)
|
||||
|
||||
if accumulated_tool_args and available_functions:
|
||||
# Convert accumulated tool args to ChatCompletionDeltaToolCall objects
|
||||
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
|
||||
|
||||
Reference in New Issue
Block a user