mirror of https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 08:38:30 +00:00
feat: add prompt observability code (#2027)
* feat: add prompt observability code
* feat: improve logic for llm call
* feat: add tests for traces
* feat: remove unused import
* feat: add function to clear and add task traces
* feat: fix import
* feat: change time
* feat: fix type checking issues
* feat: add fixed time to fix test
* feat: fix datetime test issue
* feat: add add task traces function
* feat: add same logic as entp
* feat: add start_time as reference for duplication of tool call
* feat: add max_depth
* feat: add protocols file to properly import on LLM

---------

Co-authored-by: Brandon Hancock (bhancock_ai) <109994880+bhancockio@users.noreply.github.com>
This commit is contained in:
committed by GitHub
parent 1cb5f57864
commit 90f1bee602
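The diff below decorates the new _call_llm method with @trace_llm_call, imported from crewai.traces.unified_trace_controller; that module is added elsewhere in this PR and is not part of this diff. As a rough sketch only (not the actual implementation), a call-tracing decorator of this shape typically wraps the call and records timing and outcome; COLLECTED_TRACES here is a hypothetical stand-in for the real trace controller:

import functools
import time
from typing import Any, Callable, Dict, List

# Hypothetical stand-in for the trace storage provided by the real controller.
COLLECTED_TRACES: List[Dict[str, Any]] = []


def trace_llm_call(func: Callable[..., Any]) -> Callable[..., Any]:
    """Illustrative sketch: record duration and outcome around an LLM call."""

    @functools.wraps(func)
    def wrapper(self: Any, params: Dict[str, Any]) -> Any:
        start = time.time()
        trace: Dict[str, Any] = {"model": params.get("model")}
        try:
            response = func(self, params)
            trace["status"] = "success"
            return response
        except Exception:
            trace["status"] = "error"
            raise
        finally:
            # Record the trace whether the call succeeded or failed.
            trace["duration"] = time.time() - start
            COLLECTED_TRACES.append(trace)

    return wrapper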
@@ -1,3 +1,4 @@
+import inspect
 import json
 import logging
 import os
@@ -5,7 +6,17 @@ import sys
 import threading
 import warnings
 from contextlib import contextmanager
-from typing import Any, Dict, List, Literal, Optional, Type, Union, cast
+from typing import (
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+    cast,
+)
 
 from dotenv import load_dotenv
 from pydantic import BaseModel
@@ -18,9 +29,11 @@ with warnings.catch_warnings():
     from litellm.utils import supports_response_schema
 
 
+from crewai.traces.unified_trace_controller import trace_llm_call
 from crewai.utilities.exceptions.context_window_exceeding_exception import (
     LLMContextLengthExceededException,
 )
+from crewai.utilities.protocols import AgentExecutorProtocol
 
 load_dotenv()
 
@@ -164,6 +177,7 @@ class LLM:
         self.context_window_size = 0
         self.reasoning_effort = reasoning_effort
         self.additional_params = kwargs
+        self._message_history: List[Dict[str, str]] = []
         self.is_anthropic = self._is_anthropic_model(model)
 
         litellm.drop_params = True
@@ -179,16 +193,22 @@ class LLM:
         self.set_callbacks(callbacks)
         self.set_env_callbacks()
 
+    @trace_llm_call
+    def _call_llm(self, params: Dict[str, Any]) -> Any:
+        with suppress_warnings():
+            response = litellm.completion(**params)
+            return response
+
     def _is_anthropic_model(self, model: str) -> bool:
         """Determine if the model is from Anthropic provider.
-
+
         Args:
             model: The model identifier string.
-
+
         Returns:
             bool: True if the model is from Anthropic, False otherwise.
         """
-        ANTHROPIC_PREFIXES = ('anthropic/', 'claude-', 'claude/')
+        ANTHROPIC_PREFIXES = ("anthropic/", "claude-", "claude/")
         return any(prefix in model.lower() for prefix in ANTHROPIC_PREFIXES)
 
     def call(
@@ -199,7 +219,7 @@ class LLM:
         available_functions: Optional[Dict[str, Any]] = None,
     ) -> Union[str, Any]:
         """High-level LLM call method.
-
+
         Args:
             messages: Input messages for the LLM.
                       Can be a string or list of message dictionaries.
@@ -211,22 +231,22 @@ class LLM:
                 during and after the LLM call.
             available_functions: Optional dict mapping function names to callables
                 that can be invoked by the LLM.
-
+
         Returns:
             Union[str, Any]: Either a text response from the LLM (str) or
                 the result of a tool function call (Any).
-
+
         Raises:
             TypeError: If messages format is invalid
            ValueError: If response format is not supported
            LLMContextLengthExceededException: If input exceeds model's context limit
-
+
         Examples:
            # Example 1: Simple string input
            >>> response = llm.call("Return the name of a random city.")
            >>> print(response)
            "Paris"
-
+
            # Example 2: Message list with system and user messages
            >>> messages = [
            ...     {"role": "system", "content": "You are a geography expert"},
@@ -288,7 +308,7 @@ class LLM:
             params = {k: v for k, v in params.items() if v is not None}
 
             # --- 2) Make the completion call
-            response = litellm.completion(**params)
+            response = self._call_llm(params)
             response_message = cast(Choices, cast(ModelResponse, response).choices)[
                 0
             ].message
@@ -348,36 +368,40 @@ class LLM:
             logging.error(f"LiteLLM call failed: {str(e)}")
             raise
 
-    def _format_messages_for_provider(self, messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
+    def _format_messages_for_provider(
+        self, messages: List[Dict[str, str]]
+    ) -> List[Dict[str, str]]:
         """Format messages according to provider requirements.
-
+
         Args:
             messages: List of message dictionaries with 'role' and 'content' keys.
                       Can be empty or None.
-
+
         Returns:
             List of formatted messages according to provider requirements.
             For Anthropic models, ensures first message has 'user' role.
-
+
         Raises:
             TypeError: If messages is None or contains invalid message format.
         """
         if messages is None:
             raise TypeError("Messages cannot be None")
-
+
         # Validate message format first
         for msg in messages:
             if not isinstance(msg, dict) or "role" not in msg or "content" not in msg:
-                raise TypeError("Invalid message format. Each message must be a dict with 'role' and 'content' keys")
+                raise TypeError(
+                    "Invalid message format. Each message must be a dict with 'role' and 'content' keys"
+                )
 
         if not self.is_anthropic:
             return messages
-
+
         # Anthropic requires messages to start with 'user' role
         if not messages or messages[0]["role"] == "system":
             # If first message is system or empty, add a placeholder user message
             return [{"role": "user", "content": "."}, *messages]
-
+
         return messages
 
     def _get_custom_llm_provider(self) -> str:
@@ -495,3 +519,95 @@ class LLM:
 
         litellm.success_callback = success_callbacks
         litellm.failure_callback = failure_callbacks
+
+    def _get_execution_context(self) -> Tuple[Optional[Any], Optional[Any]]:
+        """Get the agent and task from the execution context.
+
+        Returns:
+            tuple: (agent, task) from any AgentExecutor context, or (None, None) if not found
+        """
+        frame = inspect.currentframe()
+        caller_frame = frame.f_back if frame else None
+        agent = None
+        task = None
+
+        # Add a maximum depth to prevent infinite loops
+        max_depth = 100  # Reasonable limit for call stack depth
+        current_depth = 0
+
+        while caller_frame and current_depth < max_depth:
+            if "self" in caller_frame.f_locals:
+                caller_self = caller_frame.f_locals["self"]
+                if isinstance(caller_self, AgentExecutorProtocol):
+                    agent = caller_self.agent
+                    task = caller_self.task
+                    break
+            caller_frame = caller_frame.f_back
+            current_depth += 1
+
+        return agent, task
+
+    def _get_new_messages(self, messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
+        """Get only the new messages that haven't been processed before."""
+        if not hasattr(self, "_message_history"):
+            self._message_history = []
+
+        new_messages = []
+        for message in messages:
+            message_key = (message["role"], message["content"])
+            if message_key not in [
+                (m["role"], m["content"]) for m in self._message_history
+            ]:
+                new_messages.append(message)
+                self._message_history.append(message)
+        return new_messages
+
+    def _get_new_tool_results(self, agent) -> List[Dict]:
+        """Get only the new tool results that haven't been processed before."""
+        if not agent or not agent.tools_results:
+            return []
+
+        if not hasattr(self, "_tool_results_history"):
+            self._tool_results_history: List[Dict] = []
+
+        new_tool_results = []
+
+        for result in agent.tools_results:
+            # Process tool arguments to extract actual values
+            processed_args = {}
+            if isinstance(result["tool_args"], dict):
+                for key, value in result["tool_args"].items():
+                    if isinstance(value, dict) and "type" in value:
+                        # Skip metadata and just store the actual value
+                        continue
+                    processed_args[key] = value
+
+            # Create a clean result with processed arguments
+            clean_result = {
+                "tool_name": result["tool_name"],
+                "tool_args": processed_args,
+                "result": result["result"],
+                "content": result.get("content", ""),
+                "start_time": result.get("start_time", ""),
+            }
+
+            # Check if this exact tool execution exists in history
+            is_duplicate = False
+            for history_result in self._tool_results_history:
+                if (
+                    clean_result["tool_name"] == history_result["tool_name"]
+                    and str(clean_result["tool_args"])
+                    == str(history_result["tool_args"])
+                    and str(clean_result["result"]) == str(history_result["result"])
+                    and clean_result["content"] == history_result.get("content", "")
+                    and clean_result["start_time"]
+                    == history_result.get("start_time", "")
+                ):
+                    is_duplicate = True
+                    break
+
+            if not is_duplicate:
+                new_tool_results.append(clean_result)
+                self._tool_results_history.append(clean_result)
+
+        return new_tool_results
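The new _get_execution_context method finds the active agent and task by walking up the interpreter call stack until some caller's self satisfies AgentExecutorProtocol, giving up after max_depth frames. A self-contained sketch of that pattern follows; the protocol and class names are illustrative and not part of crewAI, and the protocol shape (agent and task attributes) is assumed from how the method uses it:

import inspect
from typing import Any, Optional, Protocol, Tuple, runtime_checkable


@runtime_checkable
class ExecutorLike(Protocol):
    """Anything exposing the agent and task that drive the current call."""

    agent: Any
    task: Any


def find_execution_context(max_depth: int = 100) -> Tuple[Optional[Any], Optional[Any]]:
    """Walk up the call stack until a caller's `self` looks like an executor."""
    frame = inspect.currentframe()
    caller_frame = frame.f_back if frame else None
    depth = 0
    while caller_frame and depth < max_depth:
        caller_self = caller_frame.f_locals.get("self")
        if isinstance(caller_self, ExecutorLike):
            return caller_self.agent, caller_self.task
        caller_frame = caller_frame.f_back
        depth += 1
    return None, None


class DemoExecutor:
    """Illustrative executor; structurally matches ExecutorLike."""

    def __init__(self) -> None:
        self.agent = "demo-agent"
        self.task = "demo-task"

    def run(self) -> Tuple[Optional[Any], Optional[Any]]:
        # The lookup sees this frame's `self` while run() is on the stack.
        return find_execution_context()


print(DemoExecutor().run())  # ('demo-agent', 'demo-task')

Bounding the walk with max_depth, as the PR does, keeps the lookup cheap and avoids scanning an unexpectedly deep stack.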