Compare commits

..

3 Commits

Author SHA1 Message Date
Devin AI
2dcaddd29f refactor: enhance LLM validation and error handling
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-09 22:22:45 +00:00
Devin AI
fafcd1d27a test: update test cassettes and dependencies
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-09 22:12:40 +00:00
Devin AI
f3a681c7d9 feat: enable custom LLM support for Crew.test()
- Add llm parameter to Crew.test() that accepts string or LLM instance
- Maintain backward compatibility with openai_model_name parameter
- Update CrewEvaluator to handle any LLM implementation
- Add comprehensive test coverage

Fixes #2076

Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-09 22:12:04 +00:00
27 changed files with 398 additions and 458 deletions

View File

@@ -294,7 +294,14 @@ class Agent(BaseAgent):
)
if self.crew and self.crew.memory:
memory = self.crew.contextual_memory.build_context_for_task(task, context)
contextual_memory = ContextualMemory(
self.crew.memory_config,
self.crew._short_term_memory,
self.crew._long_term_memory,
self.crew._entity_memory,
self.crew._user_memory,
)
memory = contextual_memory.build_context_for_task(task, context)
if memory.strip() != "":
task_prompt += self.i18n.slice("memory").format(memory=memory)

View File

@@ -358,9 +358,9 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if self.crew is not None and hasattr(self.crew, "_train_iteration"):
train_iteration = self.crew._train_iteration
if agent_id in training_data and isinstance(train_iteration, int):
training_data[agent_id][train_iteration]["improved_output"] = (
result.output
)
training_data[agent_id][train_iteration][
"improved_output"
] = result.output
training_handler.save(training_data)
else:
self._printer.print(

View File

@@ -153,12 +153,8 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
login_response_json = login_response.json()
settings = Settings()
settings.tool_repository_username = login_response_json["credential"][
"username"
]
settings.tool_repository_password = login_response_json["credential"][
"password"
]
settings.tool_repository_username = login_response_json["credential"]["username"]
settings.tool_repository_password = login_response_json["credential"]["password"]
settings.dump()
console.print(
@@ -183,7 +179,7 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
capture_output=False,
env=self._build_env_with_credentials(repository_handle),
text=True,
check=True,
check=True
)
if add_package_result.stderr:
@@ -208,11 +204,7 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
settings = Settings()
env = os.environ.copy()
env[f"UV_INDEX_{repository_handle}_USERNAME"] = str(
settings.tool_repository_username or ""
)
env[f"UV_INDEX_{repository_handle}_PASSWORD"] = str(
settings.tool_repository_password or ""
)
env[f"UV_INDEX_{repository_handle}_USERNAME"] = str(settings.tool_repository_username or "")
env[f"UV_INDEX_{repository_handle}_PASSWORD"] = str(settings.tool_repository_password or "")
return env

View File

@@ -25,7 +25,6 @@ from crewai.crews.crew_output import CrewOutput
from crewai.knowledge.knowledge import Knowledge
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.llm import LLM
from crewai.memory.contextual.contextual_memory import ContextualMemory
from crewai.memory.entity.entity_memory import EntityMemory
from crewai.memory.long_term.long_term_memory import LongTermMemory
from crewai.memory.short_term.short_term_memory import ShortTermMemory
@@ -279,13 +278,6 @@ class Crew(BaseModel):
)
else:
self._user_memory = None
self.contextual_memory = ContextualMemory(
memory_config=self.memory_config,
stm=self._short_term_memory,
ltm=self._long_term_memory,
em=self._entity_memory,
um=self._user_memory,
)
return self
@model_validator(mode="after")
@@ -1083,19 +1075,36 @@ class Crew(BaseModel):
def test(
self,
n_iterations: int,
llm: Optional[Union[str, LLM]] = None,
openai_model_name: Optional[str] = None,
inputs: Optional[Dict[str, Any]] = None,
) -> None:
"""Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures."""
"""Test and evaluate the Crew with the given inputs for n iterations.
Args:
n_iterations: Number of iterations to run
llm: LLM instance or model name to use for evaluation
openai_model_name: (Deprecated) OpenAI model name for backward compatibility
inputs: Optional inputs for the crew
Raises:
ValueError: If llm parameter is neither a string nor LLM instance
"""
if llm and not isinstance(llm, (str, LLM)):
raise ValueError("llm parameter must be either a string model name or LLM instance")
test_crew = self.copy()
# Handle backward compatibility
if openai_model_name:
llm = openai_model_name
self._test_execution_span = test_crew._telemetry.test_execution_span(
test_crew,
n_iterations,
inputs,
openai_model_name, # type: ignore[arg-type]
) # type: ignore[arg-type]
evaluator = CrewEvaluator(test_crew, openai_model_name) # type: ignore[arg-type]
str(llm) if isinstance(llm, str) else (llm.model if llm else None),
)
evaluator = CrewEvaluator(test_crew, llm)
for i in range(1, n_iterations + 1):
evaluator.set_iteration(i)

View File

@@ -14,13 +14,13 @@ class Knowledge(BaseModel):
Knowledge is a collection of sources and setup for the vector store to save and query relevant context.
Args:
sources: List[BaseKnowledgeSource] = Field(default_factory=list)
storage: Optional[KnowledgeStorage] = Field(default=None)
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
embedder_config: Optional[Dict[str, Any]] = None
"""
sources: List[BaseKnowledgeSource] = Field(default_factory=list)
model_config = ConfigDict(arbitrary_types_allowed=True)
storage: Optional[KnowledgeStorage] = Field(default=None)
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
embedder_config: Optional[Dict[str, Any]] = None
collection_name: Optional[str] = None
@@ -49,13 +49,8 @@ class Knowledge(BaseModel):
"""
Query across all knowledge sources to find the most relevant information.
Returns the top_k most relevant chunks.
Raises:
ValueError: If storage is not initialized.
"""
if self.storage is None:
raise ValueError("Storage is not initialized.")
results = self.storage.search(
query,
limit,

View File

@@ -22,7 +22,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
default_factory=list, description="The path to the file"
)
content: Dict[Path, str] = Field(init=False, default_factory=dict)
storage: Optional[KnowledgeStorage] = Field(default=None)
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
safe_file_paths: List[Path] = Field(default_factory=list)
@field_validator("file_path", "file_paths", mode="before")
@@ -62,10 +62,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
def _save_documents(self):
"""Save the documents to the storage."""
if self.storage:
self.storage.save(self.chunks)
else:
raise ValueError("No storage found to save documents.")
self.storage.save(self.chunks)
def convert_to_path(self, path: Union[Path, str]) -> Path:
"""Convert a path to a Path object."""

View File

@@ -16,7 +16,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
chunk_embeddings: List[np.ndarray] = Field(default_factory=list)
model_config = ConfigDict(arbitrary_types_allowed=True)
storage: Optional[KnowledgeStorage] = Field(default=None)
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
metadata: Dict[str, Any] = Field(default_factory=dict) # Currently unused
collection_name: Optional[str] = Field(default=None)
@@ -46,7 +46,4 @@ class BaseKnowledgeSource(BaseModel, ABC):
Save the documents to the storage.
This method should be called after the chunks and embeddings are generated.
"""
if self.storage:
self.storage.save(self.chunks)
else:
raise ValueError("No storage found to save documents.")
self.storage.save(self.chunks)

View File

@@ -1,5 +1,4 @@
from typing import Any, Dict, Optional
from crewai.task import Task
from crewai.memory import EntityMemory, LongTermMemory, ShortTermMemory, UserMemory
@@ -11,7 +10,7 @@ class ContextualMemory:
stm: ShortTermMemory,
ltm: LongTermMemory,
em: EntityMemory,
um: Optional[UserMemory],
um: UserMemory,
):
if memory_config is not None:
self.memory_provider = memory_config.get("provider")
@@ -22,7 +21,7 @@ class ContextualMemory:
self.em = em
self.um = um
def build_context_for_task(self, task: Task, context: str) -> str:
def build_context_for_task(self, task, context) -> str:
"""
Automatically builds a minimal, highly relevant set of contextual information
for a given task.
@@ -40,7 +39,7 @@ class ContextualMemory:
context.append(self._fetch_user_context(query))
return "\n".join(filter(None, context))
def _fetch_stm_context(self, query: str) -> str:
def _fetch_stm_context(self, query) -> str:
"""
Fetches recent relevant insights from STM related to the task's description and expected_output,
formatted as bullet points.
@@ -54,7 +53,7 @@ class ContextualMemory:
)
return f"Recent Insights:\n{formatted_results}" if stm_results else ""
def _fetch_ltm_context(self, task: str) -> Optional[str]:
def _fetch_ltm_context(self, task) -> Optional[str]:
"""
Fetches historical data or insights from LTM that are relevant to the task's description and expected_output,
formatted as bullet points.
@@ -73,7 +72,7 @@ class ContextualMemory:
return f"Historical Data:\n{formatted_results}" if ltm_results else ""
def _fetch_entity_context(self, query: str) -> str:
def _fetch_entity_context(self, query) -> str:
"""
Fetches relevant entity information from Entity Memory related to the task's description and expected_output,
formatted as bullet points.
@@ -95,8 +94,6 @@ class ContextualMemory:
Returns:
str: Formatted user memories as bullet points, or an empty string if none found.
"""
if not self.um:
return ""
user_memories = self.um.search(query)
if not user_memories:
return ""

View File

@@ -11,7 +11,7 @@ class EntityMemory(Memory):
"""
def __init__(self, crew=None, embedder_config=None, storage=None, path=None):
if crew and hasattr(crew, "memory_config") and crew.memory_config is not None:
if hasattr(crew, "memory_config") and crew.memory_config is not None:
self.memory_provider = crew.memory_config.get("provider")
else:
self.memory_provider = None

View File

@@ -15,17 +15,8 @@ class LongTermMemory(Memory):
"""
def __init__(self, storage=None, path=None):
"""Initialize long term memory.
Args:
storage: Optional custom storage instance
path: Optional custom path for storage location
Note:
If both storage and path are provided, storage takes precedence
"""
if not storage:
storage = LTMSQLiteStorage(storage_path=path) if path else LTMSQLiteStorage()
storage = LTMSQLiteStorage(db_path=path) if path else LTMSQLiteStorage()
super().__init__(storage)
def save(self, item: LongTermMemoryItem) -> None: # type: ignore # BUG?: Signature of "save" incompatible with supertype "Memory"

View File

@@ -15,7 +15,7 @@ class ShortTermMemory(Memory):
"""
def __init__(self, crew=None, embedder_config=None, storage=None, path=None):
if crew and hasattr(crew, "memory_config") and crew.memory_config is not None:
if hasattr(crew, "memory_config") and crew.memory_config is not None:
self.memory_provider = crew.memory_config.get("provider")
else:
self.memory_provider = None

View File

@@ -1,11 +1,5 @@
from abc import ABC, abstractmethod
from pathlib import Path
import os
from typing import Any, Dict, List, Optional, TypeVar
from abc import ABC, abstractmethod
from pathlib import Path
from crewai.utilities.paths import get_default_storage_path
from typing import Any, Dict, List, Optional
class BaseRAGStorage(ABC):
@@ -18,46 +12,17 @@ class BaseRAGStorage(ABC):
def __init__(
self,
type: str,
storage_path: Optional[Path] = None,
allow_reset: bool = True,
embedder_config: Optional[Any] = None,
crew: Any = None,
) -> None:
"""Initialize the BaseRAGStorage.
Args:
type: Type of storage being used
storage_path: Optional custom path for storage location
allow_reset: Whether storage can be reset
embedder_config: Optional configuration for the embedder
crew: Optional crew instance this storage belongs to
Raises:
PermissionError: If storage path is not writable
OSError: If storage path cannot be created
"""
):
self.type = type
self.storage_path = storage_path if storage_path else get_default_storage_path('rag')
# Validate storage path
try:
self.storage_path.parent.mkdir(parents=True, exist_ok=True)
if not os.access(self.storage_path.parent, os.W_OK):
raise PermissionError(f"No write permission for storage path: {self.storage_path}")
except OSError as e:
raise OSError(f"Failed to initialize storage path: {str(e)}")
self.allow_reset = allow_reset
self.embedder_config = embedder_config
self.crew = crew
self.agents = self._initialize_agents()
def _initialize_agents(self) -> str:
"""Initialize agent identifiers for storage.
Returns:
str: Underscore-joined string of sanitized agent role names
"""
if self.crew:
return "_".join(
[self._sanitize_role(agent.role) for agent in self.crew.agents]
@@ -66,27 +31,12 @@ class BaseRAGStorage(ABC):
@abstractmethod
def _sanitize_role(self, role: str) -> str:
"""Sanitizes agent roles to ensure valid directory names.
Args:
role: The agent role name to sanitize
Returns:
str: Sanitized role name safe for use in paths
"""
"""Sanitizes agent roles to ensure valid directory names."""
pass
@abstractmethod
def save(self, value: Any, metadata: Dict[str, Any]) -> None:
"""Save a value with metadata to the storage.
Args:
value: The value to store
metadata: Additional metadata to store with the value
Raises:
OSError: If there is an error writing to storage
"""
"""Save a value with metadata to the storage."""
pass
@abstractmethod
@@ -96,55 +46,25 @@ class BaseRAGStorage(ABC):
limit: int = 3,
filter: Optional[dict] = None,
score_threshold: float = 0.35,
) -> List[Dict[str, Any]]:
"""Search for entries in the storage.
Args:
query: The search query string
limit: Maximum number of results to return
filter: Optional filter criteria
score_threshold: Minimum similarity score threshold
Returns:
List[Dict[str, Any]]: List of matching entries with their metadata
"""
) -> List[Any]:
"""Search for entries in the storage."""
pass
@abstractmethod
def reset(self) -> None:
"""Reset the storage.
Raises:
OSError: If there is an error clearing storage
PermissionError: If reset is not allowed
"""
"""Reset the storage."""
pass
@abstractmethod
def _generate_embedding(
self, text: str, metadata: Optional[Dict[str, Any]] = None
) -> List[float]:
"""Generate an embedding for the given text and metadata.
Args:
text: Text to generate embedding for
metadata: Optional metadata to include in embedding
Returns:
List[float]: Vector embedding of the text
Raises:
ValueError: If text is empty or invalid
"""
) -> Any:
"""Generate an embedding for the given text and metadata."""
pass
@abstractmethod
def _initialize_app(self) -> None:
"""Initialize the vector db.
Raises:
OSError: If vector db initialization fails
"""
def _initialize_app(self):
"""Initialize the vector db."""
pass
def setup_config(self, config: Dict[str, Any]):

View File

@@ -1,13 +1,11 @@
import json
import os
import sqlite3
from pathlib import Path
from typing import Any, Dict, List, Optional
from crewai.task import Task
from crewai.utilities import Printer
from crewai.utilities.crew_json_encoder import CrewJSONEncoder
from crewai.utilities.paths import get_default_storage_path
from crewai.utilities.paths import db_storage_path
class KickoffTaskOutputsSQLiteStorage:
@@ -15,26 +13,10 @@ class KickoffTaskOutputsSQLiteStorage:
An updated SQLite storage class for kickoff task outputs storage.
"""
def __init__(self, storage_path: Optional[Path] = None) -> None:
"""Initialize kickoff task outputs storage.
Args:
storage_path: Optional custom path for storage location
Raises:
PermissionError: If storage path is not writable
OSError: If storage path cannot be created
"""
self.storage_path = storage_path if storage_path else get_default_storage_path('kickoff')
# Validate storage path
try:
self.storage_path.parent.mkdir(parents=True, exist_ok=True)
if not os.access(self.storage_path.parent, os.W_OK):
raise PermissionError(f"No write permission for storage path: {self.storage_path}")
except OSError as e:
raise OSError(f"Failed to initialize storage path: {str(e)}")
def __init__(
self, db_path: str = f"{db_storage_path()}/latest_kickoff_task_outputs.db"
) -> None:
self.db_path = db_path
self._printer: Printer = Printer()
self._initialize_db()
@@ -43,7 +25,7 @@ class KickoffTaskOutputsSQLiteStorage:
Initializes the SQLite database and creates LTM table
"""
try:
with sqlite3.connect(str(self.storage_path)) as conn:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
@@ -73,21 +55,9 @@ class KickoffTaskOutputsSQLiteStorage:
task_index: int,
was_replayed: bool = False,
inputs: Dict[str, Any] = {},
) -> None:
"""Add a task output to storage.
Args:
task: The task whose output is being stored
output: The output data from the task
task_index: Index of this task in the sequence
was_replayed: Whether this was from a replay
inputs: Optional input data that led to this output
Raises:
sqlite3.Error: If there is an error saving to database
"""
):
try:
with sqlite3.connect(str(self.storage_path)) as conn:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
@@ -120,7 +90,7 @@ class KickoffTaskOutputsSQLiteStorage:
Updates an existing row in the latest_kickoff_task_outputs table based on task_index.
"""
try:
with sqlite3.connect(str(self.storage_path)) as conn:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
fields = []
@@ -149,7 +119,7 @@ class KickoffTaskOutputsSQLiteStorage:
def load(self) -> Optional[List[Dict[str, Any]]]:
try:
with sqlite3.connect(str(self.storage_path)) as conn:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("""
SELECT *
@@ -185,7 +155,7 @@ class KickoffTaskOutputsSQLiteStorage:
Deletes all rows from the latest_kickoff_task_outputs table.
"""
try:
with sqlite3.connect(str(self.storage_path)) as conn:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("DELETE FROM latest_kickoff_task_outputs")
conn.commit()

View File

@@ -1,11 +1,9 @@
import json
import os
import sqlite3
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from crewai.utilities import Printer
from crewai.utilities.paths import get_default_storage_path
from crewai.utilities.paths import db_storage_path
class LTMSQLiteStorage:
@@ -13,26 +11,10 @@ class LTMSQLiteStorage:
An updated SQLite storage class for LTM data storage.
"""
def __init__(self, storage_path: Optional[Path] = None) -> None:
"""Initialize LTM SQLite storage.
Args:
storage_path: Optional custom path for storage location
Raises:
PermissionError: If storage path is not writable
OSError: If storage path cannot be created
"""
self.storage_path = storage_path if storage_path else get_default_storage_path('ltm')
# Validate storage path
try:
self.storage_path.parent.mkdir(parents=True, exist_ok=True)
if not os.access(self.storage_path.parent, os.W_OK):
raise PermissionError(f"No write permission for storage path: {self.storage_path}")
except OSError as e:
raise OSError(f"Failed to initialize storage path: {str(e)}")
def __init__(
self, db_path: str = f"{db_storage_path()}/long_term_memory_storage.db"
) -> None:
self.db_path = db_path
self._printer: Printer = Printer()
self._initialize_db()
@@ -41,7 +23,7 @@ class LTMSQLiteStorage:
Initializes the SQLite database and creates LTM table
"""
try:
with sqlite3.connect(str(self.storage_path)) as conn:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
@@ -69,20 +51,9 @@ class LTMSQLiteStorage:
datetime: str,
score: Union[int, float],
) -> None:
"""Save a memory entry to long-term memory.
Args:
task_description: Description of the task this memory relates to
metadata: Additional data to store with the memory
datetime: Timestamp for when this memory was created
score: Relevance score for this memory (higher is more relevant)
Raises:
sqlite3.Error: If there is an error saving to the database
"""
"""Saves data to the LTM table with error handling."""
try:
with sqlite3.connect(str(self.storage_path)) as conn:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
@@ -103,7 +74,7 @@ class LTMSQLiteStorage:
) -> Optional[List[Dict[str, Any]]]:
"""Queries the LTM table by task description with error handling."""
try:
with sqlite3.connect(str(self.storage_path)) as conn:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
f"""
@@ -138,7 +109,7 @@ class LTMSQLiteStorage:
) -> None:
"""Resets the LTM table with error handling."""
try:
with sqlite3.connect(str(self.storage_path)) as conn:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("DELETE FROM long_term_memories")
conn.commit()

View File

@@ -19,7 +19,7 @@ class Mem0Storage(Storage):
self.memory_type = type
self.crew = crew
self.memory_config = crew.memory_config if crew else None
self.memory_config = crew.memory_config
# User ID is required for user memory type "user" since it's used as a unique identifier for the user.
user_id = self._get_user_id()
@@ -27,10 +27,9 @@ class Mem0Storage(Storage):
raise ValueError("User ID is required for user memory type")
# API key in memory config overrides the environment variable
if self.memory_config and self.memory_config.get("config"):
mem0_api_key = self.memory_config.get("config").get("api_key")
else:
mem0_api_key = os.getenv("MEM0_API_KEY")
mem0_api_key = self.memory_config.get("config", {}).get("api_key") or os.getenv(
"MEM0_API_KEY"
)
self.memory = MemoryClient(api_key=mem0_api_key)
def _sanitize_role(self, role: str) -> str:

View File

@@ -11,6 +11,7 @@ from chromadb.api import ClientAPI
from crewai.memory.storage.base_rag_storage import BaseRAGStorage
from crewai.utilities import EmbeddingConfigurator
from crewai.utilities.constants import MAX_FILE_NAME_LENGTH
from crewai.utilities.paths import db_storage_path
@contextlib.contextmanager
@@ -39,15 +40,9 @@ class RAGStorage(BaseRAGStorage):
app: ClientAPI | None = None
def __init__(
self,
type,
storage_path=None,
allow_reset=True,
embedder_config=None,
crew=None,
path=None,
self, type, allow_reset=True, embedder_config=None, crew=None, path=None
):
super().__init__(type, storage_path, allow_reset, embedder_config, crew)
super().__init__(type, allow_reset, embedder_config, crew)
agents = crew.agents if crew else []
agents = [self._sanitize_role(agent.role) for agent in agents]
agents = "_".join(agents)
@@ -95,7 +90,7 @@ class RAGStorage(BaseRAGStorage):
"""
Ensures file name does not exceed max allowed by OS
"""
base_path = f"{self.storage_path}/{type}"
base_path = f"{db_storage_path()}/{type}"
if len(file_name) > MAX_FILE_NAME_LENGTH:
logging.warning(
@@ -157,7 +152,7 @@ class RAGStorage(BaseRAGStorage):
try:
if self.app:
self.app.reset()
shutil.rmtree(f"{self.storage_path}/{self.type}")
shutil.rmtree(f"{db_storage_path()}/{self.type}")
self.app = None
self.collection = None
except Exception as e:

View File

@@ -66,6 +66,7 @@ def cache_handler(func):
def crew(func) -> Callable[..., Crew]:
@wraps(func)
def wrapper(self, *args, **kwargs) -> Crew:
instantiated_tasks = []

View File

@@ -216,5 +216,5 @@ def CrewBase(cls: T) -> T:
# Include base class (qual)name in the wrapper class (qual)name.
WrappedClass.__name__ = CrewBase.__name__ + "(" + cls.__name__ + ")"
WrappedClass.__qualname__ = CrewBase.__qualname__ + "(" + cls.__name__ + ")"
return cast(T, WrappedClass)

View File

@@ -373,9 +373,7 @@ class Task(BaseModel):
content = (
json_output
if json_output
else pydantic_output.model_dump_json()
if pydantic_output
else result
else pydantic_output.model_dump_json() if pydantic_output else result
)
self._save_file(content)

View File

@@ -27,7 +27,7 @@ class EmbeddingConfigurator:
if embedder_config is None:
return self._create_default_embedding_function()
provider = embedder_config.get("provider", "")
provider = embedder_config.get("provider")
config = embedder_config.get("config", {})
model_name = config.get("model")
@@ -38,13 +38,12 @@ class EmbeddingConfigurator:
except Exception as e:
raise ValueError(f"Invalid custom embedding function: {str(e)}")
embedding_function = self.embedding_functions.get(provider, None)
if not embedding_function:
if provider not in self.embedding_functions:
raise Exception(
f"Unsupported embedding provider: {provider}, supported providers: {list(self.embedding_functions.keys())}"
)
return embedding_function(config, model_name)
return self.embedding_functions[provider](config, model_name)
@staticmethod
def _create_default_embedding_function():

View File

@@ -1,4 +1,6 @@
import logging
from collections import defaultdict
from typing import Optional, Union
from pydantic import BaseModel, Field
from rich.box import HEAVY_EDGE
@@ -6,11 +8,22 @@ from rich.console import Console
from rich.table import Table
from crewai.agent import Agent
from crewai.llm import LLM
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.telemetry import Telemetry
class CrewEvaluationError(Exception):
"""Raised when there is an error during crew evaluation."""
pass
# Default values for evaluation metrics
DEFAULT_TASK_SCORE = 9.0
DEFAULT_EXECUTION_TIME = 60 # seconds
class TaskEvaluationPydanticOutput(BaseModel):
quality: float = Field(
description="A score from 1 to 10 evaluating on completion, quality, and overall performance from the task_description and task_expected_output to the actual Task Output."
@@ -32,9 +45,27 @@ class CrewEvaluator:
run_execution_times: defaultdict = defaultdict(list)
iteration: int = 0
def __init__(self, crew, openai_model_name: str):
def __init__(self, crew, llm: Optional[Union[str, LLM]] = None):
"""Initialize CrewEvaluator.
Args:
crew: The crew to evaluate
llm: LLM instance or model name for evaluation
"""
self.crew = crew
self.openai_model_name = openai_model_name
logging.info(f"Initializing CrewEvaluator with LLM: {llm}")
# Initialize tasks_scores with default values to avoid division by zero
self.tasks_scores = defaultdict(list)
for i in range(1, len(crew.tasks) + 1):
self.tasks_scores[i] = [DEFAULT_TASK_SCORE]
# Initialize run_execution_times with default values
self.run_execution_times = defaultdict(list)
for i in range(1, len(crew.tasks) + 1):
self.run_execution_times[i] = [DEFAULT_EXECUTION_TIME]
self.llm = llm if isinstance(llm, LLM) else (
LLM(model=llm) if isinstance(llm, str) else None
)
self._telemetry = Telemetry()
self._setup_for_evaluating()
@@ -51,7 +82,7 @@ class CrewEvaluator:
),
backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed",
verbose=False,
llm=self.openai_model_name,
llm=self.llm,
)
def _evaluation_task(
@@ -157,35 +188,57 @@ class CrewEvaluator:
console.print(table)
def evaluate(self, task_output: TaskOutput):
"""Evaluates the performance of the agents in the crew based on the tasks they have performed."""
current_task = None
for task in self.crew.tasks:
if task.description == task_output.description:
current_task = task
break
"""Evaluates the performance of the agents in the crew based on the tasks they have performed.
Args:
task_output: The output from the task execution to evaluate
Raises:
CrewEvaluationError: If evaluation fails or produces unexpected results
ValueError: If required inputs are missing or invalid
"""
try:
# Find the matching task
current_task = None
for task in self.crew.tasks:
if task.description == task_output.description:
current_task = task
break
if not current_task or not task_output:
raise ValueError(
"Task to evaluate and task output are required for evaluation"
if not current_task or not task_output:
raise ValueError(
"Task to evaluate and task output are required for evaluation"
)
# Create and execute evaluation task
evaluator_agent = self._evaluator_agent()
evaluation_task = self._evaluation_task(
evaluator_agent, current_task, task_output.raw
)
evaluator_agent = self._evaluator_agent()
evaluation_task = self._evaluation_task(
evaluator_agent, current_task, task_output.raw
)
logging.info(f"Evaluating task: {current_task.description}")
evaluation_result = evaluation_task.execute_sync()
evaluation_result = evaluation_task.execute_sync()
# Process evaluation results
if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
self._test_result_span = self._telemetry.individual_test_result_span(
self.crew,
evaluation_result.pydantic.quality,
current_task._execution_time,
str(self.llm.model if self.llm else None),
)
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
self.run_execution_times[self.iteration].append(
current_task._execution_time
)
logging.info(f"Task evaluation completed with score: {evaluation_result.pydantic.quality}")
else:
raise CrewEvaluationError("Evaluation result is not in the expected format")
if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
self._test_result_span = self._telemetry.individual_test_result_span(
self.crew,
evaluation_result.pydantic.quality,
current_task._execution_time,
self.openai_model_name,
)
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
self.run_execution_times[self.iteration].append(
current_task._execution_time
)
else:
raise ValueError("Evaluation result is not in the expected format")
except ValueError as e:
logging.error(f"Invalid input for task evaluation: {e}")
raise
except Exception as e:
logging.error(f"Error during task evaluation: {e}")
raise CrewEvaluationError(f"Failed to evaluate task: {e}")

View File

@@ -22,26 +22,3 @@ def get_project_directory_name():
cwd = Path.cwd()
project_directory_name = cwd.name
return project_directory_name
def get_default_storage_path(storage_type: str) -> Path:
"""Returns the default storage path for a given storage type.
Args:
storage_type: Type of storage ('ltm', 'kickoff', 'rag')
Returns:
Path: Default storage path for the specified type
Raises:
ValueError: If storage_type is not recognized
"""
base_path = db_storage_path()
if storage_type == 'ltm':
return base_path / 'latest_long_term_memories.db'
elif storage_type == 'kickoff':
return base_path / 'latest_kickoff_task_outputs.db'
elif storage_type == 'rag':
return base_path
else:
raise ValueError(f"Unknown storage type: {storage_type}")

View File

@@ -1,4 +1,87 @@
interactions:
- request:
body: !!binary |
CqcXCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkS/hYKEgoQY3Jld2FpLnRl
bGVtZXRyeRJ5ChBuJJtOdNaB05mOW/p3915eEgj2tkAd3rZcASoQVG9vbCBVc2FnZSBFcnJvcjAB
OYa7/URvKBUYQUpcFEVvKBUYShoKDmNyZXdhaV92ZXJzaW9uEggKBjAuODYuMEoPCgNsbG0SCAoG
Z3B0LTRvegIYAYUBAAEAABLJBwoQifhX01E5i+5laGdALAlZBBIIBuGM1aN+OPgqDENyZXcgQ3Jl
YXRlZDABORVGruBvKBUYQaipwOBvKBUYShoKDmNyZXdhaV92ZXJzaW9uEggKBjAuODYuMEoaCg5w
eXRob25fdmVyc2lvbhIICgYzLjEyLjdKLgoIY3Jld19rZXkSIgogN2U2NjA4OTg5ODU5YTY3ZWVj
ODhlZWY3ZmNlODUyMjVKMQoHY3Jld19pZBImCiRiOThiNWEwMC01YTI1LTQxMDctYjQwNS1hYmYz
MjBhOGYzYThKHAoMY3Jld19wcm9jZXNzEgwKCnNlcXVlbnRpYWxKEQoLY3Jld19tZW1vcnkSAhAA
ShoKFGNyZXdfbnVtYmVyX29mX3Rhc2tzEgIYAUobChVjcmV3X251bWJlcl9vZl9hZ2VudHMSAhgB
SuQCCgtjcmV3X2FnZW50cxLUAgrRAlt7ImtleSI6ICIyMmFjZDYxMWU0NGVmNWZhYzA1YjUzM2Q3
NWU4ODkzYiIsICJpZCI6ICJkNWIyMzM1YS0yMmIyLTQyZWEtYmYwNS03OTc3NmU3MmYzOTIiLCAi
cm9sZSI6ICJEYXRhIFNjaWVudGlzdCIsICJ2ZXJib3NlPyI6IGZhbHNlLCAibWF4X2l0ZXIiOiAy
MCwgIm1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25fY2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJn
cHQtNG8tbWluaSIsICJkZWxlZ2F0aW9uX2VuYWJsZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4
ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9saW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFsi
Z2V0IGdyZWV0aW5ncyJdfV1KkgIKCmNyZXdfdGFza3MSgwIKgAJbeyJrZXkiOiAiYTI3N2IzNGIy
YzE0NmYwYzU2YzVlMTM1NmU4ZjhhNTciLCAiaWQiOiAiMjJiZWMyMzEtY2QyMS00YzU4LTgyN2Ut
MDU4MWE4ZjBjMTExIiwgImFzeW5jX2V4ZWN1dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6
IGZhbHNlLCAiYWdlbnRfcm9sZSI6ICJEYXRhIFNjaWVudGlzdCIsICJhZ2VudF9rZXkiOiAiMjJh
Y2Q2MTFlNDRlZjVmYWMwNWI1MzNkNzVlODg5M2IiLCAidG9vbHNfbmFtZXMiOiBbImdldCBncmVl
dGluZ3MiXX1degIYAYUBAAEAABKOAgoQ5WYoxRtTyPjge4BduhL0rRIIv2U6rvWALfwqDFRhc2sg
Q3JlYXRlZDABOX068uBvKBUYQZkv8+BvKBUYSi4KCGNyZXdfa2V5EiIKIDdlNjYwODk4OTg1OWE2
N2VlYzg4ZWVmN2ZjZTg1MjI1SjEKB2NyZXdfaWQSJgokYjk4YjVhMDAtNWEyNS00MTA3LWI0MDUt
YWJmMzIwYThmM2E4Si4KCHRhc2tfa2V5EiIKIGEyNzdiMzRiMmMxNDZmMGM1NmM1ZTEzNTZlOGY4
YTU3SjEKB3Rhc2tfaWQSJgokMjJiZWMyMzEtY2QyMS00YzU4LTgyN2UtMDU4MWE4ZjBjMTExegIY
AYUBAAEAABKQAQoQXyeDtJDFnyp2Fjk9YEGTpxIIaNE7gbhPNYcqClRvb2wgVXNhZ2UwATkaXTvj
bygVGEGvx0rjbygVGEoaCg5jcmV3YWlfdmVyc2lvbhIICgYwLjg2LjBKHAoJdG9vbF9uYW1lEg8K
DUdldCBHcmVldGluZ3NKDgoIYXR0ZW1wdHMSAhgBegIYAYUBAAEAABLVBwoQMWfznt0qwauEzl7T
UOQxRBII9q+pUS5EdLAqDENyZXcgQ3JlYXRlZDABORONPORvKBUYQSAoS+RvKBUYShoKDmNyZXdh
aV92ZXJzaW9uEggKBjAuODYuMEoaCg5weXRob25fdmVyc2lvbhIICgYzLjEyLjdKLgoIY3Jld19r
ZXkSIgogYzMwNzYwMDkzMjY3NjE0NDRkNTdjNzFkMWRhM2YyN2NKMQoHY3Jld19pZBImCiQ3OTQw
MTkyNS1iOGU5LTQ3MDgtODUzMC00NDhhZmEzYmY4YjBKHAoMY3Jld19wcm9jZXNzEgwKCnNlcXVl
bnRpYWxKEQoLY3Jld19tZW1vcnkSAhAAShoKFGNyZXdfbnVtYmVyX29mX3Rhc2tzEgIYAUobChVj
cmV3X251bWJlcl9vZl9hZ2VudHMSAhgBSuoCCgtjcmV3X2FnZW50cxLaAgrXAlt7ImtleSI6ICI5
OGYzYjFkNDdjZTk2OWNmMDU3NzI3Yjc4NDE0MjVjZCIsICJpZCI6ICI5OTJkZjYyZi1kY2FiLTQy
OTUtOTIwNi05MDBkNDExNGIxZTkiLCAicm9sZSI6ICJGcmllbmRseSBOZWlnaGJvciIsICJ2ZXJi
b3NlPyI6IGZhbHNlLCAibWF4X2l0ZXIiOiAyMCwgIm1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25f
Y2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJncHQtNG8tbWluaSIsICJkZWxlZ2F0aW9uX2VuYWJs
ZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9s
aW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFsiZGVjaWRlIGdyZWV0aW5ncyJdfV1KmAIKCmNyZXdf
dGFza3MSiQIKhgJbeyJrZXkiOiAiODBkN2JjZDQ5MDk5MjkwMDgzODMyZjBlOTgzMzgwZGYiLCAi
aWQiOiAiMmZmNjE5N2UtYmEyNy00YjczLWI0YTctNGZhMDQ4ZTYyYjQ3IiwgImFzeW5jX2V4ZWN1
dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6IGZhbHNlLCAiYWdlbnRfcm9sZSI6ICJGcmll
bmRseSBOZWlnaGJvciIsICJhZ2VudF9rZXkiOiAiOThmM2IxZDQ3Y2U5NjljZjA1NzcyN2I3ODQx
NDI1Y2QiLCAidG9vbHNfbmFtZXMiOiBbImRlY2lkZSBncmVldGluZ3MiXX1degIYAYUBAAEAABKO
AgoQnjTp5boK7/+DQxztYIpqihIIgGnMUkBtzHEqDFRhc2sgQ3JlYXRlZDABOcpYcuRvKBUYQalE
c+RvKBUYSi4KCGNyZXdfa2V5EiIKIGMzMDc2MDA5MzI2NzYxNDQ0ZDU3YzcxZDFkYTNmMjdjSjEK
B2NyZXdfaWQSJgokNzk0MDE5MjUtYjhlOS00NzA4LTg1MzAtNDQ4YWZhM2JmOGIwSi4KCHRhc2tf
a2V5EiIKIDgwZDdiY2Q0OTA5OTI5MDA4MzgzMmYwZTk4MzM4MGRmSjEKB3Rhc2tfaWQSJgokMmZm
NjE5N2UtYmEyNy00YjczLWI0YTctNGZhMDQ4ZTYyYjQ3egIYAYUBAAEAABKTAQoQ26H9pLUgswDN
p9XhJwwL6BIIx3bw7mAvPYwqClRvb2wgVXNhZ2UwATmy7NPlbygVGEEvb+HlbygVGEoaCg5jcmV3
YWlfdmVyc2lvbhIICgYwLjg2LjBKHwoJdG9vbF9uYW1lEhIKEERlY2lkZSBHcmVldGluZ3NKDgoI
YXR0ZW1wdHMSAhgBegIYAYUBAAEAAA==
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '2986'
Content-Type:
- application/x-protobuf
User-Agent:
- OTel-OTLP-Exporter-Python/1.27.0
method: POST
uri: https://telemetry.crewai.com:4319/v1/traces
response:
body:
string: "\n\0"
headers:
Content-Length:
- '2'
Content-Type:
- application/x-protobuf
Date:
- Fri, 27 Dec 2024 22:14:53 GMT
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are test role. test backstory\nYour
personal goal is: test goal\nTo give my best complete final answer to the task
@@ -22,18 +105,20 @@ interactions:
- '824'
content-type:
- application/json
cookie:
- _cfuvid=ePJSDFdHag2D8lj21_ijAMWjoA6xfnPNxN4uekvC728-1727226247743-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- arm64
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
- Linux
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
@@ -47,8 +132,8 @@ interactions:
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AaqIIsTxhvf75xvuu7gQScIlRSKbW\",\n \"object\":
\"chat.completion\",\n \"created\": 1733344190,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
content: "{\n \"id\": \"chatcmpl-AjCtZLLrWi8ZASpP9bz6HaCV7xBIn\",\n \"object\":
\"chat.completion\",\n \"created\": 1735337693,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"I now can give a great answer \\nFinal
Answer: Hi\",\n \"refusal\": null\n },\n \"logprobs\": null,\n
@@ -57,12 +142,12 @@ interactions:
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_0705bf87c0\"\n}\n"
\"fp_0aa8d3e20b\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8ece8cfc3b1f4532-ATL
- 8f8caa83deca756b-SEA
Connection:
- keep-alive
Content-Encoding:
@@ -70,14 +155,14 @@ interactions:
Content-Type:
- application/json
Date:
- Wed, 04 Dec 2024 20:29:50 GMT
- Fri, 27 Dec 2024 22:14:53 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
path=/; expires=Wed, 04-Dec-24 20:59:50 GMT; domain=.api.openai.com; HttpOnly;
- __cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw;
path=/; expires=Fri, 27-Dec-24 22:44:53 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000;
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
@@ -90,7 +175,7 @@ interactions:
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '313'
- '404'
openai-version:
- '2020-10-01'
strict-transport-security:
@@ -108,7 +193,7 @@ interactions:
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_9fd9a8ee688045dcf7ac5f6fdf689372
- req_6ac84634bff9193743c4b0911c09b4a6
http_version: HTTP/1.1
status_code: 200
- request:
@@ -131,20 +216,20 @@ interactions:
content-type:
- application/json
cookie:
- __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
_cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
__cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- arm64
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
- Linux
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
@@ -158,8 +243,8 @@ interactions:
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AaqIIaQlLyoyPmk909PvAIfA2TmJL\",\n \"object\":
\"chat.completion\",\n \"created\": 1733344190,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
content: "{\n \"id\": \"chatcmpl-AjCtZNlWdrrPZhq0MJDqd16sMuQEJ\",\n \"object\":
\"chat.completion\",\n \"created\": 1735337693,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"True\",\n \"refusal\": null\n
\ },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n
@@ -168,12 +253,12 @@ interactions:
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_0705bf87c0\"\n}\n"
\"fp_0aa8d3e20b\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8ece8d060b5e4532-ATL
- 8f8caa87094f756b-SEA
Connection:
- keep-alive
Content-Encoding:
@@ -181,7 +266,7 @@ interactions:
Content-Type:
- application/json
Date:
- Wed, 04 Dec 2024 20:29:50 GMT
- Fri, 27 Dec 2024 22:14:53 GMT
Server:
- cloudflare
Transfer-Encoding:
@@ -195,7 +280,7 @@ interactions:
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '375'
- '156'
openai-version:
- '2020-10-01'
strict-transport-security:
@@ -213,7 +298,7 @@ interactions:
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_be7cb475e0859a82c37ee3f2871ea5ea
- req_ec74bef2a9ef7b2144c03fd7f7bbeab0
http_version: HTTP/1.1
status_code: 200
- request:
@@ -242,20 +327,20 @@ interactions:
content-type:
- application/json
cookie:
- __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
_cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
__cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- arm64
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
- Linux
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
@@ -269,22 +354,23 @@ interactions:
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AaqIJAAxpVfUOdrsgYKHwfRlHv4RS\",\n \"object\":
\"chat.completion\",\n \"created\": 1733344191,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
content: "{\n \"id\": \"chatcmpl-AjCtZGv4f3h7GDdhyOy9G0sB1lRgC\",\n \"object\":
\"chat.completion\",\n \"created\": 1735337693,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"Thought: I now can give a great answer
\ \\nFinal Answer: Hello\",\n \"refusal\": null\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
188,\n \"completion_tokens\": 14,\n \"total_tokens\": 202,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
\"assistant\",\n \"content\": \"Thought: I understand the feedback and
will adjust my response accordingly. \\nFinal Answer: Hello\",\n \"refusal\":
null\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 188,\n \"completion_tokens\":
18,\n \"total_tokens\": 206,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_0705bf87c0\"\n}\n"
\"fp_0aa8d3e20b\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8ece8d090fc34532-ATL
- 8f8caa88cac4756b-SEA
Connection:
- keep-alive
Content-Encoding:
@@ -292,7 +378,7 @@ interactions:
Content-Type:
- application/json
Date:
- Wed, 04 Dec 2024 20:29:51 GMT
- Fri, 27 Dec 2024 22:14:54 GMT
Server:
- cloudflare
Transfer-Encoding:
@@ -306,7 +392,7 @@ interactions:
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '484'
- '358'
openai-version:
- '2020-10-01'
strict-transport-security:
@@ -324,7 +410,7 @@ interactions:
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_5bf4a565ad6c2567a1ed204ecac89134
- req_ae1ab6b206d28ded6fee3c83ed0c2ab7
http_version: HTTP/1.1
status_code: 200
- request:
@@ -346,20 +432,20 @@ interactions:
content-type:
- application/json
cookie:
- __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
_cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
__cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- arm64
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
- Linux
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
@@ -373,8 +459,8 @@ interactions:
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AaqIJqyG8vl9mxj2qDPZgaxyNLLIq\",\n \"object\":
\"chat.completion\",\n \"created\": 1733344191,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
content: "{\n \"id\": \"chatcmpl-AjCtaiHL4TY8Dssk0j2miqmjrzquy\",\n \"object\":
\"chat.completion\",\n \"created\": 1735337694,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"False\",\n \"refusal\": null\n
\ },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n
@@ -383,12 +469,12 @@ interactions:
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_0705bf87c0\"\n}\n"
\"fp_0aa8d3e20b\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8ece8d0cfdeb4532-ATL
- 8f8caa8bdd26756b-SEA
Connection:
- keep-alive
Content-Encoding:
@@ -396,7 +482,7 @@ interactions:
Content-Type:
- application/json
Date:
- Wed, 04 Dec 2024 20:29:51 GMT
- Fri, 27 Dec 2024 22:14:54 GMT
Server:
- cloudflare
Transfer-Encoding:
@@ -410,7 +496,7 @@ interactions:
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '341'
- '184'
openai-version:
- '2020-10-01'
strict-transport-security:
@@ -428,7 +514,7 @@ interactions:
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_5554bade8ceda00cf364b76a51b708ff
- req_652891f79c1104a7a8436275d78a69f1
http_version: HTTP/1.1
status_code: 200
version: 1

View File

@@ -28,10 +28,9 @@ def test_create_success(mock_subprocess):
with in_temp_dir():
tool_command = ToolCommand()
with (
patch.object(tool_command, "login") as mock_login,
patch("sys.stdout", new=StringIO()) as fake_out,
):
with patch.object(tool_command, "login") as mock_login, patch(
"sys.stdout", new=StringIO()
) as fake_out:
tool_command.create("test-tool")
output = fake_out.getvalue()
@@ -83,7 +82,7 @@ def test_install_success(mock_get, mock_subprocess_run):
capture_output=False,
text=True,
check=True,
env=unittest.mock.ANY,
env=unittest.mock.ANY
)
assert "Successfully installed sample-tool" in output

View File

@@ -14,6 +14,9 @@ from crewai.agent import Agent
from crewai.agents.cache import CacheHandler
from crewai.crew import Crew
from crewai.crews.crew_output import CrewOutput
from collections import defaultdict
from crewai.llm import LLM
from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator
from crewai.memory.contextual.contextual_memory import ContextualMemory
from crewai.process import Process
from crewai.task import Task
@@ -1123,7 +1126,7 @@ def test_kickoff_for_each_empty_input():
assert results == []
@pytest.mark.vcr(filter_headers=["authorization"])
@pytest.mark.vcr(filter_headeruvs=["authorization"])
def test_kickoff_for_each_invalid_input():
"""Tests if kickoff_for_each raises TypeError for invalid input types."""
@@ -2814,8 +2817,8 @@ def test_conditional_should_execute():
@mock.patch("crewai.crew.Crew.kickoff")
def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
task = Task(
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
expected_output="5 bullet points with a paragraph for each idea.",
description="Test task description",
expected_output="Test output",
agent=researcher,
)
@@ -2837,7 +2840,7 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
crew_evaluator.assert_has_calls(
[
mock.call(crew, "gpt-4o-mini"),
mock.call(crew, mock.ANY),
mock.call().set_iteration(1),
mock.call().set_iteration(2),
mock.call().print_crew_evaluation_result(),
@@ -2845,6 +2848,73 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
)
@mock.patch("crewai.crew.CrewEvaluator")
@mock.patch("crewai.crew.Crew.copy")
@mock.patch("crewai.crew.Crew.kickoff")
def test_crew_testing_with_invalid_llm(kickoff_mock, copy_mock, crew_evaluator_mock):
"""Test that Crew.test() properly validates LLM input."""
task = Task(
description="Test task",
expected_output="Test output",
agent=researcher,
)
crew = Crew(agents=[researcher], tasks=[task])
with pytest.raises(ValueError, match="llm parameter must be either"):
crew.test(2, llm=123) # Invalid type
@mock.patch("crewai.crew.CrewEvaluator")
@mock.patch("crewai.crew.Crew.copy")
@mock.patch("crewai.crew.Crew.kickoff")
def test_crew_testing_with_custom_llm(kickoff_mock, copy_mock, crew_evaluator_mock):
"""Test that Crew.test() works with both string and LLM instance parameters."""
task = Task(
description="Test task",
expected_output="Test output",
agent=researcher,
)
crew = Crew(
agents=[researcher],
tasks=[task],
)
# Create a mock for the copied crew
copy_mock.return_value = crew
# Create a mock evaluator
mock_evaluator = mock.MagicMock()
mock_evaluator.print_crew_evaluation_result = mock.MagicMock()
mock_evaluator.set_iteration = mock.MagicMock()
# Mock the CrewEvaluator class
crew_evaluator_mock.return_value = mock_evaluator
# Test with string model name
crew.test(2, llm="gpt-4o-mini")
crew_evaluator_mock.assert_called_with(crew, "gpt-4o-mini")
mock_evaluator.set_iteration.assert_has_calls([mock.call(1), mock.call(2)])
mock_evaluator.print_crew_evaluation_result.assert_called_once()
crew_evaluator_mock.reset_mock()
mock_evaluator.reset_mock()
# Test with LLM instance
custom_llm = LLM(model="gpt-4o-mini")
crew.test(2, llm=custom_llm)
crew_evaluator_mock.assert_called_with(crew, custom_llm)
mock_evaluator.set_iteration.assert_has_calls([mock.call(1), mock.call(2)])
mock_evaluator.print_crew_evaluation_result.assert_called_once()
crew_evaluator_mock.reset_mock()
mock_evaluator.reset_mock()
# Test backward compatibility
crew.test(2, openai_model_name="gpt-4o-mini")
crew_evaluator_mock.assert_called_with(crew, "gpt-4o-mini")
mock_evaluator.set_iteration.assert_has_calls([mock.call(1), mock.call(2)])
mock_evaluator.print_crew_evaluation_result.assert_called_once()
@pytest.mark.vcr(filter_headers=["authorization"])
def test_hierarchical_verbose_manager_agent():
task = Task(
@@ -3125,4 +3195,4 @@ def test_multimodal_agent_live_image_analysis():
# Verify we got a meaningful response
assert isinstance(result.raw, str)
assert len(result.raw) > 100 # Expecting a detailed analysis
assert "error" not in result.raw.lower() # No error messages in response
assert "error" not in result.raw.lower() # No error messages in response

View File

@@ -1,83 +0,0 @@
import os
import tempfile
from pathlib import Path
import pytest
from unittest.mock import patch
from crewai.memory.storage.ltm_sqlite_storage import LTMSQLiteStorage
from crewai.memory.storage.kickoff_task_outputs_storage import KickoffTaskOutputsSQLiteStorage
from crewai.memory.storage.base_rag_storage import BaseRAGStorage
from crewai.utilities.paths import get_default_storage_path
class MockRAGStorage(BaseRAGStorage):
"""Mock implementation of BaseRAGStorage for testing."""
def _sanitize_role(self, role: str) -> str:
return role.lower()
def save(self, value, metadata):
pass
def search(self, query, limit=3, filter=None, score_threshold=0.35):
return []
def reset(self):
pass
def _generate_embedding(self, text, metadata=None):
return []
def _initialize_app(self):
pass
def test_default_storage_paths():
"""Test that default storage paths are created correctly."""
ltm_path = get_default_storage_path('ltm')
kickoff_path = get_default_storage_path('kickoff')
rag_path = get_default_storage_path('rag')
assert str(ltm_path).endswith('latest_long_term_memories.db')
assert str(kickoff_path).endswith('latest_kickoff_task_outputs.db')
assert isinstance(rag_path, Path)
def test_custom_storage_paths():
"""Test that custom storage paths are respected."""
with tempfile.TemporaryDirectory() as temp_dir:
custom_path = Path(temp_dir) / 'custom.db'
ltm = LTMSQLiteStorage(storage_path=custom_path)
assert ltm.storage_path == custom_path
kickoff = KickoffTaskOutputsSQLiteStorage(storage_path=custom_path)
assert kickoff.storage_path == custom_path
rag = MockRAGStorage('test', storage_path=custom_path)
assert rag.storage_path == custom_path
def test_directory_creation():
"""Test that storage directories are created automatically."""
with tempfile.TemporaryDirectory() as temp_dir:
test_dir = Path(temp_dir) / 'test_storage'
storage_path = test_dir / 'test.db'
assert not test_dir.exists()
LTMSQLiteStorage(storage_path=storage_path)
assert test_dir.exists()
def test_permission_error():
"""Test that permission errors are handled correctly."""
with tempfile.TemporaryDirectory() as temp_dir:
test_dir = Path(temp_dir) / 'readonly'
test_dir.mkdir()
os.chmod(test_dir, 0o444) # Read-only
storage_path = test_dir / 'test.db'
with pytest.raises((PermissionError, OSError)) as exc_info:
LTMSQLiteStorage(storage_path=storage_path)
# Verify that the error message mentions permission
assert "permission" in str(exc_info.value).lower()
def test_invalid_path():
"""Test that invalid paths raise appropriate errors."""
with pytest.raises(OSError):
# Try to create storage in a non-existent root directory
LTMSQLiteStorage(storage_path=Path('/nonexistent/dir/test.db'))

View File

@@ -23,7 +23,7 @@ class TestCrewEvaluator:
)
crew = Crew(agents=[agent], tasks=[task])
return CrewEvaluator(crew, openai_model_name="gpt-4o-mini")
return CrewEvaluator(crew, llm="gpt-4o-mini")
def test_setup_for_evaluating(self, crew_planner):
crew_planner._setup_for_evaluating()