Merge branch 'main' into fix-knowledgestorage-default-instantiation

refactor: Change storage field to optional and improve error handling when saving documents
2026-01-29 18:18:13 +00:00 · 2024-12-27 21:18:16 -03:00 · 2024-12-27 17:18:33 -03:00 · 2024-12-26 22:27:19 -04:00 · 2024-12-26 21:30:06 -04:00
5 changed files with 16 additions and 105 deletions
--- a/src/crewai/knowledge/knowledge.py
+++ b/src/crewai/knowledge/knowledge.py
@@ -14,13 +14,13 @@ class Knowledge(BaseModel):
    Knowledge is a collection of sources and setup for the vector store to save and query relevant context.
    Args:
        sources: List[BaseKnowledgeSource] = Field(default_factory=list)
-        storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+        storage: Optional[KnowledgeStorage] = Field(default=None)
        embedder_config: Optional[Dict[str, Any]] = None
    """

    sources: List[BaseKnowledgeSource] = Field(default_factory=list)
    model_config = ConfigDict(arbitrary_types_allowed=True)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    embedder_config: Optional[Dict[str, Any]] = None
    collection_name: Optional[str] = None

--- a/src/crewai/knowledge/source/base_file_knowledge_source.py
+++ b/src/crewai/knowledge/source/base_file_knowledge_source.py
@@ -22,7 +22,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
        default_factory=list, description="The path to the file"
    )
    content: Dict[Path, str] = Field(init=False, default_factory=dict)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    safe_file_paths: List[Path] = Field(default_factory=list)

    @field_validator("file_path", "file_paths", mode="before")
@@ -62,7 +62,10 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):

    def _save_documents(self):
        """Save the documents to the storage."""
-        self.storage.save(self.chunks)
+        if self.storage:
+            self.storage.save(self.chunks)
+        else:
+            raise ValueError("No storage found to save documents.")

    def convert_to_path(self, path: Union[Path, str]) -> Path:
        """Convert a path to a Path object."""
--- a/src/crewai/knowledge/source/base_knowledge_source.py
+++ b/src/crewai/knowledge/source/base_knowledge_source.py
@@ -16,7 +16,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
    chunk_embeddings: List[np.ndarray] = Field(default_factory=list)

    model_config = ConfigDict(arbitrary_types_allowed=True)
-    storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
+    storage: Optional[KnowledgeStorage] = Field(default=None)
    metadata: Dict[str, Any] = Field(default_factory=dict)  # Currently unused
    collection_name: Optional[str] = Field(default=None)

@@ -46,4 +46,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
        Save the documents to the storage.
        This method should be called after the chunks and embeddings are generated.
        """
-        self.storage.save(self.chunks)
+        if self.storage:
+            self.storage.save(self.chunks)
+        else:
+            raise ValueError("No storage found to save documents.")
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -92,43 +92,9 @@ def suppress_warnings():


 class LLM:
-    """
-    A wrapper class for language model interactions using litellm.
-    
-    This class provides a unified interface for interacting with various language models
-    through litellm. It handles model configuration, context window sizing, and callback
-    management.
-    
-    Args:
-        model (str): The identifier for the language model to use. Must be a valid model ID
-            with a provider prefix (e.g., 'openai/gpt-4'). Cannot be a numeric value without
-            a provider prefix.
-        timeout (Optional[Union[float, int]]): The timeout for API calls in seconds.
-        temperature (Optional[float]): Controls randomness in the model's output.
-        top_p (Optional[float]): Controls diversity via nucleus sampling.
-        n (Optional[int]): Number of completions to generate.
-        stop (Optional[Union[str, List[str]]]): Sequences where the model should stop generating.
-        max_completion_tokens (Optional[int]): Maximum number of tokens to generate.
-        max_tokens (Optional[int]): Alias for max_completion_tokens.
-        presence_penalty (Optional[float]): Penalizes repeated tokens.
-        frequency_penalty (Optional[float]): Penalizes frequent tokens.
-        logit_bias (Optional[Dict[int, float]]): Modifies likelihood of specific tokens.
-        response_format (Optional[Dict[str, Any]]): Specifies the format for the model's response.
-        seed (Optional[int]): Seed for deterministic outputs.
-        logprobs (Optional[bool]): Whether to return log probabilities.
-        top_logprobs (Optional[int]): Number of most likely tokens to return probabilities for.
-        base_url (Optional[str]): Base URL for API calls.
-        api_version (Optional[str]): API version to use.
-        api_key (Optional[str]): API key for authentication.
-        callbacks (List[Any]): List of callback functions.
-        **kwargs: Additional keyword arguments to pass to the model.
-    
-    Raises:
-        ValueError: If the model ID is empty, whitespace, or a numeric value without a provider prefix.
-    """
    def __init__(
        self,
-        model: Union[str, Any],
+        model: str,
        timeout: Optional[Union[float, int]] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
@@ -149,16 +115,6 @@ class LLM:
        callbacks: List[Any] = [],
        **kwargs,
    ):
-        # Only validate model ID if it's not None and is a numeric value without a provider prefix
-        if model is not None and (
-            isinstance(model, (int, float)) or 
-            (isinstance(model, str) and model.strip() and model.strip().isdigit())
-        ):
-            raise ValueError(
-                f"Invalid model ID: {model}. Model ID cannot be a numeric value without a provider prefix. "
-                "Please specify a valid model ID with a provider prefix, e.g., 'openai/gpt-4'."
-            )
-        
        self.model = model
        self.timeout = timeout
        self.temperature = temperature
@@ -230,10 +186,7 @@ class LLM:

    def supports_function_calling(self) -> bool:
        try:
-            # Handle None model case
-            if self.model is None:
-                return False
-            params = get_supported_openai_params(model=str(self.model))
+            params = get_supported_openai_params(model=self.model)
            return "response_format" in params
        except Exception as e:
            logging.error(f"Failed to get supported params: {str(e)}")
@@ -241,10 +194,7 @@ class LLM:

    def supports_stop_words(self) -> bool:
        try:
-            # Handle None model case
-            if self.model is None:
-                return False
-            params = get_supported_openai_params(model=str(self.model))
+            params = get_supported_openai_params(model=self.model)
            return "stop" in params
        except Exception as e:
            logging.error(f"Failed to get supported params: {str(e)}")
@@ -258,10 +208,8 @@ class LLM:
        self.context_window_size = int(
            DEFAULT_CONTEXT_WINDOW_SIZE * CONTEXT_WINDOW_USAGE_RATIO
        )
-        # Ensure model is a string before calling startswith
-        model_str = str(self.model) if not isinstance(self.model, str) else self.model
        for key, value in LLM_CONTEXT_WINDOW_SIZES.items():
-            if model_str.startswith(key):
+            if self.model.startswith(key):
                self.context_window_size = int(value * CONTEXT_WINDOW_USAGE_RATIO)
        return self.context_window_size

--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -1,43 +0,0 @@
-import pytest
-
-from crewai.llm import LLM
-
-
-@pytest.mark.parametrize(
-    "invalid_model,error_message",
-    [
-        (3420, "Invalid model ID: 3420. Model ID cannot be a numeric value without a provider prefix."),
-        ("3420", "Invalid model ID: 3420. Model ID cannot be a numeric value without a provider prefix."),
-        (3.14, "Invalid model ID: 3.14. Model ID cannot be a numeric value without a provider prefix."),
-    ],
-)
-def test_invalid_numeric_model_ids(invalid_model, error_message):
-    """Test that numeric model IDs are rejected."""
-    with pytest.raises(ValueError, match=error_message):
-        LLM(model=invalid_model)
-
-
-@pytest.mark.parametrize(
-    "valid_model",
-    [
-        "openai/gpt-4",
-        "gpt-3.5-turbo",
-        "anthropic/claude-2",
-    ],
-)
-def test_valid_model_ids(valid_model):
-    """Test that valid model IDs are accepted."""
-    llm = LLM(model=valid_model)
-    assert llm.model == valid_model
-
-
-def test_empty_model_id():
-    """Test that empty model IDs are rejected."""
-    with pytest.raises(ValueError, match="Invalid model ID: ''. Model ID cannot be empty or whitespace."):
-        LLM(model="")
-
-
-def test_whitespace_model_id():
-    """Test that whitespace model IDs are rejected."""
-    with pytest.raises(ValueError, match="Invalid model ID: '   '. Model ID cannot be empty or whitespace."):
-        LLM(model="   ")
Author	SHA1	Message	Date
João Moura	63028e1b20	Merge branch 'main' into fix-knowledgestorage-default-instantiation	2024-12-27 21:18:16 -03:00
João Moura	81759e8c72	Merge branch 'main' into fix-knowledgestorage-default-instantiation	2024-12-27 17:18:33 -03:00
ericklima-ca	27472ba69e	refactor: Change storage field to optional and improve error handling when saving documents	2024-12-26 22:27:19 -04:00
ericklima-ca	25aa774d8c	fix: Change storage initialization to None for KnowledgeStorage	2024-12-26 21:30:06 -04:00