mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-24 00:08:29 +00:00
Compare commits
7 Commits
0.86.0
...
docs/updat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
906be132f8 | ||
|
|
9a5727b149 | ||
|
|
ef3b93ab93 | ||
|
|
3dbcec9434 | ||
|
|
6930b68484 | ||
|
|
c7c0647dd2 | ||
|
|
7b276e6797 |
@@ -48,7 +48,6 @@ from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSourc
|
||||
content = "Users name is John. He is 30 years old and lives in San Francisco."
|
||||
string_source = StringKnowledgeSource(
|
||||
content=content,
|
||||
metadata={"preference": "personal"}
|
||||
)
|
||||
|
||||
# Create an LLM with a temperature of 0 to ensure deterministic outputs
|
||||
@@ -74,10 +73,7 @@ crew = Crew(
|
||||
tasks=[task],
|
||||
verbose=True,
|
||||
process=Process.sequential,
|
||||
knowledge={
|
||||
"sources": [string_source],
|
||||
"metadata": {"preference": "personal"}
|
||||
}, # Enable knowledge by adding the sources here. You can also add more sources to the sources list.
|
||||
knowledge_sources=[string_source], # Enable knowledge by adding the sources here. You can also add more sources to the sources list.
|
||||
)
|
||||
|
||||
result = crew.kickoff(inputs={"question": "What city does John live in and how old is he?"})
|
||||
@@ -85,17 +81,6 @@ result = crew.kickoff(inputs={"question": "What city does John live in and how o
|
||||
|
||||
## Knowledge Configuration
|
||||
|
||||
### Metadata and Filtering
|
||||
|
||||
Knowledge sources support metadata for better organization and filtering. Metadata is used to filter the knowledge sources when querying the knowledge store.
|
||||
|
||||
```python Code
|
||||
knowledge_source = StringKnowledgeSource(
|
||||
content="Users name is John. He is 30 years old and lives in San Francisco.",
|
||||
metadata={"preference": "personal"} # Metadata is used to filter the knowledge sources
|
||||
)
|
||||
```
|
||||
|
||||
### Chunking Configuration
|
||||
|
||||
Control how content is split for processing by setting the chunk size and overlap.
|
||||
@@ -116,21 +101,28 @@ You can also configure the embedder for the knowledge store. This is useful if y
|
||||
...
|
||||
string_source = StringKnowledgeSource(
|
||||
content="Users name is John. He is 30 years old and lives in San Francisco.",
|
||||
metadata={"preference": "personal"}
|
||||
)
|
||||
crew = Crew(
|
||||
...
|
||||
knowledge={
|
||||
"sources": [string_source],
|
||||
"metadata": {"preference": "personal"},
|
||||
"embedder_config": {
|
||||
"provider": "openai", # Default embedder provider; can be "ollama", "gemini", e.t.c.
|
||||
"config": {"model": "text-embedding-3-small"} # Default embedder model; can be "mxbai-embed-large", "nomic-embed-tex", e.t.c.
|
||||
},
|
||||
knowledge_sources=[string_source],
|
||||
embedder={
|
||||
"provider": "openai",
|
||||
"config": {"model": "text-embedding-3-small"},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
## Clearing Knowledge
|
||||
|
||||
If you need to clear the knowledge stored in CrewAI, you can use the `crewai reset-memories` command with the `--knowledge` option.
|
||||
|
||||
```bash Command
|
||||
crewai reset-memories --knowledge
|
||||
```
|
||||
|
||||
This is useful when you've updated your knowledge sources and want to ensure that the agents are using the most recent information.
|
||||
|
||||
|
||||
## Custom Knowledge Sources
|
||||
|
||||
CrewAI allows you to create custom knowledge sources for any type of data by extending the `BaseKnowledgeSource` class. Let's create a practical example that fetches and processes space news articles.
|
||||
@@ -174,12 +166,12 @@ class SpaceNewsKnowledgeSource(BaseKnowledgeSource):
|
||||
formatted = "Space News Articles:\n\n"
|
||||
for article in articles:
|
||||
formatted += f"""
|
||||
Title: {article['title']}
|
||||
Published: {article['published_at']}
|
||||
Summary: {article['summary']}
|
||||
News Site: {article['news_site']}
|
||||
URL: {article['url']}
|
||||
-------------------"""
|
||||
Title: {article['title']}
|
||||
Published: {article['published_at']}
|
||||
Summary: {article['summary']}
|
||||
News Site: {article['news_site']}
|
||||
URL: {article['url']}
|
||||
-------------------"""
|
||||
return formatted
|
||||
|
||||
def add(self) -> None:
|
||||
@@ -189,17 +181,12 @@ URL: {article['url']}
|
||||
chunks = self._chunk_text(text)
|
||||
self.chunks.extend(chunks)
|
||||
|
||||
self.save_documents(metadata={
|
||||
"source": "space_news_api",
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"article_count": self.limit
|
||||
})
|
||||
self._save_documents()
|
||||
|
||||
# Create knowledge source
|
||||
recent_news = SpaceNewsKnowledgeSource(
|
||||
api_endpoint="https://api.spaceflightnewsapi.net/v4/articles",
|
||||
limit=10,
|
||||
metadata={"category": "recent_news", "source": "spaceflight_news"}
|
||||
)
|
||||
|
||||
# Create specialized agent
|
||||
@@ -265,7 +252,7 @@ The latest developments in space exploration, based on recent space news article
|
||||
- Implements three key methods:
|
||||
- `load_content()`: Fetches articles from the API
|
||||
- `_format_articles()`: Structures the articles into readable text
|
||||
- `add()`: Processes and stores the content with metadata
|
||||
- `add()`: Processes and stores the content
|
||||
|
||||
2. **Agent Configuration**:
|
||||
- Specialized role as a Space News Analyst
|
||||
@@ -299,14 +286,12 @@ You can customize the API query by modifying the endpoint URL:
|
||||
recent_news = SpaceNewsKnowledgeSource(
|
||||
api_endpoint="https://api.spaceflightnewsapi.net/v4/articles",
|
||||
limit=20, # Increase the number of articles
|
||||
metadata={"category": "recent_news"}
|
||||
)
|
||||
|
||||
# Add search parameters
|
||||
recent_news = SpaceNewsKnowledgeSource(
|
||||
api_endpoint="https://api.spaceflightnewsapi.net/v4/articles?search=NASA", # Search for NASA news
|
||||
limit=10,
|
||||
metadata={"category": "nasa_news"}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -314,16 +299,14 @@ recent_news = SpaceNewsKnowledgeSource(
|
||||
|
||||
<AccordionGroup>
|
||||
<Accordion title="Content Organization">
|
||||
- Use descriptive metadata for better filtering
|
||||
- Keep chunk sizes appropriate for your content type
|
||||
- Consider content overlap for context preservation
|
||||
- Organize related information into separate knowledge sources
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Performance Tips">
|
||||
- Use metadata filtering to narrow search scope
|
||||
- Adjust chunk sizes based on content complexity
|
||||
- Configure appropriate embedding models
|
||||
- Consider using local embedding providers for faster processing
|
||||
</Accordion>
|
||||
</AccordionGroup>
|
||||
</AccordionGroup>
|
||||
|
||||
@@ -57,7 +57,7 @@ This feature is useful for debugging and understanding how agents interact with
|
||||
<Step title="Install AgentOps">
|
||||
Install AgentOps with:
|
||||
```bash
|
||||
pip install crewai[agentops]
|
||||
pip install 'crewai[agentops]'
|
||||
```
|
||||
or
|
||||
```bash
|
||||
|
||||
@@ -8,7 +8,7 @@ from pydantic import Field, InstanceOf, PrivateAttr, model_validator
|
||||
from crewai.agents import CacheHandler
|
||||
from crewai.agents.agent_builder.base_agent import BaseAgent
|
||||
from crewai.agents.crew_agent_executor import CrewAgentExecutor
|
||||
from crewai.cli.constants import ENV_VARS
|
||||
from crewai.cli.constants import ENV_VARS, LITELLM_PARAMS
|
||||
from crewai.knowledge.knowledge import Knowledge
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from crewai.knowledge.utils.knowledge_utils import extract_knowledge_context
|
||||
@@ -181,20 +181,11 @@ class Agent(BaseAgent):
|
||||
if key_name and key_name not in unaccepted_attributes:
|
||||
env_value = os.environ.get(key_name)
|
||||
if env_value:
|
||||
# Map key names containing "API_KEY" to "api_key"
|
||||
key_name = (
|
||||
"api_key" if "API_KEY" in key_name else key_name
|
||||
)
|
||||
# Map key names containing "API_BASE" to "api_base"
|
||||
key_name = (
|
||||
"api_base" if "API_BASE" in key_name else key_name
|
||||
)
|
||||
# Map key names containing "API_VERSION" to "api_version"
|
||||
key_name = (
|
||||
"api_version"
|
||||
if "API_VERSION" in key_name
|
||||
else key_name
|
||||
)
|
||||
key_name = key_name.lower()
|
||||
for pattern in LITELLM_PARAMS:
|
||||
if pattern in key_name:
|
||||
key_name = pattern
|
||||
break
|
||||
llm_params[key_name] = env_value
|
||||
# Check for default values if the environment variable is not set
|
||||
elif env_var.get("default", False):
|
||||
|
||||
@@ -25,6 +25,7 @@ from .update_crew import update_crew
|
||||
|
||||
|
||||
@click.group()
|
||||
@click.version_option(pkg_resources.get_distribution("crewai").version)
|
||||
def crewai():
|
||||
"""Top-level command group for crewai."""
|
||||
|
||||
@@ -50,7 +51,10 @@ def create(type, name, provider, skip_provider=False):
|
||||
)
|
||||
def version(tools):
|
||||
"""Show the installed version of crewai."""
|
||||
crewai_version = pkg_resources.get_distribution("crewai").version
|
||||
try:
|
||||
crewai_version = pkg_resources.get_distribution("crewai").version
|
||||
except Exception:
|
||||
crewai_version = "unknown version"
|
||||
click.echo(f"crewai version: {crewai_version}")
|
||||
|
||||
if tools:
|
||||
|
||||
@@ -159,3 +159,6 @@ MODELS = {
|
||||
}
|
||||
|
||||
JSON_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
|
||||
|
||||
|
||||
LITELLM_PARAMS = ["api_key", "api_base", "api_version"]
|
||||
|
||||
@@ -12,6 +12,6 @@ reporting_task:
|
||||
Review the context you got and expand each topic into a full section for a report.
|
||||
Make sure the report is detailed and contains any and all relevant information.
|
||||
expected_output: >
|
||||
A fully fledge reports with the mains topics, each with a full section of information.
|
||||
A fully fledged report with the main topics, each with a full section of information.
|
||||
Formatted as markdown without '```'
|
||||
agent: reporting_analyst
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from typing import List, Optional, Dict, Any
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||
from crewai.utilities.constants import DEFAULT_SCORE_THRESHOLD
|
||||
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false" # removes logging from fastembed
|
||||
|
||||
@@ -46,9 +45,7 @@ class Knowledge(BaseModel):
|
||||
source.storage = self.storage
|
||||
source.add()
|
||||
|
||||
def query(
|
||||
self, query: List[str], limit: int = 3, preference: Optional[str] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
def query(self, query: List[str], limit: int = 3) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Query across all knowledge sources to find the most relevant information.
|
||||
Returns the top_k most relevant chunks.
|
||||
@@ -57,8 +54,6 @@ class Knowledge(BaseModel):
|
||||
results = self.storage.search(
|
||||
query,
|
||||
limit,
|
||||
filter={"preference": preference} if preference else None,
|
||||
score_threshold=DEFAULT_SCORE_THRESHOLD,
|
||||
)
|
||||
return results
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Union, List, Dict, Any
|
||||
from typing import Dict, List, Union
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from crewai.utilities.logger import Logger
|
||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
|
||||
from crewai.utilities.logger import Logger
|
||||
|
||||
|
||||
class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
|
||||
@@ -49,10 +49,9 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
|
||||
color="red",
|
||||
)
|
||||
|
||||
def save_documents(self, metadata: Dict[str, Any]):
|
||||
def _save_documents(self):
|
||||
"""Save the documents to the storage."""
|
||||
chunk_metadatas = [metadata.copy() for _ in self.chunks]
|
||||
self.storage.save(self.chunks, chunk_metadatas)
|
||||
self.storage.save(self.chunks)
|
||||
|
||||
def convert_to_path(self, path: Union[Path, str]) -> Path:
|
||||
"""Convert a path to a Path object."""
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Dict, Any, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
@@ -17,7 +17,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
|
||||
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict) # Currently unused
|
||||
collection_name: Optional[str] = Field(default=None)
|
||||
|
||||
@abstractmethod
|
||||
@@ -41,9 +41,9 @@ class BaseKnowledgeSource(BaseModel, ABC):
|
||||
for i in range(0, len(text), self.chunk_size - self.chunk_overlap)
|
||||
]
|
||||
|
||||
def save_documents(self, metadata: Dict[str, Any]):
|
||||
def _save_documents(self):
|
||||
"""
|
||||
Save the documents to the storage.
|
||||
This method should be called after the chunks and embeddings are generated.
|
||||
"""
|
||||
self.storage.save(self.chunks, metadata)
|
||||
self.storage.save(self.chunks)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import csv
|
||||
from typing import Dict, List
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||
|
||||
@@ -30,7 +30,7 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource):
|
||||
)
|
||||
new_chunks = self._chunk_text(content_str)
|
||||
self.chunks.extend(new_chunks)
|
||||
self.save_documents(metadata=self.metadata)
|
||||
self._save_documents()
|
||||
|
||||
def _chunk_text(self, text: str) -> List[str]:
|
||||
"""Utility method to split text into chunks."""
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from typing import Dict, List
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||
|
||||
|
||||
@@ -44,7 +45,7 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
new_chunks = self._chunk_text(content_str)
|
||||
self.chunks.extend(new_chunks)
|
||||
self.save_documents(metadata=self.metadata)
|
||||
self._save_documents()
|
||||
|
||||
def _chunk_text(self, text: str) -> List[str]:
|
||||
"""Utility method to split text into chunks."""
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import json
|
||||
from typing import Any, Dict, List
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||
|
||||
@@ -42,7 +42,7 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
|
||||
)
|
||||
new_chunks = self._chunk_text(content_str)
|
||||
self.chunks.extend(new_chunks)
|
||||
self.save_documents(metadata=self.metadata)
|
||||
self._save_documents()
|
||||
|
||||
def _chunk_text(self, text: str) -> List[str]:
|
||||
"""Utility method to split text into chunks."""
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from typing import List, Dict
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||
|
||||
@@ -43,7 +43,7 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
|
||||
for _, text in self.content.items():
|
||||
new_chunks = self._chunk_text(text)
|
||||
self.chunks.extend(new_chunks)
|
||||
self.save_documents(metadata=self.metadata)
|
||||
self._save_documents()
|
||||
|
||||
def _chunk_text(self, text: str) -> List[str]:
|
||||
"""Utility method to split text into chunks."""
|
||||
|
||||
@@ -24,7 +24,7 @@ class StringKnowledgeSource(BaseKnowledgeSource):
|
||||
"""Add string content to the knowledge source, chunk it, compute embeddings, and save them."""
|
||||
new_chunks = self._chunk_text(self.content)
|
||||
self.chunks.extend(new_chunks)
|
||||
self.save_documents(metadata=self.metadata)
|
||||
self._save_documents()
|
||||
|
||||
def _chunk_text(self, text: str) -> List[str]:
|
||||
"""Utility method to split text into chunks."""
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from typing import Dict, List
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||
|
||||
@@ -24,7 +24,7 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource):
|
||||
for _, text in self.content.items():
|
||||
new_chunks = self._chunk_text(text)
|
||||
self.chunks.extend(new_chunks)
|
||||
self.save_documents(metadata=self.metadata)
|
||||
self._save_documents()
|
||||
|
||||
def _chunk_text(self, text: str) -> List[str]:
|
||||
"""Utility method to split text into chunks."""
|
||||
|
||||
@@ -1,18 +1,20 @@
|
||||
import contextlib
|
||||
import hashlib
|
||||
import io
|
||||
import logging
|
||||
import chromadb
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional, Union, cast
|
||||
|
||||
import chromadb
|
||||
import chromadb.errors
|
||||
from crewai.utilities.paths import db_storage_path
|
||||
from typing import Optional, List, Dict, Any, Union
|
||||
from crewai.utilities import EmbeddingConfigurator
|
||||
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
|
||||
import hashlib
|
||||
from chromadb.config import Settings
|
||||
from chromadb.api import ClientAPI
|
||||
from chromadb.api.types import OneOrMany
|
||||
from chromadb.config import Settings
|
||||
|
||||
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
|
||||
from crewai.utilities import EmbeddingConfigurator
|
||||
from crewai.utilities.logger import Logger
|
||||
from crewai.utilities.paths import db_storage_path
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
@@ -116,11 +118,16 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
||||
def save(
|
||||
self,
|
||||
documents: List[str],
|
||||
metadata: Union[Dict[str, Any], List[Dict[str, Any]]],
|
||||
metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
|
||||
):
|
||||
if self.collection:
|
||||
try:
|
||||
metadatas = [metadata] if isinstance(metadata, dict) else metadata
|
||||
if metadata is None:
|
||||
metadatas: Optional[OneOrMany[chromadb.Metadata]] = None
|
||||
elif isinstance(metadata, list):
|
||||
metadatas = [cast(chromadb.Metadata, m) for m in metadata]
|
||||
else:
|
||||
metadatas = cast(chromadb.Metadata, metadata)
|
||||
|
||||
ids = [
|
||||
hashlib.sha256(doc.encode("utf-8")).hexdigest() for doc in documents
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import warnings
|
||||
@@ -128,6 +129,7 @@ class LLM:
|
||||
litellm.drop_params = True
|
||||
litellm.set_verbose = False
|
||||
self.set_callbacks(callbacks)
|
||||
self.set_env_callbacks()
|
||||
|
||||
def call(self, messages: List[Dict[str, str]], callbacks: List[Any] = []) -> str:
|
||||
with suppress_warnings():
|
||||
@@ -202,3 +204,39 @@ class LLM:
|
||||
litellm._async_success_callback.remove(callback)
|
||||
|
||||
litellm.callbacks = callbacks
|
||||
|
||||
def set_env_callbacks(self):
|
||||
"""
|
||||
Sets the success and failure callbacks for the LiteLLM library from environment variables.
|
||||
|
||||
This method reads the `LITELLM_SUCCESS_CALLBACKS` and `LITELLM_FAILURE_CALLBACKS`
|
||||
environment variables, which should contain comma-separated lists of callback names.
|
||||
It then assigns these lists to `litellm.success_callback` and `litellm.failure_callback`,
|
||||
respectively.
|
||||
|
||||
If the environment variables are not set or are empty, the corresponding callback lists
|
||||
will be set to empty lists.
|
||||
|
||||
Example:
|
||||
LITELLM_SUCCESS_CALLBACKS="langfuse,langsmith"
|
||||
LITELLM_FAILURE_CALLBACKS="langfuse"
|
||||
|
||||
This will set `litellm.success_callback` to ["langfuse", "langsmith"] and
|
||||
`litellm.failure_callback` to ["langfuse"].
|
||||
"""
|
||||
success_callbacks_str = os.environ.get("LITELLM_SUCCESS_CALLBACKS", "")
|
||||
success_callbacks = []
|
||||
if success_callbacks_str:
|
||||
success_callbacks = [
|
||||
callback.strip() for callback in success_callbacks_str.split(",")
|
||||
]
|
||||
|
||||
failure_callbacks_str = os.environ.get("LITELLM_FAILURE_CALLBACKS", "")
|
||||
failure_callbacks = []
|
||||
if failure_callbacks_str:
|
||||
failure_callbacks = [
|
||||
callback.strip() for callback in failure_callbacks_str.split(",")
|
||||
]
|
||||
|
||||
litellm.success_callback = success_callbacks
|
||||
litellm.failure_callback = failure_callbacks
|
||||
|
||||
@@ -4,12 +4,14 @@ import logging
|
||||
import os
|
||||
import shutil
|
||||
import uuid
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from chromadb.api import ClientAPI
|
||||
|
||||
from crewai.memory.storage.base_rag_storage import BaseRAGStorage
|
||||
from crewai.utilities.paths import db_storage_path
|
||||
from crewai.utilities import EmbeddingConfigurator
|
||||
from crewai.utilities.constants import MAX_FILE_NAME_LENGTH
|
||||
from crewai.utilities.paths import db_storage_path
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
@@ -37,12 +39,15 @@ class RAGStorage(BaseRAGStorage):
|
||||
|
||||
app: ClientAPI | None = None
|
||||
|
||||
def __init__(self, type, allow_reset=True, embedder_config=None, crew=None, path=None):
|
||||
def __init__(
|
||||
self, type, allow_reset=True, embedder_config=None, crew=None, path=None
|
||||
):
|
||||
super().__init__(type, allow_reset, embedder_config, crew)
|
||||
agents = crew.agents if crew else []
|
||||
agents = [self._sanitize_role(agent.role) for agent in agents]
|
||||
agents = "_".join(agents)
|
||||
self.agents = agents
|
||||
self.storage_file_name = self._build_storage_file_name(type, agents)
|
||||
|
||||
self.type = type
|
||||
|
||||
@@ -60,7 +65,7 @@ class RAGStorage(BaseRAGStorage):
|
||||
|
||||
self._set_embedder_config()
|
||||
chroma_client = chromadb.PersistentClient(
|
||||
path=self.path if self.path else f"{db_storage_path()}/{self.type}/{self.agents}",
|
||||
path=self.path if self.path else self.storage_file_name,
|
||||
settings=Settings(allow_reset=self.allow_reset),
|
||||
)
|
||||
|
||||
@@ -81,6 +86,20 @@ class RAGStorage(BaseRAGStorage):
|
||||
"""
|
||||
return role.replace("\n", "").replace(" ", "_").replace("/", "_")
|
||||
|
||||
def _build_storage_file_name(self, type: str, file_name: str) -> str:
|
||||
"""
|
||||
Ensures file name does not exceed max allowed by OS
|
||||
"""
|
||||
base_path = f"{db_storage_path()}/{type}"
|
||||
|
||||
if len(file_name) > MAX_FILE_NAME_LENGTH:
|
||||
logging.warning(
|
||||
f"Trimming file name from {len(file_name)} to {MAX_FILE_NAME_LENGTH} characters."
|
||||
)
|
||||
file_name = file_name[:MAX_FILE_NAME_LENGTH]
|
||||
|
||||
return f"{base_path}/{file_name}"
|
||||
|
||||
def save(self, value: Any, metadata: Dict[str, Any]) -> None:
|
||||
if not hasattr(self, "app") or not hasattr(self, "collection"):
|
||||
self._initialize_app()
|
||||
|
||||
@@ -3,3 +3,4 @@ TRAINED_AGENTS_DATA_FILE = "trained_agents_data.pkl"
|
||||
DEFAULT_SCORE_THRESHOLD = 0.35
|
||||
KNOWLEDGE_DIRECTORY = "knowledge"
|
||||
MAX_LLM_RETRY = 3
|
||||
MAX_FILE_NAME_LENGTH = 255
|
||||
|
||||
@@ -131,6 +131,13 @@ def test_reset_no_memory_flags(runner):
|
||||
)
|
||||
|
||||
|
||||
def test_version_flag(runner):
|
||||
result = runner.invoke(version)
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert "crewai version:" in result.output
|
||||
|
||||
|
||||
def test_version_command(runner):
|
||||
result = runner.invoke(version)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user