mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 00:58:30 +00:00
added knowledge to agent level (#1655)
* added knowledge to agent level * linted * added doc * added from suggestions * added test * fixes from discussion * fix docs * fix test * rm cassette for knowledge_sources test as its a mock and update agent doc string * fix test * rm unused * linted
This commit is contained in:
@@ -3,12 +3,16 @@ import io
|
||||
import logging
|
||||
import chromadb
|
||||
import os
|
||||
|
||||
import chromadb.errors
|
||||
from crewai.utilities.paths import db_storage_path
|
||||
from typing import Optional, List
|
||||
from typing import Dict, Any
|
||||
from typing import Optional, List, Dict, Any, Union
|
||||
from crewai.utilities import EmbeddingConfigurator
|
||||
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
|
||||
import hashlib
|
||||
from chromadb.config import Settings
|
||||
from chromadb.api import ClientAPI
|
||||
from crewai.utilities.logger import Logger
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
@@ -35,9 +39,16 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
||||
"""
|
||||
|
||||
collection: Optional[chromadb.Collection] = None
|
||||
collection_name: Optional[str] = "knowledge"
|
||||
app: Optional[ClientAPI] = None
|
||||
|
||||
def __init__(self, embedder_config: Optional[Dict[str, Any]] = None):
|
||||
self._initialize_app(embedder_config or {})
|
||||
def __init__(
|
||||
self,
|
||||
embedder_config: Optional[Dict[str, Any]] = None,
|
||||
collection_name: Optional[str] = None,
|
||||
):
|
||||
self.collection_name = collection_name
|
||||
self._set_embedder_config(embedder_config)
|
||||
|
||||
def search(
|
||||
self,
|
||||
@@ -67,43 +78,75 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
||||
else:
|
||||
raise Exception("Collection not initialized")
|
||||
|
||||
def _initialize_app(self, embedder_config: Optional[Dict[str, Any]] = None):
|
||||
import chromadb
|
||||
from chromadb.config import Settings
|
||||
|
||||
self._set_embedder_config(embedder_config)
|
||||
|
||||
def initialize_knowledge_storage(self):
|
||||
base_path = os.path.join(db_storage_path(), "knowledge")
|
||||
chroma_client = chromadb.PersistentClient(
|
||||
path=f"{db_storage_path()}/knowledge",
|
||||
path=base_path,
|
||||
settings=Settings(allow_reset=True),
|
||||
)
|
||||
|
||||
self.app = chroma_client
|
||||
|
||||
try:
|
||||
self.collection = self.app.get_or_create_collection(name="knowledge")
|
||||
collection_name = (
|
||||
f"knowledge_{self.collection_name}"
|
||||
if self.collection_name
|
||||
else "knowledge"
|
||||
)
|
||||
if self.app:
|
||||
self.collection = self.app.get_or_create_collection(
|
||||
name=collection_name, embedding_function=self.embedder_config
|
||||
)
|
||||
else:
|
||||
raise Exception("Vector Database Client not initialized")
|
||||
except Exception:
|
||||
raise Exception("Failed to create or get collection")
|
||||
|
||||
def reset(self):
|
||||
if self.app:
|
||||
self.app.reset()
|
||||
else:
|
||||
base_path = os.path.join(db_storage_path(), "knowledge")
|
||||
self.app = chromadb.PersistentClient(
|
||||
path=base_path,
|
||||
settings=Settings(allow_reset=True),
|
||||
)
|
||||
self.app.reset()
|
||||
|
||||
def save(
|
||||
self, documents: List[str], metadata: Dict[str, Any] | List[Dict[str, Any]]
|
||||
self,
|
||||
documents: List[str],
|
||||
metadata: Union[Dict[str, Any], List[Dict[str, Any]]],
|
||||
):
|
||||
if self.collection:
|
||||
metadatas = [metadata] if isinstance(metadata, dict) else metadata
|
||||
try:
|
||||
metadatas = [metadata] if isinstance(metadata, dict) else metadata
|
||||
|
||||
ids = [
|
||||
hashlib.sha256(doc.encode("utf-8")).hexdigest() for doc in documents
|
||||
]
|
||||
ids = [
|
||||
hashlib.sha256(doc.encode("utf-8")).hexdigest() for doc in documents
|
||||
]
|
||||
|
||||
self.collection.upsert(
|
||||
documents=documents,
|
||||
metadatas=metadatas,
|
||||
ids=ids,
|
||||
)
|
||||
self.collection.upsert(
|
||||
documents=documents,
|
||||
metadatas=metadatas,
|
||||
ids=ids,
|
||||
)
|
||||
except chromadb.errors.InvalidDimensionException as e:
|
||||
Logger(verbose=True).log(
|
||||
"error",
|
||||
"Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`",
|
||||
"red",
|
||||
)
|
||||
raise ValueError(
|
||||
"Embedding dimension mismatch. Make sure you're using the same embedding model "
|
||||
"across all operations with this collection."
|
||||
"Try resetting the collection using `crewai reset-memories -a`"
|
||||
) from e
|
||||
except Exception as e:
|
||||
Logger(verbose=True).log(
|
||||
"error", f"Failed to upsert documents: {e}", "red"
|
||||
)
|
||||
raise
|
||||
else:
|
||||
raise Exception("Collection not initialized")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user