added knowledge to agent level (#1655)

* added knowledge to agent level

* linted

* added doc

* added from suggestions

* added test

* fixes from discussion

* fix docs

* fix test

* rm cassette for knowledge_sources test as its a mock and update agent doc string

* fix test

* rm unused

* linted
This commit is contained in:
Lorenze Jay
2024-11-27 11:33:07 -08:00
committed by GitHub
parent 366bbbbea3
commit c6a6c918e0
10 changed files with 654 additions and 91 deletions

View File

@@ -3,12 +3,16 @@ import io
import logging
import chromadb
import os
import chromadb.errors
from crewai.utilities.paths import db_storage_path
from typing import Optional, List
from typing import Dict, Any
from typing import Optional, List, Dict, Any, Union
from crewai.utilities import EmbeddingConfigurator
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
import hashlib
from chromadb.config import Settings
from chromadb.api import ClientAPI
from crewai.utilities.logger import Logger
@contextlib.contextmanager
@@ -35,9 +39,16 @@ class KnowledgeStorage(BaseKnowledgeStorage):
"""
collection: Optional[chromadb.Collection] = None
collection_name: Optional[str] = "knowledge"
app: Optional[ClientAPI] = None
def __init__(self, embedder_config: Optional[Dict[str, Any]] = None):
self._initialize_app(embedder_config or {})
def __init__(
self,
embedder_config: Optional[Dict[str, Any]] = None,
collection_name: Optional[str] = None,
):
self.collection_name = collection_name
self._set_embedder_config(embedder_config)
def search(
self,
@@ -67,43 +78,75 @@ class KnowledgeStorage(BaseKnowledgeStorage):
else:
raise Exception("Collection not initialized")
def _initialize_app(self, embedder_config: Optional[Dict[str, Any]] = None):
import chromadb
from chromadb.config import Settings
self._set_embedder_config(embedder_config)
def initialize_knowledge_storage(self):
base_path = os.path.join(db_storage_path(), "knowledge")
chroma_client = chromadb.PersistentClient(
path=f"{db_storage_path()}/knowledge",
path=base_path,
settings=Settings(allow_reset=True),
)
self.app = chroma_client
try:
self.collection = self.app.get_or_create_collection(name="knowledge")
collection_name = (
f"knowledge_{self.collection_name}"
if self.collection_name
else "knowledge"
)
if self.app:
self.collection = self.app.get_or_create_collection(
name=collection_name, embedding_function=self.embedder_config
)
else:
raise Exception("Vector Database Client not initialized")
except Exception:
raise Exception("Failed to create or get collection")
def reset(self):
if self.app:
self.app.reset()
else:
base_path = os.path.join(db_storage_path(), "knowledge")
self.app = chromadb.PersistentClient(
path=base_path,
settings=Settings(allow_reset=True),
)
self.app.reset()
def save(
self, documents: List[str], metadata: Dict[str, Any] | List[Dict[str, Any]]
self,
documents: List[str],
metadata: Union[Dict[str, Any], List[Dict[str, Any]]],
):
if self.collection:
metadatas = [metadata] if isinstance(metadata, dict) else metadata
try:
metadatas = [metadata] if isinstance(metadata, dict) else metadata
ids = [
hashlib.sha256(doc.encode("utf-8")).hexdigest() for doc in documents
]
ids = [
hashlib.sha256(doc.encode("utf-8")).hexdigest() for doc in documents
]
self.collection.upsert(
documents=documents,
metadatas=metadatas,
ids=ids,
)
self.collection.upsert(
documents=documents,
metadatas=metadatas,
ids=ids,
)
except chromadb.errors.InvalidDimensionException as e:
Logger(verbose=True).log(
"error",
"Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`",
"red",
)
raise ValueError(
"Embedding dimension mismatch. Make sure you're using the same embedding model "
"across all operations with this collection."
"Try resetting the collection using `crewai reset-memories -a`"
) from e
except Exception as e:
Logger(verbose=True).log(
"error", f"Failed to upsert documents: {e}", "red"
)
raise
else:
raise Exception("Collection not initialized")