fixes from discussion

This commit is contained in:
Lorenze Jay
2024-11-27 10:38:20 -08:00
parent 3f87bf3ada
commit 5b03d6c8bc
7 changed files with 119 additions and 79 deletions

View File

@@ -51,7 +51,7 @@ crew = Crew(
tasks=[task], tasks=[task],
verbose=True, verbose=True,
process=Process.sequential, process=Process.sequential,
knowledge={"sources": [string_source], "metadata": {"preference": "personal"}}, # Enable knowledge by adding the sources here. You can also add more sources to the sources list. knowledge_sources=[string_source], # Enable knowledge by adding the sources here. You can also add more sources to the sources list.
) )
result = crew.kickoff(inputs={"question": "What city does John live in and how old is he?"}) result = crew.kickoff(inputs={"question": "What city does John live in and how old is he?"})
@@ -63,18 +63,29 @@ Sometimes you may want to append knowledge sources to an individual agent. You c
```python ```python
agent = Agent( agent = Agent(
... ...
knowledge={ knowledge_sources=[
"sources": [ StringKnowledgeSource(
StringKnowledgeSource( content="Users name is John. He is 30 years old and lives in San Francisco.",
content="Users name is John. He is 30 years old and lives in San Francisco.", metadata={"preference": "personal"},
metadata={"preference": "personal"}, )
) ],
],
"metadata": {"preference": "personal"},
},
) )
``` ```
## Agent Level Knowledge Sources
You can also append knowledge sources to an individual agent by setting the `knowledge_sources` parameter in the `Agent` class.
```python
string_source = StringKnowledgeSource(
content="Users name is John. He is 30 years old and lives in San Francisco.",
metadata={"preference": "personal"},
)
agent = Agent(
...
knowledge_sources=[string_source],
)
```
## Embedder Configuration ## Embedder Configuration
@@ -88,10 +99,7 @@ string_source = StringKnowledgeSource(
) )
crew = Crew( crew = Crew(
... ...
knowledge={ knowledge_sources=[string_source],
"sources": [string_source], embedder_config={"provider": "ollama", "config": {"model": "nomic-embed-text:latest"}},
"metadata": {"preference": "personal"},
"embedder_config": {"provider": "openai", "config": {"model": "text-embedding-3-small"}},
},
) )
``` ```

View File

@@ -21,6 +21,7 @@ from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_F
from crewai.utilities.converter import generate_model_description from crewai.utilities.converter import generate_model_description
from crewai.utilities.token_counter_callback import TokenCalcHandler from crewai.utilities.token_counter_callback import TokenCalcHandler
from crewai.utilities.training_handler import CrewTrainingHandler from crewai.utilities.training_handler import CrewTrainingHandler
from crewai.knowledge.utils.knowledge_utils import extract_knowledge_context
def mock_agent_ops_provider(): def mock_agent_ops_provider():
@@ -124,14 +125,17 @@ class Agent(BaseAgent):
default="safe", default="safe",
description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).", description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).",
) )
knowledge: Optional[Union[List[BaseKnowledgeSource], Knowledge]] = Field(
default=None,
description="Knowledge for the agent. Add knowledge sources to the knowledge object.",
)
embedder_config: Optional[Dict[str, Any]] = Field( embedder_config: Optional[Dict[str, Any]] = Field(
default=None, default=None,
description="Embedder configuration for the agent.", description="Embedder configuration for the agent.",
) )
knowledge_sources: Optional[List[BaseKnowledgeSource]] = Field(
default=None,
description="Knowledge sources for the agent. Add knowledge sources to the knowledge object.",
)
_knowledge: Optional[Knowledge] = PrivateAttr(
default=None,
)
@model_validator(mode="after") @model_validator(mode="after")
def post_init_setup(self): def post_init_setup(self):
@@ -245,14 +249,13 @@ class Agent(BaseAgent):
def _set_knowledge(self): def _set_knowledge(self):
try: try:
if self.knowledge: if self.knowledge_sources:
knowledge_agent_name = f"{self.role.replace(' ', '_')}" knowledge_agent_name = f"{self.role.replace(' ', '_')}"
print("knowledge_agent_name", knowledge_agent_name) if isinstance(self.knowledge_sources, list) and all(
if isinstance(self.knowledge, list) and all( isinstance(k, BaseKnowledgeSource) for k in self.knowledge_sources
isinstance(k, BaseKnowledgeSource) for k in self.knowledge
): ):
self.knowledge = Knowledge( self._knowledge = Knowledge(
sources=self.knowledge, sources=self.knowledge_sources,
embedder_config=self.embedder_config, embedder_config=self.embedder_config,
collection_name=knowledge_agent_name, collection_name=knowledge_agent_name,
) )
@@ -313,22 +316,21 @@ class Agent(BaseAgent):
if memory.strip() != "": if memory.strip() != "":
task_prompt += self.i18n.slice("memory").format(memory=memory) task_prompt += self.i18n.slice("memory").format(memory=memory)
if self.knowledge and isinstance(self.knowledge, Knowledge): if self._knowledge:
agent_knowledge_snippets = self.knowledge.query([task.prompt()]) agent_knowledge_snippets = self._knowledge.query([task.prompt()])
agent_knowledge_context = self.knowledge.extract_knowledge_context( if agent_knowledge_snippets:
agent_knowledge_snippets agent_knowledge_context = extract_knowledge_context(
) agent_knowledge_snippets
if agent_knowledge_context: )
task_prompt += agent_knowledge_context if agent_knowledge_context:
task_prompt += agent_knowledge_context
if self.crew and self.crew.knowledge: if self.crew:
knowledge_snippets = self.crew.knowledge.query([task.prompt()]) knowledge_snippets = self.crew.query_knowledge([task.prompt()])
if knowledge_snippets:
crew_knowledge_context = self.crew.knowledge.extract_knowledge_context( crew_knowledge_context = extract_knowledge_context(knowledge_snippets)
knowledge_snippets if crew_knowledge_context:
) task_prompt += crew_knowledge_context
if crew_knowledge_context:
task_prompt += crew_knowledge_context
tools = tools or self.tools or [] tools = tools or self.tools or []
self.create_agent_executor(tools=tools, task=task) self.create_agent_executor(tools=tools, task=task)

View File

@@ -203,9 +203,12 @@ class Crew(BaseModel):
default=[], default=[],
description="List of execution logs for tasks", description="List of execution logs for tasks",
) )
knowledge: Optional[Union[List[BaseKnowledgeSource], Knowledge]] = Field( knowledge_sources: Optional[List[BaseKnowledgeSource]] = Field(
default=None,
description="Knowledge sources for the crew. Add knowledge sources to the knowledge object.",
)
_knowledge: Optional[Knowledge] = PrivateAttr(
default=None, default=None,
description="Knowledge for the crew. Add knowledge sources to the knowledge object.",
) )
@field_validator("id", mode="before") @field_validator("id", mode="before")
@@ -284,13 +287,13 @@ class Crew(BaseModel):
@model_validator(mode="after") @model_validator(mode="after")
def create_crew_knowledge(self) -> "Crew": def create_crew_knowledge(self) -> "Crew":
"""Create the knowledge for the crew.""" """Create the knowledge for the crew."""
if self.knowledge: if self.knowledge_sources:
try: try:
if isinstance(self.knowledge, list) and all( if isinstance(self.knowledge_sources, list) and all(
isinstance(k, BaseKnowledgeSource) for k in self.knowledge isinstance(k, BaseKnowledgeSource) for k in self.knowledge_sources
): ):
self.knowledge = Knowledge( self._knowledge = Knowledge(
sources=self.knowledge, sources=self.knowledge_sources,
embedder_config=self.embedder, embedder_config=self.embedder,
collection_name="crew", collection_name="crew",
) )
@@ -954,6 +957,11 @@ class Crew(BaseModel):
result = self._execute_tasks(self.tasks, start_index, True) result = self._execute_tasks(self.tasks, start_index, True)
return result return result
def query_knowledge(self, query: List[str]) -> Union[List[Dict[str, Any]], None]:
    """Run a query against the crew-level knowledge, if any exists.

    Args:
        query: Search strings, passed through unchanged to the private
            ``_knowledge`` object's ``query`` method.

    Returns:
        Whatever ``_knowledge.query`` returns, or ``None`` when
        ``_knowledge`` is unset (or otherwise falsy).
    """
    knowledge = self._knowledge
    return knowledge.query(query) if knowledge else None
def copy(self): def copy(self):
"""Create a deep copy of the Crew.""" """Create a deep copy of the Crew."""

View File

@@ -62,18 +62,6 @@ class Knowledge(BaseModel):
) )
return results return results
def extract_knowledge_context(
    self, knowledge_snippets: List[Dict[str, Any]]
) -> str:
    """Format knowledge query results as a block of prompt text.

    Args:
        knowledge_snippets: Result dicts from a knowledge query. Only entries
            with a truthy ``"context"`` value are used; ``None`` entries and
            entries without that key are ignored. ``None`` or an empty list
            is accepted and treated as "no results".

    Returns:
        ``"Additional Information: "`` followed by the usable contexts joined
        with newlines, or an empty string when there is nothing to add.
    """
    # A query can legitimately yield no result list at all; treat that like
    # an empty one instead of failing while iterating over None.
    if not knowledge_snippets:
        return ""

    contexts = [
        result["context"]
        for result in knowledge_snippets
        if result and result.get("context")
    ]
    if not contexts:
        return ""
    return "Additional Information: " + "\n".join(contexts)
def _add_sources(self): def _add_sources(self):
for source in self.sources: for source in self.sources:
source.storage = self.storage source.storage = self.storage

View File

@@ -3,12 +3,16 @@ import io
import logging import logging
import chromadb import chromadb
import os import os
import chromadb.errors
from crewai.utilities.paths import db_storage_path from crewai.utilities.paths import db_storage_path
from typing import Optional, List, Dict, Any from typing import Optional, List, Dict, Any, Union
from crewai.utilities import EmbeddingConfigurator from crewai.utilities import EmbeddingConfigurator
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
import hashlib import hashlib
from chromadb.config import Settings from chromadb.config import Settings
from chromadb.api import ClientAPI
from crewai.utilities.logger import Logger
@contextlib.contextmanager @contextlib.contextmanager
@@ -36,15 +40,15 @@ class KnowledgeStorage(BaseKnowledgeStorage):
collection: Optional[chromadb.Collection] = None collection: Optional[chromadb.Collection] = None
collection_name: Optional[str] = "knowledge" collection_name: Optional[str] = "knowledge"
app: Optional[chromadb.PersistentClient] = None app: Optional[ClientAPI] = None
def __init__(
    self,
    embedder_config: Optional[Dict[str, Any]] = None,
    collection_name: Optional[str] = None,
):
    """Record the collection name and set up the embedder.

    Args:
        embedder_config: Embedder settings handed to
            ``_set_embedder_config``; how they are interpreted is decided
            there (not visible in this hunk).
        collection_name: Stored on the instance as ``collection_name``.
    """
    self.collection_name = collection_name
    self._set_embedder_config(embedder_config)
def search( def search(
self, self,
@@ -91,7 +95,7 @@ class KnowledgeStorage(BaseKnowledgeStorage):
) )
if self.app: if self.app:
self.collection = self.app.get_or_create_collection( self.collection = self.app.get_or_create_collection(
name=collection_name name=collection_name, embedding_function=self.embedder_config
) )
else: else:
raise Exception("Vector Database Client not initialized") raise Exception("Vector Database Client not initialized")
@@ -110,18 +114,39 @@ class KnowledgeStorage(BaseKnowledgeStorage):
self.app.reset() self.app.reset()
def save(
    self,
    documents: List[str],
    metadata: Union[Dict[str, Any], List[Dict[str, Any]]],
):
    """Upsert documents and their metadata into the collection.

    Each document's ID is the SHA-256 hex digest of its UTF-8 text, so the
    same text always maps to the same ID.

    Args:
        documents: Document texts to store.
        metadata: Either one metadata dict per document (a list), or a
            single dict that is applied to every document.

    Raises:
        ValueError: When Chroma reports an embedding dimension mismatch.
            The original exception is chained as the cause.
        Exception: When the collection has not been initialized. Any other
            failure during the upsert is logged and re-raised unchanged.
    """
    if not self.collection:
        raise Exception("Collection not initialized")

    try:
        # Chroma expects one metadata entry per ID, so a single dict is
        # repeated for every document rather than wrapped once.
        if isinstance(metadata, dict):
            metadatas = [metadata] * len(documents)
        else:
            metadatas = metadata

        ids = [hashlib.sha256(doc.encode("utf-8")).hexdigest() for doc in documents]

        self.collection.upsert(
            documents=documents,
            metadatas=metadatas,
            ids=ids,
        )
    except chromadb.errors.InvalidDimensionException as e:
        Logger(verbose=True).log(
            "error",
            "Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`",
            "red",
        )
        # The trailing space keeps the adjacent literals from fusing into
        # "collection.Try resetting".
        raise ValueError(
            "Embedding dimension mismatch. Make sure you're using the same embedding model "
            "across all operations with this collection. "
            "Try resetting the collection using `crewai reset-memories -a`"
        ) from e
    except Exception as e:
        Logger(verbose=True).log("error", f"Failed to upsert documents: {e}", "red")
        raise

View File

@@ -0,0 +1,12 @@
from typing import Any, Dict, List
def extract_knowledge_context(knowledge_snippets: List[Dict[str, Any]]) -> str:
    """Format knowledge query results as a block of prompt text.

    Args:
        knowledge_snippets: Result dicts from a knowledge query. Only entries
            with a truthy ``"context"`` value are used; ``None`` entries and
            entries without that key are ignored. ``None`` or an empty list
            is accepted and treated as "no results".

    Returns:
        ``"Additional Information: "`` followed by the usable contexts joined
        with newlines, or an empty string when there is nothing to add.
    """
    # A query can legitimately yield no result list at all; treat that like
    # an empty one instead of failing while iterating over None.
    if not knowledge_snippets:
        return ""

    contexts = [
        result["context"]
        for result in knowledge_snippets
        if result and result.get("context")
    ]
    if not contexts:
        return ""
    return "Additional Information: " + "\n".join(contexts)

View File

@@ -3,7 +3,6 @@
import os import os
from unittest import mock from unittest import mock
from unittest.mock import patch from unittest.mock import patch
import pytest import pytest
from crewai import Agent, Crew, Task from crewai import Agent, Crew, Task
@@ -11,7 +10,6 @@ from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.agents.cache import CacheHandler from crewai.agents.cache import CacheHandler
from crewai.agents.crew_agent_executor import CrewAgentExecutor from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.parser import AgentAction, CrewAgentParser, OutputParserException from crewai.agents.parser import AgentAction, CrewAgentParser, OutputParserException
from crewai.knowledge.knowledge import Knowledge
from crewai.llm import LLM from crewai.llm import LLM
from crewai.tools import tool from crewai.tools import tool
from crewai.tools.tool_calling import InstructorToolCalling from crewai.tools.tool_calling import InstructorToolCalling
@@ -1627,10 +1625,9 @@ def test_agent_with_knowledge_sources_context():
goal="Provide information based on knowledge sources", goal="Provide information based on knowledge sources",
backstory="You have access to specific knowledge sources.", backstory="You have access to specific knowledge sources.",
llm=LLM(model="gpt-4o-mini"), llm=LLM(model="gpt-4o-mini"),
knowledge=[string_source], knowledge_sources=[string_source],
) )
# Test that agent is properly initialized with knowledge sources assert isinstance(agent.knowledge_sources, list)
assert isinstance(agent.knowledge, Knowledge) assert len(agent.knowledge_sources) == 1
assert len(agent.knowledge.sources) == 1 assert isinstance(agent.knowledge_sources[0], BaseKnowledgeSource)
assert isinstance(agent.knowledge.sources[0], BaseKnowledgeSource) assert agent.knowledge_sources[0].metadata == {"preference": "personal"}
assert agent.knowledge.sources[0].metadata == {"preference": "personal"}