From d93e08a3a6eea9ff3a9e1687797c85f49ac524db Mon Sep 17 00:00:00 2001
From: lorenzejay
Date: Thu, 17 Apr 2025 15:45:09 -0700
Subject: [PATCH] Enhance knowledge management in CrewAI

- Added `KnowledgeConfig` class to configure knowledge retrieval parameters such as `limit` and `score_threshold`.
- Updated `Agent` and `Crew` classes to utilize the new knowledge configuration for querying knowledge sources.
- Enhanced documentation to clarify the addition of knowledge sources at both agent and crew levels.
- Introduced new tips in documentation to guide users on knowledge source management and configuration.
---
 docs/concepts/knowledge.mdx                   | 30 +++++++++
 src/crewai/agent.py                           | 34 +++++++---
 src/crewai/agents/agent_builder/base_agent.py |  5 ++
 src/crewai/crew.py                            | 26 +++++---
 src/crewai/knowledge/knowledge.py             |  5 +-
 src/crewai/knowledge/knowledge_config.py      |  6 ++
 .../knowledge/storage/knowledge_storage.py    |  2 +-
 tests/agent_test.py                           | 62 ++++++++++++++++++-
 8 files changed, 149 insertions(+), 21 deletions(-)
 create mode 100644 src/crewai/knowledge/knowledge_config.py

diff --git a/docs/concepts/knowledge.mdx b/docs/concepts/knowledge.mdx
index ae74ee50a..6d3f6c167 100644
--- a/docs/concepts/knowledge.mdx
+++ b/docs/concepts/knowledge.mdx
@@ -42,6 +42,16 @@ CrewAI supports various types of knowledge sources out of the box:
 | `collection_name` | **str** | No | Name of the collection where the knowledge will be stored. Used to identify different sets of knowledge. Defaults to "knowledge" if not provided. |
 | `storage` | **Optional[KnowledgeStorage]** | No | Custom storage configuration for managing how the knowledge is stored and retrieved. If not provided, a default storage will be created. |
+
+<Tip>
+Unlike retrieval from a vector database using a tool, agents preloaded with knowledge do not need a retrieval persona or task.
+Simply add the relevant knowledge sources your agent or crew needs to function.
+
+Knowledge sources can be added at the agent or crew level.
+Crew-level knowledge sources will be used by **all agents** in the crew.
+Agent-level knowledge sources will be used only by the **specific agent** that is preloaded with them.
+</Tip>
+
 
 ## Quickstart Example
 
@@ -146,6 +156,26 @@ result = crew.kickoff(
     )
 ```
 
+## Knowledge Configuration
+
+You can configure how knowledge is retrieved for an agent or crew by passing a `KnowledgeConfig`.
+
+```python Code
+from crewai.knowledge.knowledge_config import KnowledgeConfig
+
+knowledge_config = KnowledgeConfig(limit=10, score_threshold=0.5)
+
+agent = Agent(
+    ...
+    knowledge_config=knowledge_config
+)
+```
+
+<Tip>
+`limit`: the number of relevant documents to return. Default is 3.
+`score_threshold`: the minimum score for a document to be considered relevant. Default is 0.35.
+</Tip>
+
 ## More Examples
 
 Here are examples of how to use different types of knowledge sources:
diff --git a/src/crewai/agent.py b/src/crewai/agent.py
index 2a8067576..f472b50f7 100644
--- a/src/crewai/agent.py
+++ b/src/crewai/agent.py
@@ -114,6 +114,14 @@ class Agent(BaseAgent):
         default=None,
         description="Embedder configuration for the agent.",
     )
+    agent_knowledge_context: Optional[str] = Field(
+        default=None,
+        description="Knowledge context for the agent.",
+    )
+    crew_knowledge_context: Optional[str] = Field(
+        default=None,
+        description="Knowledge context for the crew.",
+    )
 
     @model_validator(mode="after")
     def post_init_setup(self):
@@ -229,22 +237,30 @@ class Agent(BaseAgent):
             memory = contextual_memory.build_context_for_task(task, context)
             if memory.strip() != "":
                 task_prompt += self.i18n.slice("memory").format(memory=memory)
-
+        knowledge_config = (
+            self.knowledge_config.model_dump() if self.knowledge_config else {}
+        )
         if self.knowledge:
-            agent_knowledge_snippets = self.knowledge.query([task.prompt()])
+            agent_knowledge_snippets = self.knowledge.query(
+                [task.prompt()], **knowledge_config
+            )
             if agent_knowledge_snippets:
-                agent_knowledge_context = extract_knowledge_context(
+                self.agent_knowledge_context = extract_knowledge_context(
                     agent_knowledge_snippets
                 )
-                if agent_knowledge_context:
-                    task_prompt += agent_knowledge_context
+                if self.agent_knowledge_context:
+                    task_prompt += self.agent_knowledge_context
 
         if self.crew:
-            knowledge_snippets = self.crew.query_knowledge([task.prompt()])
+            knowledge_snippets = self.crew.query_knowledge(
+                [task.prompt()], **knowledge_config
+            )
             if knowledge_snippets:
-                crew_knowledge_context = extract_knowledge_context(knowledge_snippets)
-                if crew_knowledge_context:
-                    task_prompt += crew_knowledge_context
+                self.crew_knowledge_context = extract_knowledge_context(
+                    knowledge_snippets
+                )
+                if self.crew_knowledge_context:
+                    task_prompt += self.crew_knowledge_context
 
         tools = tools or self.tools or []
         self.create_agent_executor(tools=tools, task=task)
diff --git a/src/crewai/agents/agent_builder/base_agent.py b/src/crewai/agents/agent_builder/base_agent.py
index a82cd12d7..ba2596f63 100644
--- a/src/crewai/agents/agent_builder/base_agent.py
+++ b/src/crewai/agents/agent_builder/base_agent.py
@@ -19,6 +19,7 @@ from crewai.agents.agent_builder.utilities.base_token_process import TokenProces
 from crewai.agents.cache.cache_handler import CacheHandler
 from crewai.agents.tools_handler import ToolsHandler
 from crewai.knowledge.knowledge import Knowledge
+from crewai.knowledge.knowledge_config import KnowledgeConfig
 from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
 from crewai.security.security_config import SecurityConfig
 from crewai.tools.base_tool import BaseTool, Tool
@@ -155,6 +156,10 @@ class BaseAgent(ABC, BaseModel):
     adapted_agent: bool = Field(
         default=False, description="Whether the agent is adapted"
     )
+    knowledge_config: Optional[KnowledgeConfig] = Field(
+        default=None,
+        description="Knowledge configuration for the agent such as limits and threshold",
+    )
 
     @model_validator(mode="before")
     @classmethod
diff --git a/src/crewai/crew.py b/src/crewai/crew.py
index f1c33f637..600dd5d4e 100644
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -304,9 +304,7 @@ class Crew(BaseModel):
         """Initialize private memory attributes."""
         self._external_memory = (
             # External memory doesn’t support a default value since it was designed to be managed entirely externally
-            self.external_memory.set_crew(self)
-            if self.external_memory
-            else None
+            self.external_memory.set_crew(self) if self.external_memory else None
         )
 
         self._long_term_memory = self.long_term_memory
@@ -1136,9 +1134,13 @@ class Crew(BaseModel):
             result = self._execute_tasks(self.tasks, start_index, True)
             return result
 
-    def query_knowledge(self, query: List[str]) -> Union[List[Dict[str, Any]], None]:
+    def query_knowledge(
+        self, query: List[str], limit: int = 3, score_threshold: float = 0.35
+    ) -> Union[List[Dict[str, Any]], None]:
         if self.knowledge:
-            return self.knowledge.query(query)
+            return self.knowledge.query(
+                query, limit=limit, score_threshold=score_threshold
+            )
         return None
 
     def fetch_inputs(self) -> Set[str]:
@@ -1220,9 +1222,13 @@ class Crew(BaseModel):
         copied_data = self.model_dump(exclude=exclude)
         copied_data = {k: v for k, v in copied_data.items() if v is not None}
         if self.short_term_memory:
-            copied_data["short_term_memory"] = self.short_term_memory.model_copy(deep=True)
+            copied_data["short_term_memory"] = self.short_term_memory.model_copy(
+                deep=True
+            )
         if self.long_term_memory:
-            copied_data["long_term_memory"] = self.long_term_memory.model_copy(deep=True)
+            copied_data["long_term_memory"] = self.long_term_memory.model_copy(
+                deep=True
+            )
         if self.entity_memory:
             copied_data["entity_memory"] = self.entity_memory.model_copy(deep=True)
         if self.external_memory:
@@ -1230,7 +1236,6 @@ class Crew(BaseModel):
         if self.user_memory:
             copied_data["user_memory"] = self.user_memory.model_copy(deep=True)
-
         copied_data.pop("agents", None)
         copied_data.pop("tasks", None)
 
@@ -1403,7 +1408,10 @@ class Crew(BaseModel):
             "short": (getattr(self, "_short_term_memory", None), "short term"),
             "entity": (getattr(self, "_entity_memory", None), "entity"),
             "knowledge": (getattr(self, "knowledge", None), "knowledge"),
-            "kickoff_outputs": (getattr(self, "_task_output_handler", None), "task output"),
+            "kickoff_outputs": (
+                getattr(self, "_task_output_handler", None),
+                "task output",
+            ),
             "external": (getattr(self, "_external_memory", None), "external"),
         }
 
diff --git a/src/crewai/knowledge/knowledge.py b/src/crewai/knowledge/knowledge.py
index da1db90a8..8cd8af2aa 100644
--- a/src/crewai/knowledge/knowledge.py
+++ b/src/crewai/knowledge/knowledge.py
@@ -43,7 +43,9 @@ class Knowledge(BaseModel):
             self.storage.initialize_knowledge_storage()
         self._add_sources()
 
-    def query(self, query: List[str], limit: int = 3) -> List[Dict[str, Any]]:
+    def query(
+        self, query: List[str], limit: int = 3, score_threshold: float = 0.35
+    ) -> List[Dict[str, Any]]:
         """
         Query across all knowledge sources to find the most relevant information.
         Returns the top_k most relevant chunks.
@@ -57,6 +59,7 @@ class Knowledge(BaseModel):
         results = self.storage.search(
             query,
             limit,
+            score_threshold=score_threshold,
         )
         return results
 
diff --git a/src/crewai/knowledge/knowledge_config.py b/src/crewai/knowledge/knowledge_config.py
new file mode 100644
index 000000000..434d52f1e
--- /dev/null
+++ b/src/crewai/knowledge/knowledge_config.py
@@ -0,0 +1,6 @@
+from pydantic import BaseModel
+
+
+class KnowledgeConfig(BaseModel):
+    limit: int = 3
+    score_threshold: float = 0.35
diff --git a/src/crewai/knowledge/storage/knowledge_storage.py b/src/crewai/knowledge/storage/knowledge_storage.py
index e23b9e120..d49cc9876 100644
--- a/src/crewai/knowledge/storage/knowledge_storage.py
+++ b/src/crewai/knowledge/storage/knowledge_storage.py
@@ -4,7 +4,7 @@ import io
 import logging
 import os
 import shutil
-from typing import Any, Dict, List, Optional, Union, cast
+from typing import Any, Dict, List, Optional, Union
 
 import chromadb
 import chromadb.errors
diff --git a/tests/agent_test.py b/tests/agent_test.py
index d437a57fc..796e651db 100644
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -10,6 +10,8 @@ from crewai import Agent, Crew, Task
 from crewai.agents.cache import CacheHandler
 from crewai.agents.crew_agent_executor import AgentFinish, CrewAgentExecutor
 from crewai.agents.parser import CrewAgentParser, OutputParserException
+from crewai.knowledge.knowledge import Knowledge
+from crewai.knowledge.knowledge_config import KnowledgeConfig
 from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
 from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
 from crewai.llm import LLM
@@ -259,7 +261,9 @@ def test_cache_hitting():
     def handle_tool_end(source, event):
         received_events.append(event)
 
-    with (patch.object(CacheHandler, "read") as read,):
+    with (
+        patch.object(CacheHandler, "read") as read,
+    ):
         read.return_value = "0"
         task = Task(
             description="What is 2 times 6? Ignore correctness and just return the result of the multiplication tool, you must use the tool.",
@@ -1611,6 +1615,62 @@ def test_agent_with_knowledge_sources():
     assert "red" in result.raw.lower()
 
 
+def test_agent_with_knowledge_sources_with_query_limit_and_score_threshold():
+    content = "Brandon's favorite color is red and he likes Mexican food."
+    string_source = StringKnowledgeSource(content=content)
+    knowledge_config = KnowledgeConfig(limit=10, score_threshold=0.5)
+    with patch.object(Knowledge, "query") as mock_knowledge_query:
+        agent = Agent(
+            role="Information Agent",
+            goal="Provide information based on knowledge sources",
+            backstory="You have access to specific knowledge sources.",
+            llm=LLM(model="gpt-4o-mini"),
+            knowledge_sources=[string_source],
+            knowledge_config=knowledge_config,
+        )
+        task = Task(
+            description="What is Brandon's favorite color?",
+            expected_output="Brandon's favorite color.",
+            agent=agent,
+        )
+        crew = Crew(agents=[agent], tasks=[task])
+        crew.kickoff()
+
+        assert agent.knowledge is not None
+        mock_knowledge_query.assert_called_once_with(
+            [task.prompt()],
+            **knowledge_config.model_dump(),
+        )
+
+
+def test_agent_with_knowledge_sources_with_query_limit_and_score_threshold_default():
+    content = "Brandon's favorite color is red and he likes Mexican food."
+    string_source = StringKnowledgeSource(content=content)
+    knowledge_config = KnowledgeConfig()
+    with patch.object(Knowledge, "query") as mock_knowledge_query:
+        agent = Agent(
+            role="Information Agent",
+            goal="Provide information based on knowledge sources",
+            backstory="You have access to specific knowledge sources.",
+            llm=LLM(model="gpt-4o-mini"),
+            knowledge_sources=[string_source],
+            knowledge_config=knowledge_config,
+        )
+        task = Task(
+            description="What is Brandon's favorite color?",
+            expected_output="Brandon's favorite color.",
+            agent=agent,
+        )
+        crew = Crew(agents=[agent], tasks=[task])
+        crew.kickoff()
+
+        assert agent.knowledge is not None
+        mock_knowledge_query.assert_called_once_with(
+            [task.prompt()],
+            **knowledge_config.model_dump(),
+        )
+
+
 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_agent_with_knowledge_sources_extensive_role():
     content = "Brandon's favorite color is red and he likes Mexican food."
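
Below is a minimal end-to-end sketch of the API this patch introduces, assembled from the documentation example and the new tests above. It is illustrative only: the knowledge content is the placeholder string from the tests, the LLM is left at the agent's default rather than the `LLM(model="gpt-4o-mini")` used in the tests, and the actual output depends on the model.

```python
from crewai import Agent, Crew, Task
from crewai.knowledge.knowledge_config import KnowledgeConfig
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource

# Content the agent should be able to answer questions about (agent-level knowledge).
string_source = StringKnowledgeSource(
    content="Brandon's favorite color is red and he likes Mexican food."
)

# Retrieval settings added by this patch: return up to 10 chunks and keep only
# those scoring at least 0.5. Omitting this (or using KnowledgeConfig()) falls
# back to the defaults limit=3, score_threshold=0.35.
knowledge_config = KnowledgeConfig(limit=10, score_threshold=0.5)

agent = Agent(
    role="Information Agent",
    goal="Provide information based on knowledge sources",
    backstory="You have access to specific knowledge sources.",
    knowledge_sources=[string_source],
    knowledge_config=knowledge_config,
)

task = Task(
    description="What is Brandon's favorite color?",
    expected_output="Brandon's favorite color.",
    agent=agent,
)

crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
print(result.raw)
```

Passing `KnowledgeConfig()` with no arguments keeps the defaults defined in `knowledge_config.py` (`limit=3`, `score_threshold=0.35`), which is exactly what the second new test exercises.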