diff --git a/docs/concepts/knowledge.mdx b/docs/concepts/knowledge.mdx
index ae74ee50a..6d3f6c167 100644
--- a/docs/concepts/knowledge.mdx
+++ b/docs/concepts/knowledge.mdx
@@ -42,6 +42,16 @@ CrewAI supports various types of knowledge sources out of the box:
| `collection_name` | **str** | No | Name of the collection where the knowledge will be stored. Used to identify different sets of knowledge. Defaults to "knowledge" if not provided. |
| `storage` | **Optional[KnowledgeStorage]** | No | Custom storage configuration for managing how the knowledge is stored and retrieved. If not provided, a default storage will be created. |
+
+
+Unlike retrieving from a vector database through a tool, agents preloaded with knowledge do not need a dedicated retrieval persona or task.
+Simply add the knowledge sources your agent or crew needs in order to function.
+
+Knowledge sources can be added at the agent level or the crew level.
+Crew-level knowledge sources are used by **all agents** in the crew.
+Agent-level knowledge sources are used by the **specific agent** that is preloaded with that knowledge.
+
+
## Quickstart Example
@@ -146,6 +156,26 @@ result = crew.kickoff(
)
```
+## Knowledge Configuration
+
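+Use `KnowledgeConfig` to control how knowledge is queried: how many results are returned and how relevant they must be. An agent's `knowledge_config` is applied when it queries both its own and its crew's knowledge sources.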
+
+```python Code
+from crewai.knowledge.knowledge_config import KnowledgeConfig
+
+knowledge_config = KnowledgeConfig(limit=10, score_threshold=0.5)
+
+agent = Agent(
+ ...
+ knowledge_config=knowledge_config
+)
+```
+
+
+- `limit`: the maximum number of relevant documents to return. Defaults to `3`.
+- `score_threshold`: the minimum score a document must reach to be considered relevant. Defaults to `0.35`.
+
+
## More Examples
Here are examples of how to use different types of knowledge sources:
diff --git a/src/crewai/agent.py b/src/crewai/agent.py
index 2a8067576..f472b50f7 100644
--- a/src/crewai/agent.py
+++ b/src/crewai/agent.py
@@ -114,6 +114,14 @@ class Agent(BaseAgent):
default=None,
description="Embedder configuration for the agent.",
)
+ agent_knowledge_context: Optional[str] = Field(
+ default=None,
+ description="Knowledge context for the agent.",
+ )
+ crew_knowledge_context: Optional[str] = Field(
+ default=None,
+ description="Knowledge context for the crew.",
+ )
@model_validator(mode="after")
def post_init_setup(self):
@@ -229,22 +237,30 @@ class Agent(BaseAgent):
memory = contextual_memory.build_context_for_task(task, context)
if memory.strip() != "":
task_prompt += self.i18n.slice("memory").format(memory=memory)
-
+ knowledge_config = (
+ self.knowledge_config.model_dump() if self.knowledge_config else {}
+ )
if self.knowledge:
- agent_knowledge_snippets = self.knowledge.query([task.prompt()])
+ agent_knowledge_snippets = self.knowledge.query(
+ [task.prompt()], **knowledge_config
+ )
if agent_knowledge_snippets:
- agent_knowledge_context = extract_knowledge_context(
+ self.agent_knowledge_context = extract_knowledge_context(
agent_knowledge_snippets
)
- if agent_knowledge_context:
- task_prompt += agent_knowledge_context
+ if self.agent_knowledge_context:
+ task_prompt += self.agent_knowledge_context
if self.crew:
- knowledge_snippets = self.crew.query_knowledge([task.prompt()])
+ knowledge_snippets = self.crew.query_knowledge(
+ [task.prompt()], **knowledge_config
+ )
if knowledge_snippets:
- crew_knowledge_context = extract_knowledge_context(knowledge_snippets)
- if crew_knowledge_context:
- task_prompt += crew_knowledge_context
+ self.crew_knowledge_context = extract_knowledge_context(
+ knowledge_snippets
+ )
+ if self.crew_knowledge_context:
+ task_prompt += self.crew_knowledge_context
tools = tools or self.tools or []
self.create_agent_executor(tools=tools, task=task)
diff --git a/src/crewai/agents/agent_builder/base_agent.py b/src/crewai/agents/agent_builder/base_agent.py
index a82cd12d7..ba2596f63 100644
--- a/src/crewai/agents/agent_builder/base_agent.py
+++ b/src/crewai/agents/agent_builder/base_agent.py
@@ -19,6 +19,7 @@ from crewai.agents.agent_builder.utilities.base_token_process import TokenProces
from crewai.agents.cache.cache_handler import CacheHandler
from crewai.agents.tools_handler import ToolsHandler
from crewai.knowledge.knowledge import Knowledge
+from crewai.knowledge.knowledge_config import KnowledgeConfig
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.security.security_config import SecurityConfig
from crewai.tools.base_tool import BaseTool, Tool
@@ -155,6 +156,10 @@ class BaseAgent(ABC, BaseModel):
adapted_agent: bool = Field(
default=False, description="Whether the agent is adapted"
)
+ knowledge_config: Optional[KnowledgeConfig] = Field(
+ default=None,
+ description="Knowledge configuration for the agent such as limits and threshold",
+ )
@model_validator(mode="before")
@classmethod
diff --git a/src/crewai/crew.py b/src/crewai/crew.py
index f1c33f637..600dd5d4e 100644
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -304,9 +304,7 @@ class Crew(BaseModel):
"""Initialize private memory attributes."""
self._external_memory = (
# External memory doesn’t support a default value since it was designed to be managed entirely externally
- self.external_memory.set_crew(self)
- if self.external_memory
- else None
+ self.external_memory.set_crew(self) if self.external_memory else None
)
self._long_term_memory = self.long_term_memory
@@ -1136,9 +1134,13 @@ class Crew(BaseModel):
result = self._execute_tasks(self.tasks, start_index, True)
return result
- def query_knowledge(self, query: List[str]) -> Union[List[Dict[str, Any]], None]:
+ def query_knowledge(
+ self, query: List[str], limit: int = 3, score_threshold: float = 0.35
+ ) -> Union[List[Dict[str, Any]], None]:
if self.knowledge:
- return self.knowledge.query(query)
+ return self.knowledge.query(
+ query, limit=limit, score_threshold=score_threshold
+ )
return None
def fetch_inputs(self) -> Set[str]:
@@ -1220,9 +1222,13 @@ class Crew(BaseModel):
copied_data = self.model_dump(exclude=exclude)
copied_data = {k: v for k, v in copied_data.items() if v is not None}
if self.short_term_memory:
- copied_data["short_term_memory"] = self.short_term_memory.model_copy(deep=True)
+ copied_data["short_term_memory"] = self.short_term_memory.model_copy(
+ deep=True
+ )
if self.long_term_memory:
- copied_data["long_term_memory"] = self.long_term_memory.model_copy(deep=True)
+ copied_data["long_term_memory"] = self.long_term_memory.model_copy(
+ deep=True
+ )
if self.entity_memory:
copied_data["entity_memory"] = self.entity_memory.model_copy(deep=True)
if self.external_memory:
@@ -1230,7 +1236,6 @@ class Crew(BaseModel):
if self.user_memory:
copied_data["user_memory"] = self.user_memory.model_copy(deep=True)
-
copied_data.pop("agents", None)
copied_data.pop("tasks", None)
@@ -1403,7 +1408,10 @@ class Crew(BaseModel):
"short": (getattr(self, "_short_term_memory", None), "short term"),
"entity": (getattr(self, "_entity_memory", None), "entity"),
"knowledge": (getattr(self, "knowledge", None), "knowledge"),
- "kickoff_outputs": (getattr(self, "_task_output_handler", None), "task output"),
+ "kickoff_outputs": (
+ getattr(self, "_task_output_handler", None),
+ "task output",
+ ),
"external": (getattr(self, "_external_memory", None), "external"),
}
diff --git a/src/crewai/knowledge/knowledge.py b/src/crewai/knowledge/knowledge.py
index da1db90a8..8cd8af2aa 100644
--- a/src/crewai/knowledge/knowledge.py
+++ b/src/crewai/knowledge/knowledge.py
@@ -43,7 +43,9 @@ class Knowledge(BaseModel):
self.storage.initialize_knowledge_storage()
self._add_sources()
- def query(self, query: List[str], limit: int = 3) -> List[Dict[str, Any]]:
+ def query(
+ self, query: List[str], limit: int = 3, score_threshold: float = 0.35
+ ) -> List[Dict[str, Any]]:
"""
Query across all knowledge sources to find the most relevant information.
Returns the top_k most relevant chunks.
@@ -57,6 +59,7 @@ class Knowledge(BaseModel):
results = self.storage.search(
query,
limit,
+ score_threshold=score_threshold,
)
return results
diff --git a/src/crewai/knowledge/knowledge_config.py b/src/crewai/knowledge/knowledge_config.py
new file mode 100644
index 000000000..434d52f1e
--- /dev/null
+++ b/src/crewai/knowledge/knowledge_config.py
@@ -0,0 +1,6 @@
+from pydantic import BaseModel
+
+
+class KnowledgeConfig(BaseModel):
+ limit: int = 3
+ score_threshold: float = 0.35
diff --git a/src/crewai/knowledge/storage/knowledge_storage.py b/src/crewai/knowledge/storage/knowledge_storage.py
index e23b9e120..d49cc9876 100644
--- a/src/crewai/knowledge/storage/knowledge_storage.py
+++ b/src/crewai/knowledge/storage/knowledge_storage.py
@@ -4,7 +4,7 @@ import io
import logging
import os
import shutil
-from typing import Any, Dict, List, Optional, Union, cast
+from typing import Any, Dict, List, Optional, Union
import chromadb
import chromadb.errors
diff --git a/tests/agent_test.py b/tests/agent_test.py
index d437a57fc..796e651db 100644
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -10,6 +10,8 @@ from crewai import Agent, Crew, Task
from crewai.agents.cache import CacheHandler
from crewai.agents.crew_agent_executor import AgentFinish, CrewAgentExecutor
from crewai.agents.parser import CrewAgentParser, OutputParserException
+from crewai.knowledge.knowledge import Knowledge
+from crewai.knowledge.knowledge_config import KnowledgeConfig
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
from crewai.llm import LLM
@@ -259,7 +261,9 @@ def test_cache_hitting():
def handle_tool_end(source, event):
received_events.append(event)
- with (patch.object(CacheHandler, "read") as read,):
+ with (
+ patch.object(CacheHandler, "read") as read,
+ ):
read.return_value = "0"
task = Task(
description="What is 2 times 6? Ignore correctness and just return the result of the multiplication tool, you must use the tool.",
@@ -1611,6 +1615,62 @@ def test_agent_with_knowledge_sources():
assert "red" in result.raw.lower()
+def test_agent_with_knowledge_sources_with_query_limit_and_score_threshold():
+ content = "Brandon's favorite color is red and he likes Mexican food."
+ string_source = StringKnowledgeSource(content=content)
+ knowledge_config = KnowledgeConfig(limit=10, score_threshold=0.5)
+ with patch.object(Knowledge, "query") as mock_knowledge_query:
+ agent = Agent(
+ role="Information Agent",
+ goal="Provide information based on knowledge sources",
+ backstory="You have access to specific knowledge sources.",
+ llm=LLM(model="gpt-4o-mini"),
+ knowledge_sources=[string_source],
+ knowledge_config=knowledge_config,
+ )
+ task = Task(
+ description="What is Brandon's favorite color?",
+ expected_output="Brandon's favorite color.",
+ agent=agent,
+ )
+ crew = Crew(agents=[agent], tasks=[task])
+ crew.kickoff()
+
+ assert agent.knowledge is not None
+ mock_knowledge_query.assert_called_once_with(
+ [task.prompt()],
+ **knowledge_config.model_dump(),
+ )
+
+
+def test_agent_with_knowledge_sources_with_query_limit_and_score_threshold_default():
+ content = "Brandon's favorite color is red and he likes Mexican food."
+ string_source = StringKnowledgeSource(content=content)
+ knowledge_config = KnowledgeConfig()
+ with patch.object(Knowledge, "query") as mock_knowledge_query:
+ agent = Agent(
+ role="Information Agent",
+ goal="Provide information based on knowledge sources",
+ backstory="You have access to specific knowledge sources.",
+ llm=LLM(model="gpt-4o-mini"),
+ knowledge_sources=[string_source],
+ knowledge_config=knowledge_config,
+ )
+ task = Task(
+ description="What is Brandon's favorite color?",
+ expected_output="Brandon's favorite color.",
+ agent=agent,
+ )
+ crew = Crew(agents=[agent], tasks=[task])
+ crew.kickoff()
+
+ assert agent.knowledge is not None
+ mock_knowledge_query.assert_called_once_with(
+ [task.prompt()],
+ **knowledge_config.model_dump(),
+ )
+
+
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_with_knowledge_sources_extensive_role():
content = "Brandon's favorite color is red and he likes Mexican food."