Compare commits

...

4 Commits

Author SHA1 Message Date
Devin AI
ffa386e302 Enhance set_knowledge implementation with better validation and documentation
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-03-17 04:55:07 +00:00
Devin AI
e73cf7b00f Fix type-checker errors in set_knowledge implementation
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-03-17 04:48:02 +00:00
Devin AI
6b99aa4ca0 Add test cassette for set_knowledge test
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-03-17 04:46:43 +00:00
Devin AI
e6fba64939 Implement set_knowledge method in BaseAgent to enable knowledge integration (fixes #2385)
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-03-17 04:45:49 +00:00
5 changed files with 287 additions and 19 deletions

View File

@@ -134,25 +134,73 @@ class Agent(BaseAgent):
self.cache_handler = CacheHandler()
self.set_cache_handler(self.cache_handler)
def set_knowledge(
    self,
    knowledge_sources: Optional[List["BaseKnowledgeSource"]] = None,
    embedder_config: Optional[Dict[str, Any]] = None,
    crew_embedder: Optional[Dict[str, Any]] = None,
) -> None:
    """Attach knowledge sources to the agent and build its ``Knowledge`` object.

    Args:
        knowledge_sources: Sources to store on the agent. ``None`` keeps
            whatever is already in ``self.knowledge_sources``.
        embedder_config: Embedder configuration. It is applied only when the
            agent has no embedder of its own (``self.embedder is None``).
        crew_embedder: Deprecated alias of ``embedder_config``, kept so that
            existing ``set_knowledge(crew_embedder=...)`` calls keep working.
            Used only when ``embedder_config`` is empty or ``None``.

    Raises:
        TypeError: If ``knowledge_sources`` is neither a list nor ``None``.
        ValueError: If an element of ``knowledge_sources`` is not a
            ``BaseKnowledgeSource``, or if any other error occurs while the
            ``Knowledge`` object is being created.

    Example:
        ```python
        from crewai.knowledge.source import StringKnowledgeSource
        content = "The capital of France is Paris."
        source = StringKnowledgeSource(content=content)
        agent.set_knowledge(
            knowledge_sources=[source],
            embedder_config={"provider": "openai", "model": "text-embedding-3-small"}
        )
        ```
    """
    try:
        # Validate before touching any attribute so that a rejected call
        # leaves the agent exactly as it was.
        if knowledge_sources is not None:
            if not isinstance(knowledge_sources, list):
                raise TypeError("knowledge_sources must be a list or None")
            if not all(isinstance(k, BaseKnowledgeSource) for k in knowledge_sources):
                raise ValueError("All knowledge sources must be instances of BaseKnowledgeSource")

        # Backward compatibility: accept the old ``crew_embedder`` keyword.
        effective_embedder = embedder_config or crew_embedder
        # An embedder already configured on the agent always wins.
        if effective_embedder and self.embedder is None:
            self.embedder = effective_embedder

        if knowledge_sources is not None:
            self.knowledge_sources = knowledge_sources

        # Create the knowledge object only when there is something to index.
        if self.knowledge_sources:
            full_pattern = re.compile(r"[^a-zA-Z0-9\-_\r\n]|(\.\.)")
            # NOTE(review): id(self) is only unique for the lifetime of this
            # object, so the collection name differs between runs - confirm
            # that previously persisted collections are not meant to be reused.
            knowledge_agent_name = f"{re.sub(full_pattern, '_', self.role)}_{id(self)}"
            self.knowledge = Knowledge(
                sources=self.knowledge_sources,
                embedder=self.embedder,
                collection_name=knowledge_agent_name,
                storage=self.knowledge_storage or None,
            )
    # Re-raise with a descriptive prefix while keeping the original exception
    # attached as __cause__ so the real traceback is not lost.
    except TypeError as e:
        raise TypeError(f"Invalid Knowledge Configuration Type: {str(e)}") from e
    except ValueError as e:
        raise ValueError(f"Invalid Knowledge Configuration Value: {str(e)}") from e
    except Exception as e:
        raise ValueError(f"Error setting knowledge: {str(e)}") from e
def execute_task(
self,

View File

@@ -2,7 +2,7 @@ import uuid
from abc import ABC, abstractmethod
from copy import copy as shallow_copy
from hashlib import md5
from typing import Any, Dict, List, Optional, TypeVar
from typing import Any, Dict, List, Optional, TypeVar, Union, cast
from pydantic import (
UUID4,
@@ -148,6 +148,10 @@ class BaseAgent(ABC, BaseModel):
default=None,
description="Custom knowledge storage for the agent.",
)
embedder_config: Optional[Dict[str, Any]] = Field(
default=None,
description="Configuration for embedding generation.",
)
security_config: SecurityConfig = Field(
default_factory=SecurityConfig,
description="Security configuration for the agent, including fingerprinting.",
@@ -362,5 +366,74 @@ class BaseAgent(ABC, BaseModel):
self._rpm_controller = rpm_controller
self.create_agent_executor()
def set_knowledge(
    self,
    knowledge_sources: Optional[List["BaseKnowledgeSource"]] = None,
    embedder_config: Optional[Dict[str, Any]] = None,
    crew_embedder: Optional[Dict[str, Any]] = None,
) -> None:
    """Store knowledge sources / embedder settings and build ``self.knowledge``.

    Both arguments are validated before anything is assigned, so a call that
    raises leaves ``knowledge_sources`` and ``embedder_config`` untouched.

    Args:
        knowledge_sources: Sources to store on the agent. ``None`` keeps
            whatever is already in ``self.knowledge_sources``.
        embedder_config: Embedder configuration; must be a dict containing a
            ``"provider"`` key. ``None`` keeps the current
            ``self.embedder_config``.
        crew_embedder: Deprecated alias of ``embedder_config``, kept so that
            existing ``set_knowledge(crew_embedder=...)`` calls keep working.
            Used only when ``embedder_config`` is ``None``.

    Raises:
        TypeError: If ``knowledge_sources`` is not a list or ``None``, or if
            the embedder configuration is not a dict or ``None``.
        ValueError: If an element of ``knowledge_sources`` is not a
            ``BaseKnowledgeSource``, if the embedder configuration has no
            ``"provider"`` key, or if any other error occurs while the
            ``Knowledge`` object is being created.

    Example:
        ```python
        from crewai.knowledge.source import StringKnowledgeSource
        content = "The capital of France is Paris."
        source = StringKnowledgeSource(content=content)
        agent.set_knowledge(
            knowledge_sources=[source],
            embedder_config={"provider": "openai", "model": "text-embedding-3-small"}
        )
        ```
    """
    # Local import: the module's import block is not changed by this method.
    import re

    # Backward compatibility: accept the old ``crew_embedder`` keyword.
    if embedder_config is None:
        embedder_config = crew_embedder

    try:
        # --- validation only; no attribute is modified in this section ---
        if knowledge_sources is not None:
            if not isinstance(knowledge_sources, list):
                raise TypeError("knowledge_sources must be a list or None")
            if not all(isinstance(k, BaseKnowledgeSource) for k in knowledge_sources):
                raise ValueError("All knowledge sources must be instances of BaseKnowledgeSource")
        if embedder_config is not None:
            if not isinstance(embedder_config, dict):
                raise TypeError("embedder_config must be a dictionary or None")
            if "provider" not in embedder_config:
                raise ValueError("embedder_config must contain a 'provider' key")

        # --- everything is valid: commit the new state ---
        if knowledge_sources is not None:
            self.knowledge_sources = knowledge_sources
        if embedder_config is not None:
            self.embedder_config = embedder_config

        # Create the knowledge object only when there is something to index.
        if self.knowledge_sources:
            # Replace every character outside [a-zA-Z0-9-_] (and "..") so the
            # role cannot inject arbitrary characters into the collection name.
            safe_role = re.sub(r"[^a-zA-Z0-9\-_\r\n]|(\.\.)", "_", self.role)
            # NOTE(review): id(self) is only unique for the lifetime of this
            # object, so the collection name differs between runs - confirm
            # that previously persisted collections are not meant to be reused.
            knowledge_agent_name = f"{safe_role}_{id(self)}"
            # NOTE(review): other callers in this change set pass
            # ``embedder=`` and ``storage=`` to Knowledge - confirm that
            # ``embedder_config`` is a keyword the constructor really uses.
            self.knowledge = Knowledge(
                sources=self.knowledge_sources,
                embedder_config=self.embedder_config,
                collection_name=knowledge_agent_name,
            )
    # Re-raise with a descriptive prefix while keeping the original exception
    # attached as __cause__ so the real traceback is not lost.
    except TypeError as e:
        raise TypeError(f"Invalid Knowledge Configuration Type: {str(e)}") from e
    except ValueError as e:
        raise ValueError(f"Invalid Knowledge Configuration Value: {str(e)}") from e
    except Exception as e:
        raise ValueError(f"Error setting knowledge: {str(e)}") from e

View File

@@ -621,7 +621,7 @@ class Crew(BaseModel):
agent.i18n = i18n
# type: ignore[attr-defined] # Argument 1 to "_interpolate_inputs" of "Crew" has incompatible type "dict[str, Any] | None"; expected "dict[str, Any]"
agent.crew = self # type: ignore[attr-defined]
agent.set_knowledge(crew_embedder=self.embedder)
agent.set_knowledge(embedder_config=self.embedder)
# TODO: Create an AgentFunctionCalling protocol for future refactoring
if not agent.function_calling_llm: # type: ignore # "BaseAgent" has no attribute "function_calling_llm"
agent.function_calling_llm = self.function_calling_llm # type: ignore # "BaseAgent" has no attribute "function_calling_llm"

View File

@@ -1586,6 +1586,76 @@ def test_agent_execute_task_with_ollama():
assert "AI" in result or "artificial intelligence" in result.lower()
@pytest.mark.vcr(filter_headers=["authorization"])
def test_base_agent_set_knowledge():
    """Verify that set_knowledge stores the sources and builds a Knowledge object.

    ``Knowledge`` is patched inside ``base_agent`` so the test inspects the
    arguments it is constructed with instead of creating a real one.
    """
    from crewai.agents.agent_builder.base_agent import BaseAgent
    from crewai.knowledge.knowledge import Knowledge

    class StubAgent(BaseAgent):
        """Smallest concrete BaseAgent: every abstract hook is a no-op."""

        def execute_task(self, task, context=None, tools=None):
            return "Test execution"

        def create_agent_executor(self, tools=None):
            pass

        def _parse_tools(self, tools):
            return tools

        def get_delegation_tools(self, agents):
            return []

        def get_output_converter(self, llm, text, model, instructions):
            return None

    # Fixtures: one string-backed source and a bare agent.
    source = StringKnowledgeSource(content="The capital of France is Paris.")
    agent = StubAgent(
        role="Test Agent",
        goal="Test Goal",
        backstory="Test Backstory",
    )

    with patch("crewai.agents.agent_builder.base_agent.Knowledge") as MockKnowledge:
        MockKnowledge.return_value.sources = [source]

        # 1) Sources only.
        agent.set_knowledge(knowledge_sources=[source])

        assert agent.knowledge_sources == [source]
        assert agent.knowledge is not None
        assert MockKnowledge.called
        # The collection name is the sanitised role followed by a unique id.
        first_call_kwargs = MockKnowledge.call_args[1]
        assert first_call_kwargs["collection_name"].startswith("Test_Agent_")

        # 2) Sources together with an embedder configuration.
        embedder_settings = {
            "provider": "openai",
            "model": "text-embedding-3-small",
        }
        agent.set_knowledge(
            knowledge_sources=[source],
            embedder_config=embedder_settings,
        )

        assert agent.embedder_config == embedder_settings
        second_call_kwargs = MockKnowledge.call_args[1]
        assert second_call_kwargs["embedder_config"] == embedder_settings

    # 3) A non-source element is rejected by set_knowledge's own validation,
    #    before any Knowledge object would be created.
    with pytest.raises(ValueError):
        agent.set_knowledge(knowledge_sources=["invalid source"])
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_with_knowledge_sources():
# Create a knowledge source with some content

View File

@@ -0,0 +1,77 @@
interactions:
- request:
body: '{"input": ["The capital of France is Paris."], "model": "text-embedding-3-small",
"encoding_format": "base64"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '110'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.61.0
x-stainless-arch:
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- Linux
x-stainless-package-version:
- 1.61.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/embeddings
response:
content: "{\n \"error\": {\n \"message\": \"Incorrect API key provided:
sk-proj-********************************************************************************************************************************************************sLcA.
You can find your API key at https://platform.openai.com/account/api-keys.\",\n
\ \"type\": \"invalid_request_error\",\n \"param\": null,\n \"code\":
\"invalid_api_key\"\n }\n}\n"
headers:
CF-RAY:
- 9219d2095edc680f-SEA
Connection:
- keep-alive
Content-Length:
- '414'
Content-Type:
- application/json; charset=utf-8
Date:
- Mon, 17 Mar 2025 04:41:52 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=VL2xCt_SZgziztqb6CFL0smPGPhRpbELZKUdSHPmfLQ-1742186512-1.0.1.1-UFayToBt3jFkEkjQwZJ7A4KLy0.uZK9sqwbNqpMQ75dMEz2hycNU3NwtXor0NmM7k7XsdxtcXPfv.JcVjYatku_yE3I6qMEMGsgoog.guDU;
path=/; expires=Mon, 17-Mar-25 05:11:52 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=O4ymzjmuwsEutsmbHpzKDz4uyyZNA1tSUX0M.FNCjro-1742186512991-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
X-Content-Type-Options:
- nosniff
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
vary:
- Origin
x-request-id:
- req_62700144d22a58e93c0464aa643af3ec
http_version: HTTP/1.1
status_code: 401
version: 1