mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-15 02:58:30 +00:00
Improve types and better support for file paths
This commit is contained in:
@@ -8,8 +8,8 @@ from pydantic import Field, InstanceOf, PrivateAttr, model_validator
|
||||
from crewai.agents import CacheHandler
|
||||
from crewai.agents.agent_builder.base_agent import BaseAgent
|
||||
from crewai.agents.crew_agent_executor import CrewAgentExecutor
|
||||
|
||||
# from crewai.knowledge import StringKnowledgeBase
|
||||
from crewai.knowledge.knowledge import Knowledge
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from crewai.llm import LLM
|
||||
from crewai.memory.contextual.contextual_memory import ContextualMemory
|
||||
from crewai.tools import BaseTool
|
||||
@@ -87,9 +87,9 @@ class Agent(BaseAgent):
|
||||
llm: Union[str, InstanceOf[LLM], Any] = Field(
|
||||
description="Language model that will run the agent.", default=None
|
||||
)
|
||||
knowledge: Optional[str] = Field(
|
||||
knowledge_sources: Optional[List[BaseKnowledgeSource]] = Field(
|
||||
default=None,
|
||||
description="Knowledge base for the agent.",
|
||||
description="Knowledge sources for the agent.",
|
||||
)
|
||||
function_calling_llm: Optional[Any] = Field(
|
||||
description="Language model that will run the agent.", default=None
|
||||
@@ -125,6 +125,8 @@ class Agent(BaseAgent):
|
||||
default="safe",
|
||||
description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).",
|
||||
)
|
||||
# TODO: We need to add in knowledge config (score, top_k, etc)
|
||||
_knowledge: Optional[Knowledge] = PrivateAttr(default=None)
|
||||
|
||||
@model_validator(mode="after")
|
||||
def post_init_setup(self):
|
||||
@@ -189,7 +191,11 @@ class Agent(BaseAgent):
|
||||
if self.allow_code_execution:
|
||||
self._validate_docker_installation()
|
||||
|
||||
# self.knowledge = StringKnowledgeBase(content=self.knowledge)
|
||||
# Initialize the Knowledge object if knowledge_sources are provided
|
||||
if self.knowledge_sources:
|
||||
self._knowledge = Knowledge(sources=self.knowledge_sources)
|
||||
else:
|
||||
self._knowledge = None
|
||||
|
||||
return self
|
||||
|
||||
@@ -234,6 +240,16 @@ class Agent(BaseAgent):
|
||||
if memory.strip() != "":
|
||||
task_prompt += self.i18n.slice("memory").format(memory=memory)
|
||||
|
||||
# Integrate the knowledge base
|
||||
if self._knowledge:
|
||||
# Query the knowledge base for relevant information
|
||||
knowledge_snippets = self._knowledge.query(query=task.prompt())
|
||||
print("knowledge_snippets", knowledge_snippets)
|
||||
if knowledge_snippets:
|
||||
formatted_knowledge = "\n".join(knowledge_snippets)
|
||||
print("formatted_knowledge", formatted_knowledge)
|
||||
task_prompt += f"\n\nAdditional Information:\n{formatted_knowledge}"
|
||||
|
||||
tools = tools or self.tools or []
|
||||
self.create_agent_executor(tools=tools, task=task)
|
||||
|
||||
|
||||
24
src/crewai/knowledge/source/base_file_knowledge_source.py
Normal file
24
src/crewai/knowledge/source/base_file_knowledge_source.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
|
||||
|
||||
class BaseFileKnowledgeSource(BaseKnowledgeSource):
    """Knowledge source whose text comes from a single file on disk.

    ``file_path`` is required. ``content`` starts out as an empty string and
    is replaced with whatever ``load_content()`` returns once the model has
    been initialised.
    """

    # Location of the file backing this source (required).
    file_path: Path = Field(...)
    # Loaded text; assigned in model_post_init.
    content: str = Field(default="", init=False)

    def model_post_init(self, context):
        """Fill ``content`` with the result of ``load_content()``."""
        loaded = self.load_content()
        self.content = loaded

    def load_content(self) -> str:
        """Validate ``file_path`` and return an empty string.

        This base implementation only checks the path; it is meant to be
        overridden by subclasses that actually read the file.

        Raises:
            FileNotFoundError: ``file_path`` does not exist.
            ValueError: ``file_path`` exists but is not a file.
        """
        if self.file_path.exists():
            if self.file_path.is_file():
                # Path is valid; the base class itself has nothing to read.
                return ""
            raise ValueError(f"Path is not a file: {self.file_path}")
        raise FileNotFoundError(f"File not found: {self.file_path}")
|
||||
@@ -2,22 +2,20 @@ from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
||||
|
||||
|
||||
class BaseKnowledgeSource(ABC):
|
||||
class BaseKnowledgeSource(BaseModel, ABC):
|
||||
"""Abstract base class for knowledge sources."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
chunk_size: int = 1000,
|
||||
chunk_overlap: int = 200,
|
||||
):
|
||||
self.chunk_size = chunk_size
|
||||
self.chunk_overlap = chunk_overlap
|
||||
self.chunks: List[str] = []
|
||||
self.chunk_embeddings: List[np.ndarray] = []
|
||||
chunk_size: int = 1000
|
||||
chunk_overlap: int = 200
|
||||
chunks: List[str] = Field(default_factory=list)
|
||||
chunk_embeddings: List[np.ndarray] = Field(default_factory=list)
|
||||
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
@abstractmethod
|
||||
def load_content(self):
|
||||
|
||||
@@ -1,22 +1,23 @@
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||
|
||||
|
||||
class PDFKnowledgeSource(BaseKnowledgeSource):
|
||||
class PDFKnowledgeSource(BaseFileKnowledgeSource):
|
||||
"""A knowledge source that stores and queries PDF file content using embeddings."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
file_path: str,
|
||||
chunk_size: int = 1000,
|
||||
chunk_overlap: int = 200,
|
||||
):
|
||||
super().__init__(chunk_size, chunk_overlap)
|
||||
self.file_path = Path(file_path)
|
||||
self.content = self.load_content()
|
||||
def load_content(self) -> str:
|
||||
"""Load and preprocess PDF file content."""
|
||||
super().load_content() # Validate the file path
|
||||
pdfplumber = self._import_pdfplumber()
|
||||
text = ""
|
||||
with pdfplumber.open(self.file_path) as pdf:
|
||||
for page in pdf.pages:
|
||||
page_text = page.extract_text()
|
||||
if page_text:
|
||||
text += page_text + "\n"
|
||||
return text
|
||||
|
||||
def _import_pdfplumber(self):
|
||||
"""Dynamically import pdfplumber."""
|
||||
@@ -29,22 +30,6 @@ class PDFKnowledgeSource(BaseKnowledgeSource):
|
||||
"pdfplumber is not installed. Please install it with: pip install pdfplumber"
|
||||
)
|
||||
|
||||
def load_content(self) -> str:
|
||||
"""Load and preprocess PDF file content."""
|
||||
if not self.file_path.exists():
|
||||
raise FileNotFoundError(f"File not found: {self.file_path}")
|
||||
if not self.file_path.is_file():
|
||||
raise ValueError(f"Path is not a file: {self.file_path}")
|
||||
|
||||
pdfplumber = self._import_pdfplumber()
|
||||
text = ""
|
||||
with pdfplumber.open(self.file_path) as pdf:
|
||||
for page in pdf.pages:
|
||||
page_text = page.extract_text()
|
||||
if page_text:
|
||||
text += page_text + "\n"
|
||||
return text
|
||||
|
||||
def add(self, embedder: BaseEmbedder) -> None:
|
||||
"""
|
||||
Add PDF file content to the knowledge source, chunk it, compute embeddings,
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from typing import List
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
|
||||
@@ -7,29 +9,19 @@ from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
class StringKnowledgeSource(BaseKnowledgeSource):
|
||||
"""A knowledge source that stores and queries plain text content using embeddings."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
content: str,
|
||||
chunk_size: int = 1000,
|
||||
chunk_overlap: int = 200,
|
||||
):
|
||||
super().__init__(
|
||||
chunk_size,
|
||||
chunk_overlap,
|
||||
)
|
||||
self.content = content
|
||||
content: str = Field(...)
|
||||
|
||||
def model_post_init(self, context):
|
||||
"""Post-initialization method to validate content."""
|
||||
self.load_content()
|
||||
|
||||
def load_content(self):
|
||||
"""Load and preprocess string content."""
|
||||
"""Validate string content."""
|
||||
if not isinstance(self.content, str):
|
||||
raise ValueError("StringKnowledgeSource only accepts string content")
|
||||
|
||||
def add(self, embedder: BaseEmbedder) -> None:
|
||||
"""
|
||||
Add string content to the knowledge source, chunk it, compute embeddings,
|
||||
and save the embeddings.
|
||||
"""
|
||||
"""Add string content to the knowledge source, chunk it, compute embeddings, and save them."""
|
||||
new_chunks = self._chunk_text(self.content)
|
||||
self.chunks.extend(new_chunks)
|
||||
# Compute embeddings for the new chunks
|
||||
|
||||
@@ -1,30 +1,15 @@
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||
|
||||
|
||||
class TextFileKnowledgeSource(BaseKnowledgeSource):
|
||||
class TextFileKnowledgeSource(BaseFileKnowledgeSource):
|
||||
"""A knowledge source that stores and queries text file content using embeddings."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
file_path: str,
|
||||
chunk_size: int = 1000,
|
||||
chunk_overlap: int = 200,
|
||||
):
|
||||
super().__init__(chunk_size, chunk_overlap)
|
||||
self.file_path = Path(file_path)
|
||||
self.content = self.load_content()
|
||||
|
||||
def load_content(self) -> str:
|
||||
"""Load and preprocess text file content."""
|
||||
if not self.file_path.exists():
|
||||
raise FileNotFoundError(f"File not found: {self.file_path}")
|
||||
if not self.file_path.is_file():
|
||||
raise ValueError(f"Path is not a file: {self.file_path}")
|
||||
|
||||
super().load_content() # Validate the file path
|
||||
with self.file_path.open("r", encoding="utf-8") as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
@@ -10,10 +10,11 @@ from crewai import Agent, Crew, Task
|
||||
from crewai.agents.cache import CacheHandler
|
||||
from crewai.agents.crew_agent_executor import CrewAgentExecutor
|
||||
from crewai.agents.parser import AgentAction, CrewAgentParser, OutputParserException
|
||||
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
|
||||
from crewai.llm import LLM
|
||||
from crewai.tools import tool
|
||||
from crewai.tools.tool_calling import InstructorToolCalling
|
||||
from crewai.tools.tool_usage import ToolUsage
|
||||
from crewai.tools import tool
|
||||
from crewai.tools.tool_usage_events import ToolUsageFinished
|
||||
from crewai.utilities import RPMController
|
||||
from crewai.utilities.events import Emitter
|
||||
@@ -1574,3 +1575,32 @@ def test_agent_execute_task_with_ollama():
|
||||
result = agent.execute_task(task)
|
||||
assert len(result.split(".")) == 2
|
||||
assert "AI" in result or "artificial intelligence" in result.lower()
|
||||
|
||||
|
||||
# @pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_agent_with_knowledge_sources():
    """Check that an agent built with a string knowledge source answers "blue"."""
    # NOTE(review): the VCR marker above this test is commented out, so
    # execute_task presumably reaches the configured LLM for real -- confirm.

    # Seed one in-memory knowledge source with the fact the task asks about.
    brandon_facts = StringKnowledgeSource(
        content="Brandon's favorite color is blue and he likes Mexican food."
    )

    # Attach the source to the agent via knowledge_sources.
    info_agent = Agent(
        role="Information Agent",
        goal="Provide information based on knowledge sources",
        backstory="You have access to specific knowledge sources.",
        llm=LLM(model="gpt-3.5-turbo"),
        knowledge_sources=[brandon_facts],
    )

    # Ask a question whose answer is stated in the seeded content.
    color_question = Task(
        description="What is Brandon's favorite color?",
        expected_output="Brandon's favorite color.",
        agent=info_agent,
    )

    answer = info_agent.execute_task(color_question)

    # Case-insensitive match on the color named in the seeded content.
    assert "blue" in answer.lower()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Test Knowledge creation and querying functionality."""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from crewai.knowledge.knowledge import Knowledge
|
||||
from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource
|
||||
@@ -141,11 +141,11 @@ def test_multiple_2k_character_strings():
|
||||
def test_single_short_file(tmpdir):
|
||||
# Create a single short text file
|
||||
content = "Brandon's favorite sport is basketball."
|
||||
file_path = tmpdir.join("short_file.txt")
|
||||
file_path = Path(tmpdir.join("short_file.txt"))
|
||||
with open(file_path, "w") as f:
|
||||
f.write(content)
|
||||
|
||||
file_source = TextFileKnowledgeSource(file_path=str(file_path))
|
||||
file_source = TextFileKnowledgeSource(file_path=file_path)
|
||||
knowledge_base = Knowledge(sources=[file_source])
|
||||
|
||||
# Perform a query
|
||||
@@ -180,11 +180,11 @@ def test_single_2k_character_file(tmpdir):
|
||||
"Brandon's favorite sport is basketball, and he often plays with his friends on weekends. "
|
||||
"He is also a fan of the Golden State Warriors and enjoys watching their games. "
|
||||
) * 2 # Repeat to ensure it's 2k characters
|
||||
file_path = tmpdir.join("long_file.txt")
|
||||
file_path = Path(tmpdir.join("long_file.txt"))
|
||||
with open(file_path, "w") as f:
|
||||
f.write(content)
|
||||
|
||||
file_source = TextFileKnowledgeSource(file_path=str(file_path))
|
||||
file_source = TextFileKnowledgeSource(file_path=file_path)
|
||||
knowledge_base = Knowledge(sources=[file_source])
|
||||
|
||||
# Perform a query
|
||||
@@ -204,10 +204,10 @@ def test_multiple_short_files(tmpdir):
|
||||
]
|
||||
file_paths = []
|
||||
for i, content in enumerate(contents):
|
||||
file_path = tmpdir.join(f"file_{i}.txt")
|
||||
file_path = Path(tmpdir.join(f"file_{i}.txt"))
|
||||
with open(file_path, "w") as f:
|
||||
f.write(content)
|
||||
file_paths.append(str(file_path))
|
||||
file_paths.append(file_path)
|
||||
|
||||
file_sources = [TextFileKnowledgeSource(file_path=path) for path in file_paths]
|
||||
knowledge_base = Knowledge(sources=file_sources)
|
||||
@@ -272,10 +272,10 @@ def test_multiple_2k_character_files(tmpdir):
|
||||
]
|
||||
file_paths = []
|
||||
for i, content in enumerate(contents):
|
||||
file_path = tmpdir.join(f"long_file_{i}.txt")
|
||||
file_path = Path(tmpdir.join(f"long_file_{i}.txt"))
|
||||
with open(file_path, "w") as f:
|
||||
f.write(content)
|
||||
file_paths.append(str(file_path))
|
||||
file_paths.append(file_path)
|
||||
|
||||
file_sources = [TextFileKnowledgeSource(file_path=path) for path in file_paths]
|
||||
knowledge_base = Knowledge(sources=file_sources)
|
||||
@@ -307,10 +307,10 @@ def test_hybrid_string_and_files(tmpdir):
|
||||
]
|
||||
file_paths = []
|
||||
for i, content in enumerate(file_contents):
|
||||
file_path = tmpdir.join(f"file_{i}.txt")
|
||||
file_path = Path(tmpdir.join(f"file_{i}.txt"))
|
||||
with open(file_path, "w") as f:
|
||||
f.write(content)
|
||||
file_paths.append(str(file_path))
|
||||
file_paths.append(file_path)
|
||||
|
||||
file_sources = [TextFileKnowledgeSource(file_path=path) for path in file_paths]
|
||||
|
||||
@@ -327,9 +327,9 @@ def test_hybrid_string_and_files(tmpdir):
|
||||
|
||||
def test_pdf_knowledge_source():
|
||||
# Get the directory of the current file
|
||||
current_dir = os.path.dirname(__file__)
|
||||
current_dir = Path(__file__).parent
|
||||
# Construct the path to the PDF file
|
||||
pdf_path = os.path.join(current_dir, "crewai_quickstart.pdf")
|
||||
pdf_path = current_dir / "crewai_quickstart.pdf"
|
||||
|
||||
# Create a PDFKnowledgeSource
|
||||
pdf_source = PDFKnowledgeSource(file_path=pdf_path)
|
||||
|
||||
Reference in New Issue
Block a user