Knowledge project directory standard

This commit is contained in:
Lorenze Jay
2024-12-02 15:53:35 -08:00
parent 3285c1b196
commit 543ebbf3a4
5 changed files with 61 additions and 9 deletions

View File

@@ -39,6 +39,7 @@ def create_folder_structure(name, parent_folder=None):
folder_path.mkdir(parents=True)
(folder_path / "tests").mkdir(exist_ok=True)
(folder_path / "knowledge").mkdir(exist_ok=True)
if not parent_folder:
(folder_path / "src" / folder_name).mkdir(parents=True)
(folder_path / "src" / folder_name / "tools").mkdir(parents=True)
@@ -52,7 +53,14 @@ def copy_template_files(folder_path, name, class_name, parent_folder):
templates_dir = package_dir / "templates" / "crew"
root_template_files = (
[".gitignore", "pyproject.toml", "README.md"] if not parent_folder else []
[
".gitignore",
"pyproject.toml",
"README.md",
"knowledge/user_preference.txt",
]
if not parent_folder
else []
)
tools_template_files = ["tools/custom_tool.py", "tools/__init__.py"]
config_template_files = ["config/agents.yaml", "config/tasks.yaml"]
@@ -168,7 +176,9 @@ def create_crew(name, provider=None, skip_provider=False, parent_folder=None):
templates_dir = package_dir / "templates" / "crew"
root_template_files = (
[".gitignore", "pyproject.toml", "README.md"] if not parent_folder else []
[".gitignore", "pyproject.toml", "README.md", "knowledge/user_preference.txt"]
if not parent_folder
else []
)
tools_template_files = ["tools/custom_tool.py", "tools/__init__.py"]
config_template_files = ["config/agents.yaml", "config/tasks.yaml"]

View File

@@ -1,6 +1,6 @@
from crewai import Agent, Crew, Process, Task
from crewai.project import CrewBase, agent, crew, task, before_kickoff, after_kickoff
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
# Uncomment the following line to use an example of a custom tool
# from {{folder_name}}.tools.custom_tool import MyCustomTool
@@ -57,10 +57,20 @@ class {{crew_name}}():
@crew
def crew(self) -> Crew:
"""Creates the {{crew_name}} crew"""
# You can add knowledge sources here
# knowledge_path = "user_preference.txt"
# sources = [
# TextFileKnowledgeSource(
# file_path="knowledge/user_preference.txt",
# metadata={"preference": "personal"}
# ),
# ]
return Crew(
agents=self.agents, # Automatically created by the @agent decorator
tasks=self.tasks, # Automatically created by the @task decorator
process=Process.sequential,
verbose=True,
# process=Process.hierarchical, # In case you wanna use that instead https://docs.crewai.com/how-to/Hierarchical/
# knowledge_sources=sources, # In the case you want to add knowledge sources``
)

View File

@@ -0,0 +1,4 @@
User name is John Doe.
User is an AI Engineer.
User is interested in AI Agents.
User is based in San Francisco, California.

View File

@@ -1,17 +1,20 @@
from pathlib import Path
from typing import Union, List
from typing import Union, List, Dict, Any
from pydantic import Field
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from typing import Dict, Any
from crewai.utilities.logger import Logger
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
class BaseFileKnowledgeSource(BaseKnowledgeSource):
"""Base class for knowledge sources that load content from files."""
file_path: Union[Path, List[Path]] = Field(...)
_logger: Logger = Logger(verbose=True)
file_path: Union[Path, List[Path], str, List[str]] = Field(
..., description="The path to the file"
)
content: Dict[Path, str] = Field(init=False, default_factory=dict)
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
@@ -20,13 +23,34 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
self.content = self.load_content()
def load_content(self) -> Dict[Path, str]:
"""Load and preprocess file content. Should be overridden by subclasses."""
"""Load and preprocess file content. Should be overridden by subclasses. We want to assume that the file path is relative to the project root in the knowledge directory."""
if isinstance(self.file_path, str):
self.file_path = self.convert_to_path(self.file_path)
elif isinstance(self.file_path, list):
processed_paths = []
for path in self.file_path:
processed_paths.append(self.convert_to_path(path))
self.file_path = processed_paths
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
if not isinstance(paths, list):
raise ValueError("file_path must be a Path or a list of Paths")
for path in paths:
if not path.exists():
self._logger.log(
"error",
f"File not found: {path}. Try adding sources to the knowledge directory.",
color="red",
)
raise FileNotFoundError(f"File not found: {path}")
if not path.is_file():
self._logger.log(
"error",
f"Path is not a file: {path}",
color="red",
)
raise ValueError(f"Path is not a file: {path}")
return {}
@@ -34,3 +58,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
"""Save the documents to the storage."""
chunk_metadatas = [metadata.copy() for _ in self.chunks]
self.storage.save(self.chunks, chunk_metadatas)
def convert_to_path(self, path: Union[Path, str]) -> Path:
"""Convert a path to a Path object."""
return Path("knowledge/" + path) if isinstance(path, str) else path

View File

@@ -13,8 +13,8 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource):
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
content = {}
for path in paths:
with path.open("r", encoding="utf-8") as f:
content[path] = f.read() # type: ignore
with open(path, "r", encoding="utf-8") as f:
content[path] = f.read()
return content
def add(self) -> None: