From 543ebbf3a4b951a816d44f6924acaaee59f09f79 Mon Sep 17 00:00:00 2001 From: Lorenze Jay Date: Mon, 2 Dec 2024 15:53:35 -0800 Subject: [PATCH] Knowledge project directory standard --- src/crewai/cli/create_crew.py | 14 ++++++-- src/crewai/cli/templates/crew/crew.py | 12 ++++++- .../crew/knowledge/user_preference.txt | 4 +++ .../source/base_file_knowledge_source.py | 36 ++++++++++++++++--- .../source/text_file_knowledge_source.py | 4 +-- 5 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 src/crewai/cli/templates/crew/knowledge/user_preference.txt diff --git a/src/crewai/cli/create_crew.py b/src/crewai/cli/create_crew.py index 06440d74e..c658b0de1 100644 --- a/src/crewai/cli/create_crew.py +++ b/src/crewai/cli/create_crew.py @@ -39,6 +39,7 @@ def create_folder_structure(name, parent_folder=None): folder_path.mkdir(parents=True) (folder_path / "tests").mkdir(exist_ok=True) + (folder_path / "knowledge").mkdir(exist_ok=True) if not parent_folder: (folder_path / "src" / folder_name).mkdir(parents=True) (folder_path / "src" / folder_name / "tools").mkdir(parents=True) @@ -52,7 +53,14 @@ def copy_template_files(folder_path, name, class_name, parent_folder): templates_dir = package_dir / "templates" / "crew" root_template_files = ( - [".gitignore", "pyproject.toml", "README.md"] if not parent_folder else [] + [ + ".gitignore", + "pyproject.toml", + "README.md", + "knowledge/user_preference.txt", + ] + if not parent_folder + else [] ) tools_template_files = ["tools/custom_tool.py", "tools/__init__.py"] config_template_files = ["config/agents.yaml", "config/tasks.yaml"] @@ -168,7 +176,9 @@ def create_crew(name, provider=None, skip_provider=False, parent_folder=None): templates_dir = package_dir / "templates" / "crew" root_template_files = ( - [".gitignore", "pyproject.toml", "README.md"] if not parent_folder else [] + [".gitignore", "pyproject.toml", "README.md", "knowledge/user_preference.txt"] + if not parent_folder + else [] ) tools_template_files = 
["tools/custom_tool.py", "tools/__init__.py"] config_template_files = ["config/agents.yaml", "config/tasks.yaml"] diff --git a/src/crewai/cli/templates/crew/crew.py b/src/crewai/cli/templates/crew/crew.py index 6f8e66c4a..502e4e57a 100644 --- a/src/crewai/cli/templates/crew/crew.py +++ b/src/crewai/cli/templates/crew/crew.py @@ -1,6 +1,6 @@ from crewai import Agent, Crew, Process, Task from crewai.project import CrewBase, agent, crew, task, before_kickoff, after_kickoff - +from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource # Uncomment the following line to use an example of a custom tool # from {{folder_name}}.tools.custom_tool import MyCustomTool @@ -57,10 +57,20 @@ class {{crew_name}}(): @crew def crew(self) -> Crew: """Creates the {{crew_name}} crew""" + # You can add knowledge sources here + # knowledge_path = "user_preference.txt" + # sources = [ + # TextFileKnowledgeSource( + # file_path="knowledge/user_preference.txt", + # metadata={"preference": "personal"} + # ), + # ] + return Crew( agents=self.agents, # Automatically created by the @agent decorator tasks=self.tasks, # Automatically created by the @task decorator process=Process.sequential, verbose=True, # process=Process.hierarchical, # In case you wanna use that instead https://docs.crewai.com/how-to/Hierarchical/ + # knowledge_sources=sources, # In the case you want to add knowledge sources ) diff --git a/src/crewai/cli/templates/crew/knowledge/user_preference.txt b/src/crewai/cli/templates/crew/knowledge/user_preference.txt new file mode 100644 index 000000000..dd63a17bf --- /dev/null +++ b/src/crewai/cli/templates/crew/knowledge/user_preference.txt @@ -0,0 +1,4 @@ +User name is John Doe. +User is an AI Engineer. +User is interested in AI Agents. +User is based in San Francisco, California. 
diff --git a/src/crewai/knowledge/source/base_file_knowledge_source.py b/src/crewai/knowledge/source/base_file_knowledge_source.py index b6e346534..ad160683d 100644 --- a/src/crewai/knowledge/source/base_file_knowledge_source.py +++ b/src/crewai/knowledge/source/base_file_knowledge_source.py @@ -1,17 +1,20 @@ from pathlib import Path -from typing import Union, List +from typing import Union, List, Dict, Any from pydantic import Field from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource -from typing import Dict, Any +from crewai.utilities.logger import Logger from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage class BaseFileKnowledgeSource(BaseKnowledgeSource): """Base class for knowledge sources that load content from files.""" - file_path: Union[Path, List[Path]] = Field(...) + _logger: Logger = Logger(verbose=True) + file_path: Union[Path, List[Path], str, List[str]] = Field( + ..., description="The path to the file" + ) content: Dict[Path, str] = Field(init=False, default_factory=dict) storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage) @@ -20,13 +23,34 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource): self.content = self.load_content() def load_content(self) -> Dict[Path, str]: - """Load and preprocess file content. Should be overridden by subclasses.""" + """Load and preprocess file content. Should be overridden by subclasses. 
We want to assume that the file path is relative to the project root in the knowledge directory.""" + + if isinstance(self.file_path, str): + self.file_path = self.convert_to_path(self.file_path) + elif isinstance(self.file_path, list): + processed_paths = [] + for path in self.file_path: + processed_paths.append(self.convert_to_path(path)) + self.file_path = processed_paths + paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path + if not isinstance(paths, list): + raise ValueError("file_path must be a Path or a list of Paths") for path in paths: if not path.exists(): + self._logger.log( + "error", + f"File not found: {path}. Try adding sources to the knowledge directory.", + color="red", + ) raise FileNotFoundError(f"File not found: {path}") if not path.is_file(): + self._logger.log( + "error", + f"Path is not a file: {path}", + color="red", + ) raise ValueError(f"Path is not a file: {path}") return {} @@ -34,3 +58,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource): """Save the documents to the storage.""" chunk_metadatas = [metadata.copy() for _ in self.chunks] self.storage.save(self.chunks, chunk_metadatas) + + def convert_to_path(self, path: Union[Path, str]) -> Path: + """Convert a path to a Path object.""" + return Path("knowledge/" + path) if isinstance(path, str) else path diff --git a/src/crewai/knowledge/source/text_file_knowledge_source.py b/src/crewai/knowledge/source/text_file_knowledge_source.py index 640db4ef9..cbb1aedf5 100644 --- a/src/crewai/knowledge/source/text_file_knowledge_source.py +++ b/src/crewai/knowledge/source/text_file_knowledge_source.py @@ -13,8 +13,8 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource): paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path content = {} for path in paths: - with path.open("r", encoding="utf-8") as f: - content[path] = f.read() # type: ignore + with open(path, "r", encoding="utf-8") as f: + content[path] = f.read() return content def 
add(self) -> None: