mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-24 00:08:29 +00:00
Compare commits
9 Commits
bugfix/add
...
feat/organ
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3bb59c1458 | ||
|
|
8b1aef9e2d | ||
|
|
7e93285df1 | ||
|
|
45d6f64f3e | ||
|
|
c1c79b1f8f | ||
|
|
8d7b0de732 | ||
|
|
9703d701b8 | ||
|
|
aab05388d5 | ||
|
|
543ebbf3a4 |
@@ -156,14 +156,35 @@ crew = Crew(
|
||||
agents=[agent],
|
||||
tasks=[task],
|
||||
knowledge_sources=[source],
|
||||
embedder_config={
|
||||
"model": "BAAI/bge-small-en-v1.5",
|
||||
"normalize": True,
|
||||
"max_length": 512
|
||||
embedder={
|
||||
"provider": "ollama",
|
||||
"config": {"model": "nomic-embed-text:latest"},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
### Referencing Sources
|
||||
|
||||
You can reference knowledge sources by their collection name or metadata.
|
||||
|
||||
* Add a directory to your crew project called `knowledge`:
|
||||
* File paths in knowledge can be referenced relative to the `knowledge` directory.
|
||||
|
||||
Example:
|
||||
A file inside the `knowledge` directory called `example.txt` can be referenced as `example.txt`.
|
||||
|
||||
```python
|
||||
source = TextFileKnowledgeSource(
|
||||
file_path="example.txt", # or /example.txt
|
||||
collection_name="example"
|
||||
)
|
||||
crew = Crew(
|
||||
agents=[agent],
|
||||
tasks=[task],
|
||||
knowledge_sources=[source],
|
||||
)
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
<AccordionGroup>
|
||||
|
||||
@@ -39,6 +39,7 @@ def create_folder_structure(name, parent_folder=None):
|
||||
|
||||
folder_path.mkdir(parents=True)
|
||||
(folder_path / "tests").mkdir(exist_ok=True)
|
||||
(folder_path / "knowledge").mkdir(exist_ok=True)
|
||||
if not parent_folder:
|
||||
(folder_path / "src" / folder_name).mkdir(parents=True)
|
||||
(folder_path / "src" / folder_name / "tools").mkdir(parents=True)
|
||||
@@ -52,7 +53,14 @@ def copy_template_files(folder_path, name, class_name, parent_folder):
|
||||
templates_dir = package_dir / "templates" / "crew"
|
||||
|
||||
root_template_files = (
|
||||
[".gitignore", "pyproject.toml", "README.md"] if not parent_folder else []
|
||||
[
|
||||
".gitignore",
|
||||
"pyproject.toml",
|
||||
"README.md",
|
||||
"knowledge/user_preference.txt",
|
||||
]
|
||||
if not parent_folder
|
||||
else []
|
||||
)
|
||||
tools_template_files = ["tools/custom_tool.py", "tools/__init__.py"]
|
||||
config_template_files = ["config/agents.yaml", "config/tasks.yaml"]
|
||||
@@ -168,7 +176,9 @@ def create_crew(name, provider=None, skip_provider=False, parent_folder=None):
|
||||
templates_dir = package_dir / "templates" / "crew"
|
||||
|
||||
root_template_files = (
|
||||
[".gitignore", "pyproject.toml", "README.md"] if not parent_folder else []
|
||||
[".gitignore", "pyproject.toml", "README.md", "knowledge/user_preference.txt"]
|
||||
if not parent_folder
|
||||
else []
|
||||
)
|
||||
tools_template_files = ["tools/custom_tool.py", "tools/__init__.py"]
|
||||
config_template_files = ["config/agents.yaml", "config/tasks.yaml"]
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
from crewai import Agent, Crew, Process, Task
|
||||
from crewai.project import CrewBase, agent, crew, task, before_kickoff, after_kickoff
|
||||
|
||||
# Uncomment the following line to use an example of a custom tool
|
||||
# from {{folder_name}}.tools.custom_tool import MyCustomTool
|
||||
# Uncomment the following line to use an example of a knowledge source
|
||||
# from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
|
||||
|
||||
# Check our tools documentations for more information on how to use them
|
||||
# from crewai_tools import SerperDevTool
|
||||
@@ -57,10 +58,20 @@ class {{crew_name}}():
|
||||
@crew
|
||||
def crew(self) -> Crew:
|
||||
"""Creates the {{crew_name}} crew"""
|
||||
# You can add knowledge sources here
|
||||
# knowledge_path = "user_preference.txt"
|
||||
# sources = [
|
||||
# TextFileKnowledgeSource(
|
||||
# file_path="knowledge/user_preference.txt",
|
||||
# metadata={"preference": "personal"}
|
||||
# ),
|
||||
# ]
|
||||
|
||||
return Crew(
|
||||
agents=self.agents, # Automatically created by the @agent decorator
|
||||
tasks=self.tasks, # Automatically created by the @task decorator
|
||||
process=Process.sequential,
|
||||
verbose=True,
|
||||
# process=Process.hierarchical, # In case you wanna use that instead https://docs.crewai.com/how-to/Hierarchical/
|
||||
# knowledge_sources=sources, # In the case you want to add knowledge sources
|
||||
)
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
User name is John Doe.
|
||||
User is an AI Engineer.
|
||||
User is interested in AI Agents.
|
||||
User is based in San Francisco, California.
|
||||
@@ -1,36 +1,72 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Union, List
|
||||
from typing import Union, List, Dict, Any
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from typing import Dict, Any
|
||||
from crewai.utilities.logger import Logger
|
||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
|
||||
|
||||
|
||||
class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
||||
class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
|
||||
"""Base class for knowledge sources that load content from files."""
|
||||
|
||||
file_path: Union[Path, List[Path]] = Field(...)
|
||||
_logger: Logger = Logger(verbose=True)
|
||||
file_path: Union[Path, List[Path], str, List[str]] = Field(
|
||||
..., description="The path to the file"
|
||||
)
|
||||
content: Dict[Path, str] = Field(init=False, default_factory=dict)
|
||||
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
|
||||
safe_file_paths: List[Path] = Field(default_factory=list)
|
||||
|
||||
def model_post_init(self, _):
|
||||
"""Post-initialization method to load content."""
|
||||
self.safe_file_paths = self._process_file_paths()
|
||||
self.validate_paths()
|
||||
self.content = self.load_content()
|
||||
|
||||
@abstractmethod
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess file content. Should be overridden by subclasses."""
|
||||
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
||||
"""Load and preprocess file content. Should be overridden by subclasses. Assume that the file path is relative to the project root in the knowledge directory."""
|
||||
pass
|
||||
|
||||
for path in paths:
|
||||
def validate_paths(self):
|
||||
"""Validate the paths."""
|
||||
for path in self.safe_file_paths:
|
||||
if not path.exists():
|
||||
self._logger.log(
|
||||
"error",
|
||||
f"File not found: {path}. Try adding sources to the knowledge directory. If its inside the knowledge directory, use the relative path.",
|
||||
color="red",
|
||||
)
|
||||
raise FileNotFoundError(f"File not found: {path}")
|
||||
if not path.is_file():
|
||||
raise ValueError(f"Path is not a file: {path}")
|
||||
return {}
|
||||
self._logger.log(
|
||||
"error",
|
||||
f"Path is not a file: {path}",
|
||||
color="red",
|
||||
)
|
||||
|
||||
def save_documents(self, metadata: Dict[str, Any]):
|
||||
"""Save the documents to the storage."""
|
||||
chunk_metadatas = [metadata.copy() for _ in self.chunks]
|
||||
self.storage.save(self.chunks, chunk_metadatas)
|
||||
|
||||
def convert_to_path(self, path: Union[Path, str]) -> Path:
|
||||
"""Convert a path to a Path object."""
|
||||
return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
|
||||
|
||||
def _process_file_paths(self) -> List[Path]:
|
||||
"""Convert file_path to a list of Path objects."""
|
||||
paths = (
|
||||
[self.file_path]
|
||||
if isinstance(self.file_path, (str, Path))
|
||||
else self.file_path
|
||||
)
|
||||
|
||||
if not isinstance(paths, list):
|
||||
raise ValueError("file_path must be a Path, str, or a list of these types")
|
||||
|
||||
return [self.convert_to_path(path) for path in paths]
|
||||
|
||||
@@ -10,19 +10,15 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess CSV file content."""
|
||||
super().load_content() # Validate the file path
|
||||
|
||||
file_path = (
|
||||
self.file_path[0] if isinstance(self.file_path, list) else self.file_path
|
||||
)
|
||||
file_path = Path(file_path) if isinstance(file_path, str) else file_path
|
||||
|
||||
with open(file_path, "r", encoding="utf-8") as csvfile:
|
||||
reader = csv.reader(csvfile)
|
||||
content = ""
|
||||
for row in reader:
|
||||
content += " ".join(row) + "\n"
|
||||
return {file_path: content}
|
||||
content_dict = {}
|
||||
for file_path in self.safe_file_paths:
|
||||
with open(file_path, "r", encoding="utf-8") as csvfile:
|
||||
reader = csv.reader(csvfile)
|
||||
content = ""
|
||||
for row in reader:
|
||||
content += " ".join(row) + "\n"
|
||||
content_dict[file_path] = content
|
||||
return content_dict
|
||||
|
||||
def add(self) -> None:
|
||||
"""
|
||||
|
||||
@@ -8,17 +8,15 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess Excel file content."""
|
||||
super().load_content() # Validate the file path
|
||||
pd = self._import_dependencies()
|
||||
|
||||
if isinstance(self.file_path, list):
|
||||
file_path = self.file_path[0]
|
||||
else:
|
||||
file_path = self.file_path
|
||||
|
||||
df = pd.read_excel(file_path)
|
||||
content = df.to_csv(index=False)
|
||||
return {file_path: content}
|
||||
content_dict = {}
|
||||
for file_path in self.safe_file_paths:
|
||||
file_path = self.convert_to_path(file_path)
|
||||
df = pd.read_excel(file_path)
|
||||
content = df.to_csv(index=False)
|
||||
content_dict[file_path] = content
|
||||
return content_dict
|
||||
|
||||
def _import_dependencies(self):
|
||||
"""Dynamically import dependencies."""
|
||||
|
||||
@@ -10,11 +10,9 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess JSON file content."""
|
||||
super().load_content() # Validate the file path
|
||||
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
||||
|
||||
content: Dict[Path, str] = {}
|
||||
for path in paths:
|
||||
for path in self.safe_file_paths:
|
||||
path = self.convert_to_path(path)
|
||||
with open(path, "r", encoding="utf-8") as json_file:
|
||||
data = json.load(json_file)
|
||||
content[path] = self._json_to_text(data)
|
||||
|
||||
@@ -9,14 +9,13 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess PDF file content."""
|
||||
super().load_content() # Validate the file paths
|
||||
pdfplumber = self._import_pdfplumber()
|
||||
|
||||
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
||||
content = {}
|
||||
|
||||
for path in paths:
|
||||
for path in self.safe_file_paths:
|
||||
text = ""
|
||||
path = self.convert_to_path(path)
|
||||
with pdfplumber.open(path) as pdf:
|
||||
for page in pdf.pages:
|
||||
page_text = page.extract_text()
|
||||
|
||||
@@ -9,12 +9,11 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess text file content."""
|
||||
super().load_content()
|
||||
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
||||
content = {}
|
||||
for path in paths:
|
||||
with path.open("r", encoding="utf-8") as f:
|
||||
content[path] = f.read() # type: ignore
|
||||
for path in self.safe_file_paths:
|
||||
path = self.convert_to_path(path)
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
content[path] = f.read()
|
||||
return content
|
||||
|
||||
def add(self) -> None:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
TRAINING_DATA_FILE = "training_data.pkl"
|
||||
TRAINED_AGENTS_DATA_FILE = "trained_agents_data.pkl"
|
||||
DEFAULT_SCORE_THRESHOLD = 0.35
|
||||
KNOWLEDGE_DIRECTORY = "knowledge"
|
||||
|
||||
Reference in New Issue
Block a user