Made BaseFileKnowledgeSource an abstract class

This commit is contained in:
Lorenze Jay
2024-12-03 10:57:58 -08:00
parent 8d7b0de732
commit c1c79b1f8f
7 changed files with 11 additions and 11 deletions

View File

@@ -1,3 +1,4 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Union, List, Dict, Any
@@ -6,9 +7,10 @@ from pydantic import Field
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.utilities.logger import Logger
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
class BaseFileKnowledgeSource(BaseKnowledgeSource):
class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
"""Base class for knowledge sources that load content from files."""
_logger: Logger = Logger(verbose=True)
@@ -20,11 +22,16 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
def model_post_init(self, _):
"""Post-initialization method to load content."""
self.validate_paths()
self.content = self.load_content()
@abstractmethod
def load_content(self) -> Dict[Path, str]:
"""Load and preprocess file content. Should be overridden by subclasses. We want to assume that the file path is relative to the project root in the knowledge directory."""
"""Load and preprocess file content. Should be overridden by subclasses. Assume that the file path is relative to the project root in the knowledge directory."""
pass
def validate_paths(self):
"""Validate the paths."""
if isinstance(self.file_path, str):
self.file_path = self.convert_to_path(self.file_path)
elif isinstance(self.file_path, list):
@@ -37,7 +44,6 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
if not isinstance(paths, list):
raise ValueError("file_path must be a Path or a list of Paths")
# Ensure all paths are Path objects
paths = [Path(path) if isinstance(path, str) else path for path in paths]
for path in paths:
@@ -54,8 +60,6 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
f"Path is not a file: {path}",
color="red",
)
raise ValueError(f"Path is not a file: {path}")
return {}
def save_documents(self, metadata: Dict[str, Any]):
"""Save the documents to the storage."""
@@ -64,4 +68,4 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
def convert_to_path(self, path: Union[Path, str]) -> Path:
"""Convert a path to a Path object."""
return Path("knowledge/" + path) if isinstance(path, str) else path
return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path

View File

@@ -10,7 +10,6 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource):
def load_content(self) -> Dict[Path, str]:
"""Load and preprocess CSV file content."""
super().load_content() # Validate the file path
file_path = (
self.file_path[0] if isinstance(self.file_path, list) else self.file_path

View File

@@ -8,7 +8,6 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
def load_content(self) -> Dict[Path, str]:
"""Load and preprocess Excel file content."""
super().load_content() # Validate the file path
pd = self._import_dependencies()
if isinstance(self.file_path, list):

View File

@@ -10,7 +10,6 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
def load_content(self) -> Dict[Path, str]:
"""Load and preprocess JSON file content."""
super().load_content() # Validate the file path
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
content: Dict[Path, str] = {}

View File

@@ -9,7 +9,6 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
def load_content(self) -> Dict[Path, str]:
"""Load and preprocess PDF file content."""
super().load_content() # Validate the file paths
pdfplumber = self._import_pdfplumber()
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path

View File

@@ -9,7 +9,6 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource):
def load_content(self) -> Dict[Path, str]:
"""Load and preprocess text file content."""
super().load_content()
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
content = {}
for path in paths:

View File

@@ -1,3 +1,4 @@
# File name for stored training data (the ".pkl" suffix suggests a pickle file — confirm against the code that writes it).
TRAINING_DATA_FILE = "training_data.pkl"
# File name for trained-agent data (presumably also a pickle file — confirm against the code that writes it).
TRAINED_AGENTS_DATA_FILE = "trained_agents_data.pkl"
# NOTE(review): what this threshold is compared against is not visible here — confirm with the consumers of this constant.
DEFAULT_SCORE_THRESHOLD = 0.35
# Directory name that BaseFileKnowledgeSource.convert_to_path prepends to file paths given as strings.
KNOWLEDGE_DIRECTORY = "knowledge"