mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
made base file knowledge source an abstract class
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Union, List, Dict, Any
|
||||
|
||||
@@ -6,9 +7,10 @@ from pydantic import Field
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from crewai.utilities.logger import Logger
|
||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
|
||||
|
||||
|
||||
class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
||||
class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
|
||||
"""Base class for knowledge sources that load content from files."""
|
||||
|
||||
_logger: Logger = Logger(verbose=True)
|
||||
@@ -20,11 +22,16 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
||||
|
||||
def model_post_init(self, _):
|
||||
"""Post-initialization method to load content."""
|
||||
self.validate_paths()
|
||||
self.content = self.load_content()
|
||||
|
||||
@abstractmethod
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess file content. Should be overridden by subclasses. We want to assume that the file path is relative to the project root in the knowledge directory."""
|
||||
"""Load and preprocess file content. Should be overridden by subclasses. Assume that the file path is relative to the project root in the knowledge directory."""
|
||||
pass
|
||||
|
||||
def validate_paths(self):
|
||||
"""Validate the paths."""
|
||||
if isinstance(self.file_path, str):
|
||||
self.file_path = self.convert_to_path(self.file_path)
|
||||
elif isinstance(self.file_path, list):
|
||||
@@ -37,7 +44,6 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
||||
if not isinstance(paths, list):
|
||||
raise ValueError("file_path must be a Path or a list of Paths")
|
||||
|
||||
# Ensure all paths are Path objects
|
||||
paths = [Path(path) if isinstance(path, str) else path for path in paths]
|
||||
|
||||
for path in paths:
|
||||
@@ -54,8 +60,6 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
||||
f"Path is not a file: {path}",
|
||||
color="red",
|
||||
)
|
||||
raise ValueError(f"Path is not a file: {path}")
|
||||
return {}
|
||||
|
||||
def save_documents(self, metadata: Dict[str, Any]):
|
||||
"""Save the documents to the storage."""
|
||||
@@ -64,4 +68,4 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
||||
|
||||
def convert_to_path(self, path: Union[Path, str]) -> Path:
|
||||
"""Convert a path to a Path object."""
|
||||
return Path("knowledge/" + path) if isinstance(path, str) else path
|
||||
return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
|
||||
|
||||
@@ -10,7 +10,6 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess CSV file content."""
|
||||
super().load_content() # Validate the file path
|
||||
|
||||
file_path = (
|
||||
self.file_path[0] if isinstance(self.file_path, list) else self.file_path
|
||||
|
||||
@@ -8,7 +8,6 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess Excel file content."""
|
||||
super().load_content() # Validate the file path
|
||||
pd = self._import_dependencies()
|
||||
|
||||
if isinstance(self.file_path, list):
|
||||
|
||||
@@ -10,7 +10,6 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess JSON file content."""
|
||||
super().load_content() # Validate the file path
|
||||
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
||||
|
||||
content: Dict[Path, str] = {}
|
||||
|
||||
@@ -9,7 +9,6 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess PDF file content."""
|
||||
super().load_content() # Validate the file paths
|
||||
pdfplumber = self._import_pdfplumber()
|
||||
|
||||
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
||||
|
||||
@@ -9,7 +9,6 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource):
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess text file content."""
|
||||
super().load_content()
|
||||
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
||||
content = {}
|
||||
for path in paths:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
TRAINING_DATA_FILE = "training_data.pkl"
|
||||
TRAINED_AGENTS_DATA_FILE = "trained_agents_data.pkl"
|
||||
DEFAULT_SCORE_THRESHOLD = 0.35
|
||||
KNOWLEDGE_DIRECTORY = "knowledge"
|
||||
|
||||
Reference in New Issue
Block a user