mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-29 18:18:13 +00:00
Made the base file knowledge source an abstract class
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Union, List, Dict, Any
|
from typing import Union, List, Dict, Any
|
||||||
|
|
||||||
@@ -6,9 +7,10 @@ from pydantic import Field
|
|||||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||||
from crewai.utilities.logger import Logger
|
from crewai.utilities.logger import Logger
|
||||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||||
|
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
|
||||||
|
|
||||||
|
|
||||||
class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
|
||||||
"""Base class for knowledge sources that load content from files."""
|
"""Base class for knowledge sources that load content from files."""
|
||||||
|
|
||||||
_logger: Logger = Logger(verbose=True)
|
_logger: Logger = Logger(verbose=True)
|
||||||
@@ -20,11 +22,16 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
|||||||
|
|
||||||
def model_post_init(self, _):
|
def model_post_init(self, _):
|
||||||
"""Post-initialization method to load content."""
|
"""Post-initialization method to load content."""
|
||||||
|
self.validate_paths()
|
||||||
self.content = self.load_content()
|
self.content = self.load_content()
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
def load_content(self) -> Dict[Path, str]:
|
def load_content(self) -> Dict[Path, str]:
|
||||||
"""Load and preprocess file content. Should be overridden by subclasses. We want to assume that the file path is relative to the project root in the knowledge directory."""
|
"""Load and preprocess file content. Should be overridden by subclasses. Assume that the file path is relative to the project root in the knowledge directory."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def validate_paths(self):
|
||||||
|
"""Validate the paths."""
|
||||||
if isinstance(self.file_path, str):
|
if isinstance(self.file_path, str):
|
||||||
self.file_path = self.convert_to_path(self.file_path)
|
self.file_path = self.convert_to_path(self.file_path)
|
||||||
elif isinstance(self.file_path, list):
|
elif isinstance(self.file_path, list):
|
||||||
@@ -37,7 +44,6 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
|||||||
if not isinstance(paths, list):
|
if not isinstance(paths, list):
|
||||||
raise ValueError("file_path must be a Path or a list of Paths")
|
raise ValueError("file_path must be a Path or a list of Paths")
|
||||||
|
|
||||||
# Ensure all paths are Path objects
|
|
||||||
paths = [Path(path) if isinstance(path, str) else path for path in paths]
|
paths = [Path(path) if isinstance(path, str) else path for path in paths]
|
||||||
|
|
||||||
for path in paths:
|
for path in paths:
|
||||||
@@ -54,8 +60,6 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
|||||||
f"Path is not a file: {path}",
|
f"Path is not a file: {path}",
|
||||||
color="red",
|
color="red",
|
||||||
)
|
)
|
||||||
raise ValueError(f"Path is not a file: {path}")
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def save_documents(self, metadata: Dict[str, Any]):
|
def save_documents(self, metadata: Dict[str, Any]):
|
||||||
"""Save the documents to the storage."""
|
"""Save the documents to the storage."""
|
||||||
@@ -64,4 +68,4 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource):
|
|||||||
|
|
||||||
def convert_to_path(self, path: Union[Path, str]) -> Path:
|
def convert_to_path(self, path: Union[Path, str]) -> Path:
|
||||||
"""Convert a path to a Path object."""
|
"""Convert a path to a Path object."""
|
||||||
return Path("knowledge/" + path) if isinstance(path, str) else path
|
return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
|
|
||||||
def load_content(self) -> Dict[Path, str]:
|
def load_content(self) -> Dict[Path, str]:
|
||||||
"""Load and preprocess CSV file content."""
|
"""Load and preprocess CSV file content."""
|
||||||
super().load_content() # Validate the file path
|
|
||||||
|
|
||||||
file_path = (
|
file_path = (
|
||||||
self.file_path[0] if isinstance(self.file_path, list) else self.file_path
|
self.file_path[0] if isinstance(self.file_path, list) else self.file_path
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
|
|
||||||
def load_content(self) -> Dict[Path, str]:
|
def load_content(self) -> Dict[Path, str]:
|
||||||
"""Load and preprocess Excel file content."""
|
"""Load and preprocess Excel file content."""
|
||||||
super().load_content() # Validate the file path
|
|
||||||
pd = self._import_dependencies()
|
pd = self._import_dependencies()
|
||||||
|
|
||||||
if isinstance(self.file_path, list):
|
if isinstance(self.file_path, list):
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
|
|
||||||
def load_content(self) -> Dict[Path, str]:
|
def load_content(self) -> Dict[Path, str]:
|
||||||
"""Load and preprocess JSON file content."""
|
"""Load and preprocess JSON file content."""
|
||||||
super().load_content() # Validate the file path
|
|
||||||
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
||||||
|
|
||||||
content: Dict[Path, str] = {}
|
content: Dict[Path, str] = {}
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
|
|
||||||
def load_content(self) -> Dict[Path, str]:
|
def load_content(self) -> Dict[Path, str]:
|
||||||
"""Load and preprocess PDF file content."""
|
"""Load and preprocess PDF file content."""
|
||||||
super().load_content() # Validate the file paths
|
|
||||||
pdfplumber = self._import_pdfplumber()
|
pdfplumber = self._import_pdfplumber()
|
||||||
|
|
||||||
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
|
|
||||||
def load_content(self) -> Dict[Path, str]:
|
def load_content(self) -> Dict[Path, str]:
|
||||||
"""Load and preprocess text file content."""
|
"""Load and preprocess text file content."""
|
||||||
super().load_content()
|
|
||||||
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
|
||||||
content = {}
|
content = {}
|
||||||
for path in paths:
|
for path in paths:
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
TRAINING_DATA_FILE = "training_data.pkl"
|
TRAINING_DATA_FILE = "training_data.pkl"
|
||||||
TRAINED_AGENTS_DATA_FILE = "trained_agents_data.pkl"
|
TRAINED_AGENTS_DATA_FILE = "trained_agents_data.pkl"
|
||||||
DEFAULT_SCORE_THRESHOLD = 0.35
|
DEFAULT_SCORE_THRESHOLD = 0.35
|
||||||
|
KNOWLEDGE_DIRECTORY = "knowledge"
|
||||||
|
|||||||
Reference in New Issue
Block a user