From c1c79b1f8ff4a6f1a377caa5fb75e1e30a56299f Mon Sep 17 00:00:00 2001 From: Lorenze Jay Date: Tue, 3 Dec 2024 10:57:58 -0800 Subject: [PATCH] made base file knowledge source an abstract class --- .../source/base_file_knowledge_source.py | 16 ++++++++++------ .../knowledge/source/csv_knowledge_source.py | 1 - .../knowledge/source/excel_knowledge_source.py | 1 - .../knowledge/source/json_knowledge_source.py | 1 - .../knowledge/source/pdf_knowledge_source.py | 1 - .../source/text_file_knowledge_source.py | 1 - src/crewai/utilities/constants.py | 1 + 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/crewai/knowledge/source/base_file_knowledge_source.py b/src/crewai/knowledge/source/base_file_knowledge_source.py index ebb3341a8..b29ffe796 100644 --- a/src/crewai/knowledge/source/base_file_knowledge_source.py +++ b/src/crewai/knowledge/source/base_file_knowledge_source.py @@ -1,3 +1,4 @@ +from abc import ABC, abstractmethod from pathlib import Path from typing import Union, List, Dict, Any @@ -6,9 +7,10 @@ from pydantic import Field from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource from crewai.utilities.logger import Logger from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage +from crewai.utilities.constants import KNOWLEDGE_DIRECTORY -class BaseFileKnowledgeSource(BaseKnowledgeSource): +class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC): """Base class for knowledge sources that load content from files.""" _logger: Logger = Logger(verbose=True) @@ -20,11 +22,16 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource): def model_post_init(self, _): """Post-initialization method to load content.""" + self.validate_paths() self.content = self.load_content() + @abstractmethod def load_content(self) -> Dict[Path, str]: - """Load and preprocess file content. Should be overridden by subclasses. We want to assume that the file path is relative to the project root in the knowledge directory.""" + """Load and preprocess file content. Should be overridden by subclasses. Assume that the file path is relative to the project root in the knowledge directory.""" + pass + def validate_paths(self): + """Validate the paths.""" if isinstance(self.file_path, str): self.file_path = self.convert_to_path(self.file_path) elif isinstance(self.file_path, list): @@ -37,7 +44,6 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource): if not isinstance(paths, list): raise ValueError("file_path must be a Path or a list of Paths") - # Ensure all paths are Path objects paths = [Path(path) if isinstance(path, str) else path for path in paths] for path in paths: @@ -54,8 +60,6 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource): f"Path is not a file: {path}", color="red", ) - raise ValueError(f"Path is not a file: {path}") - return {} def save_documents(self, metadata: Dict[str, Any]): """Save the documents to the storage.""" @@ -64,4 +68,4 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource): def convert_to_path(self, path: Union[Path, str]) -> Path: """Convert a path to a Path object.""" - return Path("knowledge/" + path) if isinstance(path, str) else path + return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path diff --git a/src/crewai/knowledge/source/csv_knowledge_source.py b/src/crewai/knowledge/source/csv_knowledge_source.py index 0946104a4..736bba0ed 100644 --- a/src/crewai/knowledge/source/csv_knowledge_source.py +++ b/src/crewai/knowledge/source/csv_knowledge_source.py @@ -10,7 +10,6 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource): def load_content(self) -> Dict[Path, str]: """Load and preprocess CSV file content.""" - super().load_content() # Validate the file path file_path = ( self.file_path[0] if isinstance(self.file_path, list) else self.file_path diff --git a/src/crewai/knowledge/source/excel_knowledge_source.py b/src/crewai/knowledge/source/excel_knowledge_source.py index 50520b668..c19923c63 100644 --- a/src/crewai/knowledge/source/excel_knowledge_source.py +++ b/src/crewai/knowledge/source/excel_knowledge_source.py @@ -8,7 +8,6 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource): def load_content(self) -> Dict[Path, str]: """Load and preprocess Excel file content.""" - super().load_content() # Validate the file path pd = self._import_dependencies() if isinstance(self.file_path, list): diff --git a/src/crewai/knowledge/source/json_knowledge_source.py b/src/crewai/knowledge/source/json_knowledge_source.py index e899bb338..d7f8dab40 100644 --- a/src/crewai/knowledge/source/json_knowledge_source.py +++ b/src/crewai/knowledge/source/json_knowledge_source.py @@ -10,7 +10,6 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource): def load_content(self) -> Dict[Path, str]: """Load and preprocess JSON file content.""" - super().load_content() # Validate the file path paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path content: Dict[Path, str] = {} diff --git a/src/crewai/knowledge/source/pdf_knowledge_source.py b/src/crewai/knowledge/source/pdf_knowledge_source.py index 623ba30a2..25fc9198f 100644 --- a/src/crewai/knowledge/source/pdf_knowledge_source.py +++ b/src/crewai/knowledge/source/pdf_knowledge_source.py @@ -9,7 +9,6 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource): def load_content(self) -> Dict[Path, str]: """Load and preprocess PDF file content.""" - super().load_content() # Validate the file paths pdfplumber = self._import_pdfplumber() paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path diff --git a/src/crewai/knowledge/source/text_file_knowledge_source.py b/src/crewai/knowledge/source/text_file_knowledge_source.py index 3078ce05b..d760edcba 100644 --- a/src/crewai/knowledge/source/text_file_knowledge_source.py +++ b/src/crewai/knowledge/source/text_file_knowledge_source.py @@ -9,7 +9,6 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource): def load_content(self) -> Dict[Path, str]: """Load and preprocess text file content.""" - super().load_content() paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path content = {} for path in paths: diff --git a/src/crewai/utilities/constants.py b/src/crewai/utilities/constants.py index 59f789913..97fadda48 100644 --- a/src/crewai/utilities/constants.py +++ b/src/crewai/utilities/constants.py @@ -1,3 +1,4 @@ TRAINING_DATA_FILE = "training_data.pkl" TRAINED_AGENTS_DATA_FILE = "trained_agents_data.pkl" DEFAULT_SCORE_THRESHOLD = 0.35 +KNOWLEDGE_DIRECTORY = "knowledge"