diff --git a/src/crewai/knowledge/source/excel_knowledge_source.py b/src/crewai/knowledge/source/excel_knowledge_source.py index 805d9baa5..0bd2532c3 100644 --- a/src/crewai/knowledge/source/excel_knowledge_source.py +++ b/src/crewai/knowledge/source/excel_knowledge_source.py @@ -7,6 +7,29 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge class ExcelKnowledgeSource(BaseFileKnowledgeSource): """A knowledge source that stores and queries Excel file content using embeddings.""" + def load_content(self) -> Dict[Path, Dict[str, str]]: + """Load and preprocess Excel file content from multiple sheets. + + Each sheet's content is converted to CSV format and stored. + + Returns: + Dict[Path, Dict[str, str]]: A mapping of file paths to their respective sheet contents. + + Raises: + ImportError: If required dependencies are missing. + FileNotFoundError: If the specified Excel file cannot be opened. + """ + pd = self._import_dependencies() + content_dict = {} + for file_path in self.safe_file_paths: + with pd.ExcelFile(file_path) as xl: + sheet_dict = { + sheet_name: pd.read_excel(xl, sheet_name).to_csv(index=False) + for sheet_name in xl.sheet_names + } + content_dict[file_path] = sheet_dict + return content_dict + def load_content(self) -> Dict[Path, str]: """Load and preprocess Excel file content. Updated to account for .xlsx workbooks with multiple tabs/sheets""" pd, openpyxl, load_workbook = self._import_dependencies() @@ -33,7 +56,6 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource): sheet_str += str(cell) + "," sheet_str += "\n" - print(sheet_str) # Add the sheet content to the file sheet dictionary sheet_dict[sheet_name] = sheet_str # Add the file sheet dictionary to the content dictionary @@ -44,11 +66,11 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource): def _import_dependencies(self): """Dynamically import dependencies.""" try: - import openpyxl # noqa - from openpyxl import load_workbook + # import openpyxl # noqa + # from openpyxl import load_workbook import pandas as pd - return pd, openpyxl, load_workbook + return pd except ImportError as e: missing_package = str(e).split()[-1] raise ImportError(