updated load_content() function in excel_knowledge_source.py to reduce memory usage and provide better documentation

2026-05-06 01:32:36 +00:00 · 2025-01-18 18:18:00 -05:00
parent bda9e34c57
commit 1002af8a31
1 changed files with 26 additions and 4 deletions
--- a/src/crewai/knowledge/source/excel_knowledge_source.py
+++ b/src/crewai/knowledge/source/excel_knowledge_source.py
@@ -7,6 +7,29 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge
 class ExcelKnowledgeSource(BaseFileKnowledgeSource):
    """A knowledge source that stores and queries Excel file content using embeddings."""
    def load_content(self) -> Dict[Path, Dict[str, str]]:
        """Read every sheet of every configured Excel file as CSV text.

        Each path in ``self.safe_file_paths`` is opened once through
        ``pd.ExcelFile`` (used as a context manager so the handle is released
        as soon as its sheets have been read). Every sheet is loaded with
        ``pd.read_excel`` and serialised with ``to_csv(index=False)``.

        Returns:
            Dict[Path, Dict[str, str]]: For each file path, a mapping of sheet
            name to that sheet's CSV representation.

        Raises:
            ImportError: Propagated from ``_import_dependencies`` when a
                required package is not installed.
            FileNotFoundError: Presumably raised by pandas when a path cannot
                be opened; any other error pandas raises while opening or
                reading a workbook propagates unchanged.
        """
        pd = self._import_dependencies()

        workbooks = {}
        for workbook_path in self.safe_file_paths:
            with pd.ExcelFile(workbook_path) as workbook:
                sheets = {}
                for name in workbook.sheet_names:
                    frame = pd.read_excel(workbook, name)
                    # index=False keeps the DataFrame row index out of the CSV text.
                    sheets[name] = frame.to_csv(index=False)
            workbooks[workbook_path] = sheets
        return workbooks
    def load_content(self) -> Dict[Path, str]:
        """Load and preprocess Excel file content. Updated to account for .xlsx workbooks with multiple tabs/sheets"""
        pd, openpyxl, load_workbook = self._import_dependencies()
@@ -33,7 +56,6 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
                        sheet_str += str(cell) + ","
                    sheet_str += "\n"
                print(sheet_str)
                # Add the sheet content to the file sheet dictionary
                sheet_dict[sheet_name] = sheet_str
            # Add the file sheet dictionary to the content dictionary
@@ -44,11 +66,11 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
    def _import_dependencies(self):
        """Dynamically import dependencies."""
        try:
-            import openpyxl  # noqa
+            # import openpyxl  # noqa
-            from openpyxl import load_workbook
+            # from openpyxl import load_workbook
            import pandas as pd
-            return pd, openpyxl, load_workbook
+            return pd
        except ImportError as e:
            missing_package = str(e).split()[-1]
            raise ImportError(