Updated excel_knowledge_source.py to handle Excel workbooks that contain multiple sheets (tabs). The old implementation made a single call, df = pd.read_excel(excel_file_path), which reads only one sheet of the workbook (by default, the first one). The updated implementation reads every sheet in the workbook.

2026-07-02 13:48:09 +00:00 · 2025-01-18 18:02:51 -05:00
parent 30d027158a
commit eec0d227a9
1 changed files with 37 additions and 10 deletions
--- a/src/crewai/knowledge/source/excel_knowledge_source.py
+++ b/src/crewai/knowledge/source/excel_knowledge_source.py
@@ -8,24 +8,47 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
    """A knowledge source that stores and queries Excel file content using embeddings."""

    def load_content(self) -> Dict[Path, str]:
-        """Load and preprocess Excel file content."""
-        pd = self._import_dependencies()
+        """Load and preprocess Excel file content. Updated to account for .xlsx workbooks with multiple tabs/sheets"""
+        pd, openpyxl, load_workbook = self._import_dependencies()

+        # Initialize the content dictionary
        content_dict = {}
        for file_path in self.safe_file_paths:
+            # Convert the file path to a Path object
            file_path = self.convert_to_path(file_path)
-            df = pd.read_excel(file_path)
-            content = df.to_csv(index=False)
-            content_dict[file_path] = content
+            # Load the Excel file
+            wb = load_workbook(file_path)
+            # Get the sheet names
+            sheet_names = wb.sheetnames
+            # Iterate over the sheets
+            # Initialize the file sheet dictionary
+            sheet_dict = {}
+            for sheet_name in sheet_names:
+                # Get the sheet
+                ws = wb[sheet_name]
+                # Convert the sheet to a CSV string
+                sheet_str = """"""
+                for row in ws.values:
+                    for cell in row:
+                        sheet_str += str(cell) + ","
+                    sheet_str += "\n"
+
+                print(sheet_str)
+                # Add the sheet content to the file sheet dictionary
+                sheet_dict[sheet_name] = sheet_str
+            # Add the file sheet dictionary to the content dictionary
+            content_dict[file_path] = sheet_dict
+
        return content_dict

    def _import_dependencies(self):
        """Dynamically import dependencies."""
        try:
            import openpyxl  # noqa
+            from openpyxl import load_workbook
            import pandas as pd

-            return pd
+            return pd, openpyxl, load_workbook
        except ImportError as e:
            missing_package = str(e).split()[-1]
            raise ImportError(
@@ -38,10 +61,14 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
        and save the embeddings.
        """
        # Convert dictionary values to a single string if content is a dictionary
-        if isinstance(self.content, dict):
-            content_str = "\n".join(str(value) for value in self.content.values())
-        else:
-            content_str = str(self.content)
+        # Updated to account for .xlsx workbooks with multiple tabs/sheets
+        content_str = ""
+        for value in self.content.values():
+            if isinstance(value, dict):
+                for sheet_value in value.values():
+                    content_str += str(sheet_value) + "\n"
+            else:
+                content_str += str(value) + "\n"

        new_chunks = self._chunk_text(content_str)
        self.chunks.extend(new_chunks)