mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-06 01:32:36 +00:00
updated load_content() function in excel_knowledge_source.py to reduce memory usage and provide better documentation
This commit is contained in:
@@ -7,6 +7,29 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge
|
||||
class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
||||
"""A knowledge source that stores and queries Excel file content using embeddings."""
|
||||
|
||||
def load_content(self) -> Dict[Path, Dict[str, str]]:
    """Load every sheet of each Excel file and render it as CSV text.

    Each path in ``self.safe_file_paths`` is opened through ``pd.ExcelFile``
    inside a ``with`` block, so the workbook handle is released as soon as
    its sheets have been converted.

    Returns:
        Dict[Path, Dict[str, str]]: A mapping of file path to a mapping of
            sheet name to that sheet's content as CSV text (written
            without the index column).

    Raises:
        ImportError: If required dependencies are missing (raised by
            ``_import_dependencies``).
        FileNotFoundError: If the specified Excel file cannot be opened.
    """
    pd = self._import_dependencies()
    content_dict: Dict[Path, Dict[str, str]] = {}
    for file_path in self.safe_file_paths:
        # A single ExcelFile handle per workbook is shared by all of the
        # read_excel calls below rather than passing the path each time.
        with pd.ExcelFile(file_path) as xl:
            content_dict[file_path] = {
                sheet_name: pd.read_excel(xl, sheet_name).to_csv(index=False)
                for sheet_name in xl.sheet_names
            }
    return content_dict
|
||||
|
||||
def load_content(self) -> Dict[Path, str]:
|
||||
"""Load and preprocess Excel file content. Updated to account for .xlsx workbooks with multiple tabs/sheets"""
|
||||
pd, openpyxl, load_workbook = self._import_dependencies()
|
||||
@@ -33,7 +56,6 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
||||
sheet_str += str(cell) + ","
|
||||
sheet_str += "\n"
|
||||
|
||||
print(sheet_str)
|
||||
# Add the sheet content to the file sheet dictionary
|
||||
sheet_dict[sheet_name] = sheet_str
|
||||
# Add the file sheet dictionary to the content dictionary
|
||||
@@ -44,11 +66,11 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
||||
def _import_dependencies(self):
|
||||
"""Dynamically import dependencies."""
|
||||
try:
|
||||
import openpyxl # noqa
|
||||
from openpyxl import load_workbook
|
||||
# import openpyxl # noqa
|
||||
# from openpyxl import load_workbook
|
||||
import pandas as pd
|
||||
|
||||
return pd, openpyxl, load_workbook
|
||||
return pd
|
||||
except ImportError as e:
|
||||
missing_package = str(e).split()[-1]
|
||||
raise ImportError(
|
||||
|
||||
Reference in New Issue
Block a user