mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-06 01:32:36 +00:00
Updated the load_content() function in excel_knowledge_source.py to reduce memory usage and provide better documentation
This commit is contained in:
@@ -7,6 +7,29 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge
|
|||||||
class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
||||||
"""A knowledge source that stores and queries Excel file content using embeddings."""
|
"""A knowledge source that stores and queries Excel file content using embeddings."""
|
||||||
|
|
||||||
|
def load_content(self) -> Dict[Path, Dict[str, str]]:
|
||||||
|
"""Load and preprocess Excel file content from multiple sheets.
|
||||||
|
|
||||||
|
Each sheet's content is converted to CSV format and stored.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict[Path, Dict[str, str]]: A mapping of file paths to their respective sheet contents.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ImportError: If required dependencies are missing.
|
||||||
|
FileNotFoundError: If the specified Excel file cannot be opened.
|
||||||
|
"""
|
||||||
|
pd = self._import_dependencies()
|
||||||
|
content_dict = {}
|
||||||
|
for file_path in self.safe_file_paths:
|
||||||
|
with pd.ExcelFile(file_path) as xl:
|
||||||
|
sheet_dict = {
|
||||||
|
sheet_name: pd.read_excel(xl, sheet_name).to_csv(index=False)
|
||||||
|
for sheet_name in xl.sheet_names
|
||||||
|
}
|
||||||
|
content_dict[file_path] = sheet_dict
|
||||||
|
return content_dict
|
||||||
|
|
||||||
def load_content(self) -> Dict[Path, str]:
|
def load_content(self) -> Dict[Path, str]:
|
||||||
"""Load and preprocess Excel file content. Updated to account for .xlsx workbooks with multiple tabs/sheets"""
|
"""Load and preprocess Excel file content. Updated to account for .xlsx workbooks with multiple tabs/sheets"""
|
||||||
pd, openpyxl, load_workbook = self._import_dependencies()
|
pd, openpyxl, load_workbook = self._import_dependencies()
|
||||||
@@ -33,7 +56,6 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
sheet_str += str(cell) + ","
|
sheet_str += str(cell) + ","
|
||||||
sheet_str += "\n"
|
sheet_str += "\n"
|
||||||
|
|
||||||
print(sheet_str)
|
|
||||||
# Add the sheet content to the file sheet dictionary
|
# Add the sheet content to the file sheet dictionary
|
||||||
sheet_dict[sheet_name] = sheet_str
|
sheet_dict[sheet_name] = sheet_str
|
||||||
# Add the file sheet dictionary to the content dictionary
|
# Add the file sheet dictionary to the content dictionary
|
||||||
@@ -44,11 +66,11 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
def _import_dependencies(self):
|
def _import_dependencies(self):
|
||||||
"""Dynamically import dependencies."""
|
"""Dynamically import dependencies."""
|
||||||
try:
|
try:
|
||||||
import openpyxl # noqa
|
# import openpyxl # noqa
|
||||||
from openpyxl import load_workbook
|
# from openpyxl import load_workbook
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
return pd, openpyxl, load_workbook
|
return pd
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
missing_package = str(e).split()[-1]
|
missing_package = str(e).split()[-1]
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
|
|||||||
Reference in New Issue
Block a user