From 0c5b6f2a93d42dd82283453351c2a98255ccea33 Mon Sep 17 00:00:00 2001 From: Lorenze Jay Date: Tue, 19 Nov 2024 12:02:06 -0800 Subject: [PATCH] mypysrc fixes --- .../knowledge/source/csv_knowledge_source.py | 17 ++++++++++++----- .../knowledge/source/excel_knowledge_source.py | 16 +++++++++++----- .../knowledge/source/json_knowledge_source.py | 18 +++++++++++++----- .../knowledge/storage/knowledge_storage.py | 12 ++++++------ 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/src/crewai/knowledge/source/csv_knowledge_source.py b/src/crewai/knowledge/source/csv_knowledge_source.py index c65e7f7a0..2f5a3cc2c 100644 --- a/src/crewai/knowledge/source/csv_knowledge_source.py +++ b/src/crewai/knowledge/source/csv_knowledge_source.py @@ -1,5 +1,6 @@ import csv -from typing import List +from typing import Dict, List +from pathlib import Path from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource @@ -7,22 +8,28 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge class CSVKnowledgeSource(BaseFileKnowledgeSource): """A knowledge source that stores and queries CSV file content using embeddings.""" - def load_content(self) -> str: + def load_content(self) -> Dict[Path, str]: """Load and preprocess CSV file content.""" super().load_content() # Validate the file path - with open(self.file_path, "r", encoding="utf-8") as csvfile: + file_path_str = ( + str(self.file_path) if isinstance(self.file_path, Path) else self.file_path + ) + with open(file_path_str, "r", encoding="utf-8") as csvfile: reader = csv.reader(csvfile) content = "" for row in reader: content += " ".join(row) + "\n" - return content + return {self.file_path: content} def add(self) -> None: """ Add CSV file content to the knowledge source, chunk it, compute embeddings, and save the embeddings. """ - new_chunks = self._chunk_text(self.content) + content_str = ( + str(self.content) if isinstance(self.content, dict) else self.content + ) + new_chunks = self._chunk_text(content_str) self.chunks.extend(new_chunks) self.save_documents(metadata=self.metadata) diff --git a/src/crewai/knowledge/source/excel_knowledge_source.py b/src/crewai/knowledge/source/excel_knowledge_source.py index ff0475472..5417b5200 100644 --- a/src/crewai/knowledge/source/excel_knowledge_source.py +++ b/src/crewai/knowledge/source/excel_knowledge_source.py @@ -1,18 +1,18 @@ -from typing import List - +from typing import Dict, List +from pathlib import Path from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource class ExcelKnowledgeSource(BaseFileKnowledgeSource): """A knowledge source that stores and queries Excel file content using embeddings.""" - def load_content(self) -> str: + def load_content(self) -> Dict[Path, str]: """Load and preprocess Excel file content.""" super().load_content() # Validate the file path pd = self._import_dependencies() df = pd.read_excel(self.file_path) content = df.to_csv(index=False) - return content + return {self.file_path: content} def _import_dependencies(self): """Dynamically import dependencies.""" @@ -32,7 +32,13 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource): Add Excel file content to the knowledge source, chunk it, compute embeddings, and save the embeddings. """ - new_chunks = self._chunk_text(self.content) + # Convert dictionary values to a single string if content is a dictionary + if isinstance(self.content, dict): + content_str = "\n".join(str(value) for value in self.content.values()) + else: + content_str = str(self.content) + + new_chunks = self._chunk_text(content_str) self.chunks.extend(new_chunks) self.save_documents(metadata=self.metadata) diff --git a/src/crewai/knowledge/source/json_knowledge_source.py b/src/crewai/knowledge/source/json_knowledge_source.py index 00f01c29e..e14be4ff8 100644 --- a/src/crewai/knowledge/source/json_knowledge_source.py +++ b/src/crewai/knowledge/source/json_knowledge_source.py @@ -1,5 +1,6 @@ import json -from typing import Any, List +from typing import Any, Dict, List +from pathlib import Path from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource @@ -7,11 +8,15 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge class JSONKnowledgeSource(BaseFileKnowledgeSource): """A knowledge source that stores and queries JSON file content using embeddings.""" - def load_content(self) -> str: + def load_content(self) -> Dict[Path, str]: """Load and preprocess JSON file content.""" super().load_content() # Validate the file path - with open(self.file_path, "r", encoding="utf-8") as json_file: - data = json.load(json_file) + paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path + + content = {} + for path in paths: + with open(path, "r", encoding="utf-8") as json_file: + data = json.load(json_file) content = self._json_to_text(data) return content @@ -34,7 +39,10 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource): Add JSON file content to the knowledge source, chunk it, compute embeddings, and save the embeddings. """ - new_chunks = self._chunk_text(self.content) + content_str = ( + str(self.content) if isinstance(self.content, dict) else self.content + ) + new_chunks = self._chunk_text(content_str) self.chunks.extend(new_chunks) self.save_documents(metadata=self.metadata) diff --git a/src/crewai/knowledge/storage/knowledge_storage.py b/src/crewai/knowledge/storage/knowledge_storage.py index e8122bc72..875b01347 100644 --- a/src/crewai/knowledge/storage/knowledge_storage.py +++ b/src/crewai/knowledge/storage/knowledge_storage.py @@ -54,14 +54,14 @@ class KnowledgeStorage(BaseKnowledgeStorage): where=filter, ) results = [] - for i in range(len(fetched["ids"][0])): + for i in range(len(fetched["ids"][0])): # type: ignore result = { - "id": fetched["ids"][0][i], - "metadata": fetched["metadatas"][0][i], - "context": fetched["documents"][0][i], - "score": fetched["distances"][0][i], + "id": fetched["ids"][0][i], # type: ignore + "metadata": fetched["metadatas"][0][i], # type: ignore + "context": fetched["documents"][0][i], # type: ignore + "score": fetched["distances"][0][i], # type: ignore } - if result["score"] >= score_threshold: + if result["score"] >= score_threshold: # type: ignore results.append(result) return results else: