From 0c5b6f2a93d42dd82283453351c2a98255ccea33 Mon Sep 17 00:00:00 2001
From: Lorenze Jay <lorenzejaytech@gmail.com>
Date: Tue, 19 Nov 2024 12:02:06 -0800
Subject: [PATCH] Fix mypy errors in src/crewai/knowledge

---
 .../knowledge/source/csv_knowledge_source.py   | 17 ++++++++++++-----
 .../knowledge/source/excel_knowledge_source.py | 16 +++++++++++-----
 .../knowledge/source/json_knowledge_source.py  | 18 +++++++++++++-----
 .../knowledge/storage/knowledge_storage.py     | 12 ++++++------
 4 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/src/crewai/knowledge/source/csv_knowledge_source.py b/src/crewai/knowledge/source/csv_knowledge_source.py
index c65e7f7a0..2f5a3cc2c 100644
--- a/src/crewai/knowledge/source/csv_knowledge_source.py
+++ b/src/crewai/knowledge/source/csv_knowledge_source.py
@@ -1,5 +1,6 @@
 import csv
-from typing import List
+from typing import Dict, List
+from pathlib import Path
 
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
 
@@ -7,22 +8,28 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge
 class CSVKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries CSV file content using embeddings."""
 
-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess CSV file content."""
         super().load_content()  # Validate the file path
-        with open(self.file_path, "r", encoding="utf-8") as csvfile:
+        file_path_str = (
+            str(self.file_path) if isinstance(self.file_path, Path) else self.file_path
+        )
+        with open(file_path_str, "r", encoding="utf-8") as csvfile:
             reader = csv.reader(csvfile)
             content = ""
             for row in reader:
                 content += " ".join(row) + "\n"
-        return content
+        return {self.file_path: content}
 
     def add(self) -> None:
         """
         Add CSV file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        content_str = (
+            str(self.content) if isinstance(self.content, dict) else self.content
+        )
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
 
diff --git a/src/crewai/knowledge/source/excel_knowledge_source.py b/src/crewai/knowledge/source/excel_knowledge_source.py
index ff0475472..5417b5200 100644
--- a/src/crewai/knowledge/source/excel_knowledge_source.py
+++ b/src/crewai/knowledge/source/excel_knowledge_source.py
@@ -1,18 +1,18 @@
-from typing import List
-
+from typing import Dict, List
+from pathlib import Path
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
 
 
 class ExcelKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries Excel file content using embeddings."""
 
-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess Excel file content."""
         super().load_content()  # Validate the file path
         pd = self._import_dependencies()
         df = pd.read_excel(self.file_path)
         content = df.to_csv(index=False)
-        return content
+        return {self.file_path: content}
 
     def _import_dependencies(self):
         """Dynamically import dependencies."""
@@ -32,7 +32,13 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
         Add Excel file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        # Convert dictionary values to a single string if content is a dictionary
+        if isinstance(self.content, dict):
+            content_str = "\n".join(str(value) for value in self.content.values())
+        else:
+            content_str = str(self.content)
+
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
 
diff --git a/src/crewai/knowledge/source/json_knowledge_source.py b/src/crewai/knowledge/source/json_knowledge_source.py
index 00f01c29e..e14be4ff8 100644
--- a/src/crewai/knowledge/source/json_knowledge_source.py
+++ b/src/crewai/knowledge/source/json_knowledge_source.py
@@ -1,5 +1,6 @@
 import json
-from typing import Any, List
+from typing import Any, Dict, List
+from pathlib import Path
 
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
 
@@ -7,11 +8,15 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge
 class JSONKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries JSON file content using embeddings."""
 
-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess JSON file content."""
         super().load_content()  # Validate the file path
-        with open(self.file_path, "r", encoding="utf-8") as json_file:
-            data = json.load(json_file)
+        paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
+
+        content = {}
+        for path in paths:
+            with open(path, "r", encoding="utf-8") as json_file:
+                data = json.load(json_file)
             content = self._json_to_text(data)
         return content
 
@@ -34,7 +39,10 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
         Add JSON file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        content_str = (
+            str(self.content) if isinstance(self.content, dict) else self.content
+        )
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
 
diff --git a/src/crewai/knowledge/storage/knowledge_storage.py b/src/crewai/knowledge/storage/knowledge_storage.py
index e8122bc72..875b01347 100644
--- a/src/crewai/knowledge/storage/knowledge_storage.py
+++ b/src/crewai/knowledge/storage/knowledge_storage.py
@@ -54,14 +54,14 @@ class KnowledgeStorage(BaseKnowledgeStorage):
                     where=filter,
                 )
                 results = []
-                for i in range(len(fetched["ids"][0])):
+                for i in range(len(fetched["ids"][0])):  # type: ignore
                     result = {
-                        "id": fetched["ids"][0][i],
-                        "metadata": fetched["metadatas"][0][i],
-                        "context": fetched["documents"][0][i],
-                        "score": fetched["distances"][0][i],
+                        "id": fetched["ids"][0][i],  # type: ignore
+                        "metadata": fetched["metadatas"][0][i],  # type: ignore
+                        "context": fetched["documents"][0][i],  # type: ignore
+                        "score": fetched["distances"][0][i],  # type: ignore
                     }
-                    if result["score"] >= score_threshold:
+                    if result["score"] >= score_threshold:  # type: ignore
                         results.append(result)
                 return results
             else: