mypy src fixes

Lorenze Jay
2024-11-19 12:02:06 -08:00
parent 914067df37
commit 0c5b6f2a93
4 changed files with 42 additions and 21 deletions

View File

@@ -1,5 +1,6 @@
 import csv
-from typing import List
+from typing import Dict, List
 from pathlib import Path
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -7,22 +8,28 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge
 class CSVKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries CSV file content using embeddings."""

-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess CSV file content."""
         super().load_content()  # Validate the file path
-        with open(self.file_path, "r", encoding="utf-8") as csvfile:
+        file_path_str = (
+            str(self.file_path) if isinstance(self.file_path, Path) else self.file_path
+        )
+        with open(file_path_str, "r", encoding="utf-8") as csvfile:
             reader = csv.reader(csvfile)
             content = ""
             for row in reader:
                 content += " ".join(row) + "\n"
-        return content
+        return {self.file_path: content}

     def add(self) -> None:
         """
         Add CSV file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        content_str = (
+            str(self.content) if isinstance(self.content, dict) else self.content
+        )
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
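
For reference, a minimal standalone sketch of the narrowing pattern this hunk applies: a value that may be either a plain string or a {Path: text} mapping is reduced to one string before chunking. The names here (Content, to_chunkable, data.csv) are illustrative only, not crewAI APIs.

    from pathlib import Path
    from typing import Dict, Union

    # Stand-in for the union shape the diff handles (illustrative, not crewAI code).
    Content = Union[str, Dict[Path, str]]

    def to_chunkable(content: Content) -> str:
        # Mirror the hunk: a dict is collapsed via str() so a str-typed
        # chunker accepts it; plain strings pass through unchanged.
        return str(content) if isinstance(content, dict) else content

    print(to_chunkable("plain text"))
    print(to_chunkable({Path("data.csv"): "col1 col2\n1 2\n"}))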

View File

@@ -1,18 +1,18 @@
-from typing import List
+from typing import Dict, List
 from pathlib import Path
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource


 class ExcelKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries Excel file content using embeddings."""

-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess Excel file content."""
         super().load_content()  # Validate the file path
         pd = self._import_dependencies()
         df = pd.read_excel(self.file_path)
         content = df.to_csv(index=False)
-        return content
+        return {self.file_path: content}

     def _import_dependencies(self):
         """Dynamically import dependencies."""
@@ -32,7 +32,13 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
         Add Excel file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        # Convert dictionary values to a single string if content is a dictionary
+        if isinstance(self.content, dict):
+            content_str = "\n".join(str(value) for value in self.content.values())
+        else:
+            content_str = str(self.content)
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
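
As an aside, a self-contained sketch of the flattening step this hunk adds, with hypothetical names (flatten_content and the .xlsx paths are not crewAI identifiers): dictionary values are joined into one newline-separated string, dropping the Path keys, before chunking.

    from pathlib import Path
    from typing import Dict, Union

    def flatten_content(content: Union[str, Dict[Path, str]]) -> str:
        # Join only the values; the Path keys are discarded at this point.
        if isinstance(content, dict):
            return "\n".join(str(value) for value in content.values())
        return str(content)

    print(flatten_content({Path("a.xlsx"): "col1,col2\n1,2", Path("b.xlsx"): "x,y\n3,4"}))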

View File

@@ -1,5 +1,6 @@
 import json
-from typing import Any, List
+from typing import Any, Dict, List
 from pathlib import Path
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -7,11 +8,15 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge
 class JSONKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries JSON file content using embeddings."""

-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess JSON file content."""
         super().load_content()  # Validate the file path
-        with open(self.file_path, "r", encoding="utf-8") as json_file:
-            data = json.load(json_file)
+        paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
+        content = {}
+        for path in paths:
+            with open(path, "r", encoding="utf-8") as json_file:
+                data = json.load(json_file)
         content = self._json_to_text(data)
         return content
@@ -34,7 +39,10 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
         Add JSON file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        content_str = (
+            str(self.content) if isinstance(self.content, dict) else self.content
+        )
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
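
For context, a small standalone sketch of the single-path-versus-list normalization this hunk introduces (as_path_list and the .json names are illustrative, not crewAI APIs):

    from pathlib import Path
    from typing import List, Union

    def as_path_list(file_path: Union[Path, List[Path]]) -> List[Path]:
        # Wrap a single Path in a list so callers can always iterate.
        return [file_path] if isinstance(file_path, Path) else file_path

    print(as_path_list(Path("config.json")))
    print(as_path_list([Path("a.json"), Path("b.json")]))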

View File

@@ -54,14 +54,14 @@ class KnowledgeStorage(BaseKnowledgeStorage):
                     where=filter,
                 )
                 results = []
-                for i in range(len(fetched["ids"][0])):
+                for i in range(len(fetched["ids"][0])):  # type: ignore
                     result = {
-                        "id": fetched["ids"][0][i],
-                        "metadata": fetched["metadatas"][0][i],
-                        "context": fetched["documents"][0][i],
-                        "score": fetched["distances"][0][i],
+                        "id": fetched["ids"][0][i],  # type: ignore
+                        "metadata": fetched["metadatas"][0][i],  # type: ignore
+                        "context": fetched["documents"][0][i],  # type: ignore
+                        "score": fetched["distances"][0][i],  # type: ignore
                     }
-                    if result["score"] >= score_threshold:
+                    if result["score"] >= score_threshold:  # type: ignore
                         results.append(result)
                 return results
             else:
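
Assuming the "# type: ignore" comments are needed because the query result's fields are typed as Optional (an assumption, not confirmed by this diff), a standalone sketch of the alternative of narrowing the Optionals once up front follows; the fetched dict below is a stand-in shaped like a vector-store query result, not the real chromadb return value.

    from typing import Any, Dict, List, Optional

    # Stand-in with Optional list fields (illustrative only; not the actual chromadb type).
    fetched: Dict[str, Optional[List[List[Any]]]] = {
        "ids": [["doc-1"]],
        "metadatas": [[{"source": "a.csv"}]],
        "documents": [["col1 col2"]],
        "distances": [[0.12]],
    }

    results: List[Dict[str, Any]] = []
    ids, metadatas = fetched["ids"], fetched["metadatas"]
    documents, distances = fetched["documents"], fetched["distances"]
    # Narrowing the Optionals once replaces the per-line ignore comments.
    if ids and metadatas and documents and distances:
        for i in range(len(ids[0])):
            results.append(
                {
                    "id": ids[0][i],
                    "metadata": metadatas[0][i],
                    "context": documents[0][i],
                    "score": distances[0][i],
                }
            )
    print(results)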