mypy src fixes

Lorenze Jay
2024-11-19 12:02:06 -08:00
parent 914067df37
commit 0c5b6f2a93
4 changed files with 42 additions and 21 deletions

View File

@@ -1,5 +1,6 @@
 import csv
-from typing import List
+from typing import Dict, List
+from pathlib import Path
 
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -7,22 +8,28 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
 class CSVKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries CSV file content using embeddings."""
 
-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess CSV file content."""
         super().load_content()  # Validate the file path
-        with open(self.file_path, "r", encoding="utf-8") as csvfile:
+        file_path_str = (
+            str(self.file_path) if isinstance(self.file_path, Path) else self.file_path
+        )
+        with open(file_path_str, "r", encoding="utf-8") as csvfile:
             reader = csv.reader(csvfile)
             content = ""
             for row in reader:
                 content += " ".join(row) + "\n"
-        return content
+        return {self.file_path: content}
 
     def add(self) -> None:
         """
         Add CSV file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        content_str = (
+            str(self.content) if isinstance(self.content, dict) else self.content
+        )
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
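
The net effect of this hunk is that load_content now reports its result keyed by source path instead of returning a bare string, while add() tolerates either the old string shape or the new dict shape before chunking. A minimal usage sketch, assuming the module path and the file_path constructor keyword (the sample CSV path is hypothetical):

from pathlib import Path

# Assumed import path; the commit only shows the class body.
from crewai.knowledge.source.csv_knowledge_source import CSVKnowledgeSource

source = CSVKnowledgeSource(file_path=Path("data/products.csv"))  # hypothetical file
content_by_path = source.load_content()
# -> {PosixPath('data/products.csv'): 'name price\nwidget 9.99\n...'}
flat_text = "\n".join(content_by_path.values())  # one way to flatten the per-file mapping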

View File

@@ -1,18 +1,18 @@
-from typing import List
+from typing import Dict, List
+from pathlib import Path
 
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
 
 
 class ExcelKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries Excel file content using embeddings."""
 
-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess Excel file content."""
         super().load_content()  # Validate the file path
         pd = self._import_dependencies()
         df = pd.read_excel(self.file_path)
         content = df.to_csv(index=False)
-        return content
+        return {self.file_path: content}
 
     def _import_dependencies(self):
         """Dynamically import dependencies."""
@@ -32,7 +32,13 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
         Add Excel file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        # Convert dictionary values to a single string if content is a dictionary
+        if isinstance(self.content, dict):
+            content_str = "\n".join(str(value) for value in self.content.values())
+        else:
+            content_str = str(self.content)
+
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
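
Unlike the CSV source above (and the JSON source below), the Excel add() joins the dictionary values with newlines rather than calling str() on the whole mapping, so the Path keys and dict repr never leak into the chunked text. A small standalone illustration of the difference (not part of the commit; output shown for a POSIX path):

from pathlib import Path

content = {Path("sheet.xlsx"): "a,b\n1,2\n"}

str(content)
# "{PosixPath('sheet.xlsx'): 'a,b\\n1,2\\n'}"  -- the dict repr would end up in the chunks

"\n".join(str(value) for value in content.values())
# "a,b\n1,2\n"                                 -- only the spreadsheet text is chunked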

View File

@@ -1,5 +1,6 @@
 import json
-from typing import Any, List
+from typing import Any, Dict, List
+from pathlib import Path
 
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -7,11 +8,15 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
 class JSONKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries JSON file content using embeddings."""
 
-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess JSON file content."""
         super().load_content()  # Validate the file path
-        with open(self.file_path, "r", encoding="utf-8") as json_file:
-            data = json.load(json_file)
-        content = self._json_to_text(data)
+        paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
+
+        content = {}
+        for path in paths:
+            with open(path, "r", encoding="utf-8") as json_file:
+                data = json.load(json_file)
+            content = self._json_to_text(data)
         return content
@@ -34,7 +39,10 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
         Add JSON file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        content_str = (
+            str(self.content) if isinstance(self.content, dict) else self.content
+        )
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
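
The JSON loader now normalizes file_path into a list so a single Path and a list of Paths take the same code path; note that, as committed, the loop rebinds content to each file's text rather than filling the dict it initializes, so only the last file's text is returned. A standalone sketch of the per-path pattern with illustrative names, keying the result by path (str(data) stands in for the class's _json_to_text helper):

import json
from pathlib import Path
from typing import Dict, List, Union

def load_json_files(file_path: Union[Path, List[Path]]) -> Dict[Path, str]:
    """Illustrative helper: accept one Path or a list of Paths, return text keyed by path."""
    paths = [file_path] if isinstance(file_path, Path) else file_path
    content: Dict[Path, str] = {}
    for path in paths:
        with open(path, "r", encoding="utf-8") as json_file:
            data = json.load(json_file)
        content[path] = str(data)  # stand-in for the class's _json_to_text helper
    return content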

View File

@@ -54,14 +54,14 @@ class KnowledgeStorage(BaseKnowledgeStorage):
                 where=filter,
             )
             results = []
-            for i in range(len(fetched["ids"][0])):
+            for i in range(len(fetched["ids"][0])):  # type: ignore
                 result = {
-                    "id": fetched["ids"][0][i],
-                    "metadata": fetched["metadatas"][0][i],
-                    "context": fetched["documents"][0][i],
-                    "score": fetched["distances"][0][i],
+                    "id": fetched["ids"][0][i],  # type: ignore
+                    "metadata": fetched["metadatas"][0][i],  # type: ignore
+                    "context": fetched["documents"][0][i],  # type: ignore
+                    "score": fetched["distances"][0][i],  # type: ignore
                 }
-                if result["score"] >= score_threshold:
+                if result["score"] >= score_threshold:  # type: ignore
                     results.append(result)
             return results
         else:
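
The type: ignore comments here address mypy rather than runtime behavior: chromadb types several of these query-result fields as optional lists, so indexing them directly is flagged even though the values are present in practice. An alternative sketch of this loop body that narrows the optionals once instead of suppressing the checker, assuming fetched comes from chromadb's Collection.query and the same >= score_threshold filter is wanted (this is not the committed code, and it is a method-body fragment, not a standalone script):

# Narrow the optional fields up front; key names follow the lookups in the hunk above.
ids = fetched["ids"][0]
metadatas = (fetched["metadatas"] or [[]])[0]
documents = (fetched["documents"] or [[]])[0]
distances = (fetched["distances"] or [[]])[0]

results = []
for id_, metadata, document, distance in zip(ids, metadatas, documents, distances):
    if distance >= score_threshold:
        results.append(
            {"id": id_, "metadata": metadata, "context": document, "score": distance}
        )
return results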