mypy src fixes

Lorenze Jay
2024-11-19 12:02:06 -08:00
parent 914067df37
commit 0c5b6f2a93
4 changed files with 42 additions and 21 deletions

View File

@@ -1,5 +1,6 @@
 import csv
-from typing import List
+from typing import Dict, List
 from pathlib import Path
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -7,22 +8,28 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge
 class CSVKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries CSV file content using embeddings."""

-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess CSV file content."""
         super().load_content()  # Validate the file path
-        with open(self.file_path, "r", encoding="utf-8") as csvfile:
+        file_path_str = (
+            str(self.file_path) if isinstance(self.file_path, Path) else self.file_path
+        )
+        with open(file_path_str, "r", encoding="utf-8") as csvfile:
             reader = csv.reader(csvfile)
             content = ""
             for row in reader:
                 content += " ".join(row) + "\n"
-        return content
+        return {self.file_path: content}

     def add(self) -> None:
         """
         Add CSV file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        content_str = (
+            str(self.content) if isinstance(self.content, dict) else self.content
+        )
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
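
For reference, a minimal standalone sketch of the narrowing pattern this hunk applies: a value that may be either a plain string or a {Path: text} mapping is reduced to one string before chunking. The names here (Content, to_chunkable, data.csv) are illustrative only, not crewAI APIs.

    from pathlib import Path
    from typing import Dict, Union

    # Stand-in for the union shape the diff handles (illustrative, not crewAI code).
    Content = Union[str, Dict[Path, str]]

    def to_chunkable(content: Content) -> str:
        # Mirror the hunk: a dict is collapsed via str() so a str-typed
        # chunker accepts it; plain strings pass through unchanged.
        return str(content) if isinstance(content, dict) else content

    print(to_chunkable("plain text"))
    print(to_chunkable({Path("data.csv"): "col1 col2\n1 2\n"}))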

View File

@@ -1,18 +1,18 @@
-from typing import List
+from typing import Dict, List
 from pathlib import Path
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource


 class ExcelKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries Excel file content using embeddings."""

-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess Excel file content."""
         super().load_content()  # Validate the file path
         pd = self._import_dependencies()
         df = pd.read_excel(self.file_path)
         content = df.to_csv(index=False)
-        return content
+        return {self.file_path: content}

     def _import_dependencies(self):
         """Dynamically import dependencies."""
@@ -32,7 +32,13 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
         Add Excel file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        # Convert dictionary values to a single string if content is a dictionary
+        if isinstance(self.content, dict):
+            content_str = "\n".join(str(value) for value in self.content.values())
+        else:
+            content_str = str(self.content)
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
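
As an aside, a self-contained sketch of the flattening step this hunk adds, with hypothetical names (flatten_content and the .xlsx paths are not crewAI identifiers): dictionary values are joined into one newline-separated string, dropping the Path keys, before chunking.

    from pathlib import Path
    from typing import Dict, Union

    def flatten_content(content: Union[str, Dict[Path, str]]) -> str:
        # Join only the values; the Path keys are discarded at this point.
        if isinstance(content, dict):
            return "\n".join(str(value) for value in content.values())
        return str(content)

    print(flatten_content({Path("a.xlsx"): "col1,col2\n1,2", Path("b.xlsx"): "x,y\n3,4"}))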

View File

@@ -1,5 +1,6 @@
 import json
-from typing import Any, List
+from typing import Any, Dict, List
 from pathlib import Path
 from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -7,11 +8,15 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge
 class JSONKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries JSON file content using embeddings."""

-    def load_content(self) -> str:
+    def load_content(self) -> Dict[Path, str]:
         """Load and preprocess JSON file content."""
         super().load_content()  # Validate the file path
-        with open(self.file_path, "r", encoding="utf-8") as json_file:
-            data = json.load(json_file)
+        paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path
+        content = {}
+        for path in paths:
+            with open(path, "r", encoding="utf-8") as json_file:
+                data = json.load(json_file)
         content = self._json_to_text(data)
         return content
@@ -34,7 +39,10 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
         Add JSON file content to the knowledge source, chunk it, compute embeddings,
         and save the embeddings.
         """
-        new_chunks = self._chunk_text(self.content)
+        content_str = (
+            str(self.content) if isinstance(self.content, dict) else self.content
+        )
+        new_chunks = self._chunk_text(content_str)
         self.chunks.extend(new_chunks)
         self.save_documents(metadata=self.metadata)
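
For context, a small standalone sketch of the single-path-versus-list normalization this hunk introduces (as_path_list and the .json names are illustrative, not crewAI APIs):

    from pathlib import Path
    from typing import List, Union

    def as_path_list(file_path: Union[Path, List[Path]]) -> List[Path]:
        # Wrap a single Path in a list so callers can always iterate.
        return [file_path] if isinstance(file_path, Path) else file_path

    print(as_path_list(Path("config.json")))
    print(as_path_list([Path("a.json"), Path("b.json")]))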

View File

@@ -54,14 +54,14 @@ class KnowledgeStorage(BaseKnowledgeStorage):
                     where=filter,
                 )
                 results = []
-                for i in range(len(fetched["ids"][0])):
+                for i in range(len(fetched["ids"][0])):  # type: ignore
                     result = {
-                        "id": fetched["ids"][0][i],
-                        "metadata": fetched["metadatas"][0][i],
-                        "context": fetched["documents"][0][i],
-                        "score": fetched["distances"][0][i],
+                        "id": fetched["ids"][0][i],  # type: ignore
+                        "metadata": fetched["metadatas"][0][i],  # type: ignore
+                        "context": fetched["documents"][0][i],  # type: ignore
+                        "score": fetched["distances"][0][i],  # type: ignore
                     }
-                    if result["score"] >= score_threshold:
+                    if result["score"] >= score_threshold:  # type: ignore
                         results.append(result)
                 return results
             else:
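
Assuming the "# type: ignore" comments are needed because the query result's fields are typed as Optional (an assumption, not confirmed by this diff), a standalone sketch of the alternative of narrowing the Optionals once up front follows; the fetched dict below is a stand-in shaped like a vector-store query result, not the real chromadb return value.

    from typing import Any, Dict, List, Optional

    # Stand-in with Optional list fields (illustrative only; not the actual chromadb type).
    fetched: Dict[str, Optional[List[List[Any]]]] = {
        "ids": [["doc-1"]],
        "metadatas": [[{"source": "a.csv"}]],
        "documents": [["col1 col2"]],
        "distances": [[0.12]],
    }

    results: List[Dict[str, Any]] = []
    ids, metadatas = fetched["ids"], fetched["metadatas"]
    documents, distances = fetched["documents"], fetched["distances"]
    # Narrowing the Optionals once replaces the per-line ignore comments.
    if ids and metadatas and documents and distances:
        for i in range(len(ids[0])):
            results.append(
                {
                    "id": ids[0][i],
                    "metadata": metadatas[0][i],
                    "context": documents[0][i],
                    "score": distances[0][i],
                }
            )
    print(results)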