Files
crewAI/src/crewai/knowledge/source/text_file_knowledge_source.py
2025-05-12 13:30:50 +00:00

33 lines
1.1 KiB
Python

from pathlib import Path
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
class TextFileKnowledgeSource(BaseFileKnowledgeSource):
"""A knowledge source that stores and queries text file content using embeddings."""
def load_content(self) -> dict[Path, str]:
"""Load and preprocess text file content."""
content = {}
for path in self.safe_file_paths:
path = self.convert_to_path(path)
with open(path, encoding="utf-8") as f:
content[path] = f.read()
return content
def add(self) -> None:
"""Add text file content to the knowledge source, chunk it, compute embeddings,
and save the embeddings.
"""
for text in self.content.values():
new_chunks = self._chunk_text(text)
self.chunks.extend(new_chunks)
self._save_documents()
def _chunk_text(self, text: str) -> list[str]:
"""Utility method to split text into chunks."""
return [
text[i : i + self.chunk_size]
for i in range(0, len(text), self.chunk_size - self.chunk_overlap)
]