mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-20 21:38:14 +00:00
feat: add crewai-tools library to workspace
- Migrate crewai-tools as standalone package in lib/tools
- Configure UV workspace for monorepo structure
- Move assets to repository root
- Clean up duplicate README files
- Focus pre-commit hooks on lib/crewai/src only
This commit is contained in:
160
lib/tools/tests/rag/test_text_loaders.py
Normal file
160
lib/tools/tests/rag/test_text_loaders.py
Normal file
@@ -0,0 +1,160 @@
|
||||
import hashlib
|
||||
import os
|
||||
import tempfile
|
||||
import pytest
|
||||
|
||||
from crewai_tools.rag.loaders.text_loader import TextFileLoader, TextLoader
|
||||
from crewai_tools.rag.base_loader import LoaderResult
|
||||
from crewai_tools.rag.source_content import SourceContent
|
||||
|
||||
|
||||
def write_temp_file(content, suffix=".txt", encoding="utf-8"):
    """Write *content* to a new named temporary file and return its path.

    The file is created with ``delete=False``, so it stays on disk after
    being closed; the caller is responsible for removing it.
    """
    handle = tempfile.NamedTemporaryFile(
        mode="w",
        suffix=suffix,
        delete=False,
        encoding=encoding,
    )
    with handle:
        handle.write(content)
        temp_path = handle.name
    return temp_path
|
||||
|
||||
|
||||
def cleanup_temp_file(path):
    """Delete the file at *path*, ignoring the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Best-effort cleanup: a missing file means there is nothing to do.
        return None
|
||||
|
||||
|
||||
class TestTextFileLoader:
    """Tests for ``TextFileLoader`` loading text from files on disk."""

    def test_basic_text_file(self):
        """A multi-line file is returned verbatim, with the path as source."""
        content = "This is test content\nWith multiple lines\nAnd more text"
        path = write_temp_file(content)
        try:
            result = TextFileLoader().load(SourceContent(path))
            assert isinstance(result, LoaderResult)
            assert result.content == content
            assert result.source == path
            assert result.doc_id
            assert result.metadata in (None, {})
        finally:
            cleanup_temp_file(path)

    def test_empty_file(self):
        """An empty file yields empty content."""
        path = write_temp_file("")
        try:
            result = TextFileLoader().load(SourceContent(path))
            assert result.content == ""
        finally:
            cleanup_temp_file(path)

    def test_unicode_content(self):
        """Non-ASCII text written as UTF-8 is present in the loaded content."""
        content = "Hello 世界 🌍 émojis 🎉 åäö"
        path = write_temp_file(content)
        try:
            result = TextFileLoader().load(SourceContent(path))
            assert content in result.content
        finally:
            cleanup_temp_file(path)

    def test_large_file(self):
        """A 100-line file keeps its first and last lines and all newlines."""
        content = "\n".join(f"Line {i}" for i in range(100))
        path = write_temp_file(content)
        try:
            result = TextFileLoader().load(SourceContent(path))
            assert "Line 0" in result.content
            assert "Line 99" in result.content
            assert result.content.count("\n") == 99
        finally:
            cleanup_temp_file(path)

    def test_missing_file(self):
        """Loading a path that does not exist raises ``FileNotFoundError``."""
        with pytest.raises(FileNotFoundError):
            TextFileLoader().load(SourceContent("/nonexistent/path.txt"))

    def test_permission_denied(self):
        """Loading a file with all permission bits cleared raises ``PermissionError``.

        Skipped where mode bits do not block reads: the superuser bypasses
        them, and platforms without ``os.geteuid`` (e.g. Windows) only honour
        the read-only flag in ``os.chmod``.
        """
        geteuid = getattr(os, "geteuid", None)
        if geteuid is None or geteuid() == 0:
            pytest.skip("file permission bits are not enforced for this user/platform")

        path = write_temp_file("Some content")
        try:
            # chmod lives inside the try so the temp file is removed even if
            # changing the mode itself fails.
            os.chmod(path, 0o000)
            with pytest.raises(PermissionError):
                TextFileLoader().load(SourceContent(path))
        finally:
            # Restore a normal mode before deleting the file.
            os.chmod(path, 0o644)
            cleanup_temp_file(path)

    def test_doc_id_consistency(self):
        """Loading the same file twice gives the same doc_id: sha256(path + content)."""
        content = "Consistent content"
        path = write_temp_file(content)
        try:
            loader = TextFileLoader()
            result1 = loader.load(SourceContent(path))
            result2 = loader.load(SourceContent(path))
            expected_id = hashlib.sha256((path + content).encode("utf-8")).hexdigest()
            assert result1.doc_id == result2.doc_id == expected_id
        finally:
            cleanup_temp_file(path)

    def test_various_extensions(self):
        """The file extension does not affect the loaded content."""
        content = "Same content"
        for ext in [".txt", ".md", ".log", ".json"]:
            path = write_temp_file(content, suffix=ext)
            try:
                result = TextFileLoader().load(SourceContent(path))
                assert result.content == content
            finally:
                cleanup_temp_file(path)
|
||||
|
||||
|
||||
class TestTextLoader:
    """Tests for ``TextLoader`` operating on raw in-memory strings."""

    def test_basic_text(self):
        """Content round-trips; source and doc_id equal the content's SHA-256."""
        text = "Raw text"
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        loaded = TextLoader().load(SourceContent(text))
        assert loaded.content == text
        assert loaded.source == digest
        assert loaded.doc_id == digest

    def test_multiline_text(self):
        """Interior lines of multi-line input are present in the content."""
        text = "Line 1\nLine 2\nLine 3"
        loaded = TextLoader().load(SourceContent(text))
        assert "Line 2" in loaded.content

    def test_empty_text(self):
        """Empty input gives empty content and the SHA-256 of nothing as source."""
        loaded = TextLoader().load(SourceContent(""))
        empty_digest = hashlib.sha256("".encode("utf-8")).hexdigest()
        assert loaded.content == ""
        assert loaded.source == empty_digest

    def test_unicode_text(self):
        """Non-ASCII characters are present in the loaded content."""
        text = "世界 🌍 émojis 🎉 åäö"
        loaded = TextLoader().load(SourceContent(text))
        assert text in loaded.content

    def test_special_characters(self):
        """Punctuation and escape-prone characters are returned unchanged."""
        text = "!@#$$%^&*()_+-=~`{}[]\\|;:'\",.<>/?"
        loaded = TextLoader().load(SourceContent(text))
        assert loaded.content == text

    def test_doc_id_uniqueness(self):
        """Different inputs produce different doc_ids."""
        first = TextLoader().load(SourceContent("A"))
        second = TextLoader().load(SourceContent("B"))
        assert first.doc_id != second.doc_id

    def test_whitespace_text(self):
        """Whitespace-only input is returned unchanged."""
        text = " \n\t "
        loaded = TextLoader().load(SourceContent(text))
        assert loaded.content == text

    def test_long_text(self):
        """A 10,000-character input keeps its full length."""
        text = "A" * 10000
        loaded = TextLoader().load(SourceContent(text))
        assert len(loaded.content) == 10000
|
||||
|
||||
|
||||
class TestTextLoadersIntegration:
    """Cross-checks between ``TextLoader`` and ``TextFileLoader``."""

    def test_consistency_between_loaders(self):
        """Same text via both loaders: equal content, different source and doc_id."""
        text = "Consistent content"
        from_string = TextLoader().load(SourceContent(text))
        temp_path = write_temp_file(text)
        try:
            from_file = TextFileLoader().load(SourceContent(temp_path))

            assert from_string.content == from_file.content
            assert from_string.source != from_file.source
            assert from_string.doc_id != from_file.doc_id
        finally:
            cleanup_temp_file(temp_path)
|
||||
Reference in New Issue
Block a user