feat: add crewai-tools library to workspace

- Migrate crewai-tools as standalone package in lib/tools - Configure UV workspace for monorepo structure - Move assets to repository root - Clean up duplicate README files - Focus pre-commit hooks on lib/crewai/src only
2026-01-20 21:38:14 +00:00 · 2025-09-26 15:05:41 -04:00
parent 3eeb9b8f6c e350817b8d
commit e2270456c4
300 changed files with 31874 additions and 1 deletions
--- a/lib/tools/tests/rag/test_text_loaders.py
+++ b/lib/tools/tests/rag/test_text_loaders.py
@@ -0,0 +1,160 @@
+import hashlib
+import os
+import tempfile
+import pytest
+
+from crewai_tools.rag.loaders.text_loader import TextFileLoader, TextLoader
+from crewai_tools.rag.base_loader import LoaderResult
+from crewai_tools.rag.source_content import SourceContent
+
+
+def write_temp_file(content, suffix=".txt", encoding="utf-8"):
+    with tempfile.NamedTemporaryFile(mode="w", suffix=suffix, delete=False, encoding=encoding) as f:
+        f.write(content)
+        return f.name
+
+
+def cleanup_temp_file(path):
+    try:
+        os.unlink(path)
+    except FileNotFoundError:
+        pass
+
+
+class TestTextFileLoader:
+    def test_basic_text_file(self):
+        content = "This is test content\nWith multiple lines\nAnd more text"
+        path = write_temp_file(content)
+        try:
+            result = TextFileLoader().load(SourceContent(path))
+            assert isinstance(result, LoaderResult)
+            assert result.content == content
+            assert result.source == path
+            assert result.doc_id
+            assert result.metadata in (None, {})
+        finally:
+            cleanup_temp_file(path)
+
+    def test_empty_file(self):
+        path = write_temp_file("")
+        try:
+            result = TextFileLoader().load(SourceContent(path))
+            assert result.content == ""
+        finally:
+            cleanup_temp_file(path)
+
+    def test_unicode_content(self):
+        content = "Hello 世界 🌍 émojis 🎉 åäö"
+        path = write_temp_file(content)
+        try:
+            result = TextFileLoader().load(SourceContent(path))
+            assert content in result.content
+        finally:
+            cleanup_temp_file(path)
+
+    def test_large_file(self):
+        content = "\n".join(f"Line {i}" for i in range(100))
+        path = write_temp_file(content)
+        try:
+            result = TextFileLoader().load(SourceContent(path))
+            assert "Line 0" in result.content
+            assert "Line 99" in result.content
+            assert result.content.count("\n") == 99
+        finally:
+            cleanup_temp_file(path)
+
+    def test_missing_file(self):
+        with pytest.raises(FileNotFoundError):
+            TextFileLoader().load(SourceContent("/nonexistent/path.txt"))
+
+    def test_permission_denied(self):
+        path = write_temp_file("Some content")
+        os.chmod(path, 0o000)
+        try:
+            with pytest.raises(PermissionError):
+                TextFileLoader().load(SourceContent(path))
+        finally:
+            os.chmod(path, 0o644)
+            cleanup_temp_file(path)
+
+    def test_doc_id_consistency(self):
+        content = "Consistent content"
+        path = write_temp_file(content)
+        try:
+            loader = TextFileLoader()
+            result1 = loader.load(SourceContent(path))
+            result2 = loader.load(SourceContent(path))
+            expected_id = hashlib.sha256((path + content).encode("utf-8")).hexdigest()
+            assert result1.doc_id == result2.doc_id == expected_id
+        finally:
+            cleanup_temp_file(path)
+
+    def test_various_extensions(self):
+        content = "Same content"
+        for ext in [".txt", ".md", ".log", ".json"]:
+            path = write_temp_file(content, suffix=ext)
+            try:
+                result = TextFileLoader().load(SourceContent(path))
+                assert result.content == content
+            finally:
+                cleanup_temp_file(path)
+
+
+class TestTextLoader:
+    def test_basic_text(self):
+        content = "Raw text"
+        result = TextLoader().load(SourceContent(content))
+        expected_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()
+        assert result.content == content
+        assert result.source == expected_hash
+        assert result.doc_id == expected_hash
+
+    def test_multiline_text(self):
+        content = "Line 1\nLine 2\nLine 3"
+        result = TextLoader().load(SourceContent(content))
+        assert "Line 2" in result.content
+
+    def test_empty_text(self):
+        result = TextLoader().load(SourceContent(""))
+        assert result.content == ""
+        assert result.source == hashlib.sha256("".encode("utf-8")).hexdigest()
+
+    def test_unicode_text(self):
+        content = "世界 🌍 émojis 🎉 åäö"
+        result = TextLoader().load(SourceContent(content))
+        assert content in result.content
+
+    def test_special_characters(self):
+        content = "!@#$$%^&*()_+-=~`{}[]\\|;:'\",.<>/?"
+        result = TextLoader().load(SourceContent(content))
+        assert result.content == content
+
+    def test_doc_id_uniqueness(self):
+        result1 = TextLoader().load(SourceContent("A"))
+        result2 = TextLoader().load(SourceContent("B"))
+        assert result1.doc_id != result2.doc_id
+
+    def test_whitespace_text(self):
+        content = "   \n\t   "
+        result = TextLoader().load(SourceContent(content))
+        assert result.content == content
+
+    def test_long_text(self):
+        content = "A" * 10000
+        result = TextLoader().load(SourceContent(content))
+        assert len(result.content) == 10000
+
+
+class TestTextLoadersIntegration:
+    def test_consistency_between_loaders(self):
+        content = "Consistent content"
+        text_result = TextLoader().load(SourceContent(content))
+        file_path = write_temp_file(content)
+        try:
+            file_result = TextFileLoader().load(SourceContent(file_path))
+
+            assert text_result.content == file_result.content
+            assert text_result.source != file_result.source
+            assert text_result.doc_id != file_result.doc_id
+        finally:
+            cleanup_temp_file(file_path)