feat: merge latest changes from crewAI-tools main into packages/tools

- Merged upstream changes from crewAI-tools main branch
- Resolved conflicts due to monorepo structure (crewai_tools -> src/crewai_tools)
- Removed deprecated embedchain adapters
- Added new RAG loaders and crewai_rag_adapter
- Consolidated dependencies in pyproject.toml

Fixed critical linting issues:
- Added ClassVar annotations for mutable class attributes
- Added timeouts to requests calls (30s default)
- Fixed exception handling with proper 'from' clauses
- Added noqa comments for public API functions (backward compatibility)
- Updated ruff config to ignore expected patterns:
  - F401 in __init__ files (intentional re-exports)
  - S101 in test files (assertions are expected)
  - S607 for subprocess calls (uv/pip commands are safe)

Remaining issues are from upstream code and will be addressed in separate PRs.
2026-05-01 07:13:00 +00:00 · 2025-09-19 00:08:27 -04:00
parent 78a68c677c c960f26601
commit c5c07331bb
156 changed files with 4530 additions and 2718 deletions
--- a/packages/tools/src/crewai_tools/rag/base_loader.py
+++ b/packages/tools/src/crewai_tools/rag/base_loader.py
@@ -1,5 +1,6 @@
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Optional
+from typing import Any
+
 from pydantic import BaseModel, Field

 from crewai_tools.rag.misc import compute_sha256
@@ -9,19 +10,22 @@ from crewai_tools.rag.source_content import SourceContent
 class LoaderResult(BaseModel):
    content: str = Field(description="The text content of the source")
    source: str = Field(description="The source of the content", default="unknown")
-    metadata: Dict[str, Any] = Field(description="The metadata of the source", default_factory=dict)
+    metadata: dict[str, Any] = Field(
+        description="The metadata of the source", default_factory=dict
+    )
    doc_id: str = Field(description="The id of the document")


 class BaseLoader(ABC):
-    def __init__(self, config: Optional[Dict[str, Any]] = None):
+    def __init__(self, config: dict[str, Any] | None = None):
        self.config = config or {}

    @abstractmethod
-    def load(self, content: SourceContent, **kwargs) -> LoaderResult:
-        ...
+    def load(self, content: SourceContent, **kwargs) -> LoaderResult: ...

-    def generate_doc_id(self, source_ref: str | None = None, content: str | None = None) -> str:
+    def generate_doc_id(
+        self, source_ref: str | None = None, content: str | None = None
+    ) -> str:
        """
        Generate a unique document id based on the source reference and content.
        If the source reference is not provided, the content is used as the source reference.