Files
crewAI/packages/tools/src/crewai_tools/rag/base_loader.py
Greyson Lalonde a7bb489e9f feat: complete monorepo transformation with tools integration
- Add crewai-tools as git subtree preserving full history
- Move tools to proper src/ directory structure with git mv
- Configure tools pyproject.toml for workspace dependency on crewai-core
- Update workspace configuration to include both packages
- Fix build configurations for both packages
2025-09-12 22:07:31 -04:00

38 lines
1.5 KiB
Python

from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
from pydantic import BaseModel, Field
from crewai_tools.rag.misc import compute_sha256
from crewai_tools.rag.source_content import SourceContent
# Value object produced by a loader: the extracted text plus where it came
# from, free-form metadata, and the identifier assigned to the document.
# (Kept as comments rather than a class docstring so the model's generated
# schema is not altered.)
class LoaderResult(BaseModel):
    # Required: the text extracted from the source.
    content: str = Field(
        description="The text content of the source",
    )
    # Optional: falls back to the literal "unknown" when not supplied.
    source: str = Field(
        default="unknown",
        description="The source of the content",
    )
    # Optional: each instance gets its own fresh dict via default_factory.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="The metadata of the source",
    )
    # Required: identifier of the document.
    doc_id: str = Field(
        description="The id of the document",
    )
class BaseLoader(ABC):
    """Abstract base for loaders that turn a ``SourceContent`` into a ``LoaderResult``.

    Subclasses must implement :meth:`load`. The base class only stores the
    configuration mapping and offers :meth:`generate_doc_id` as a helper.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Store the loader configuration.

        Args:
            config: Optional configuration mapping. When it is ``None`` (or
                any other falsy value, e.g. an empty dict) a new empty dict
                is stored instead.
        """
        self.config = config if config else {}

    @abstractmethod
    def load(self, content: SourceContent, **kwargs) -> LoaderResult:
        """Load ``content`` and return the result; implemented by subclasses.

        Args:
            content: The source content to load.
            **kwargs: Extra loader-specific options.

        Returns:
            The ``LoaderResult`` for the given content.
        """
        ...

    def generate_doc_id(self, source_ref: str | None = None, content: str | None = None) -> str:
        """Build a document id from the source reference and/or the content.

        Each argument that is missing (``None`` or empty) is treated as an
        empty string; the two strings are then joined, source reference
        first, and the combined string is passed to ``compute_sha256``.

        Both arguments are optional so that callers with no source reference
        (e.g. plain text content) can derive the id from the content alone,
        and vice versa.

        Note that the parts are joined without a separator, so two different
        (source_ref, content) pairs whose concatenations are equal produce
        the same id.

        Args:
            source_ref: Reference to where the content came from, if any.
            content: The content itself, if any.

        Returns:
            The value ``compute_sha256`` returns for ``source_ref + content``.
        """
        ref_part = source_ref if source_ref else ""
        content_part = content if content else ""
        hash_input = ref_part + content_part
        return compute_sha256(hash_input)