Files
crewAI/lib/crewai-tools/src/crewai_tools/rag/base_loader.py
2025-10-02 11:06:38 -04:00

40 lines
1.5 KiB
Python

from abc import ABC, abstractmethod
from typing import Any
from pydantic import BaseModel, Field
from crewai_tools.rag.misc import compute_sha256
from crewai_tools.rag.source_content import SourceContent
class LoaderResult(BaseModel):
content: str = Field(description="The text content of the source")
source: str = Field(description="The source of the content", default="unknown")
metadata: dict[str, Any] = Field(
description="The metadata of the source", default_factory=dict
)
doc_id: str = Field(description="The id of the document")
class BaseLoader(ABC):
def __init__(self, config: dict[str, Any] | None = None):
self.config = config or {}
@abstractmethod
def load(self, content: SourceContent, **kwargs) -> LoaderResult: ...
def generate_doc_id(
self, source_ref: str | None = None, content: str | None = None
) -> str:
"""Generate a unique document id based on the source reference and content.
If the source reference is not provided, the content is used as the source reference.
If the content is not provided, the source reference is used as the content.
If both are provided, the source reference is used as the content.
Both are optional because the TEXT content type does not have a source reference. In this case, the content is used as the source reference.
"""
source_ref = source_ref or ""
content = content or ""
return compute_sha256(source_ref + content)