Files
crewAI/crewai_tools/rag/source_content.py
Greyson Lalonde e16606672a Squashed 'packages/tools/' content from commit 78317b9c
git-subtree-dir: packages/tools
git-subtree-split: 78317b9c127f18bd040c1d77e3c0840cdc9a5b38
2025-09-12 21:58:02 -04:00

47 lines
1.2 KiB
Python

import os
from urllib.parse import urlparse
from typing import TYPE_CHECKING
from pathlib import Path
from functools import cached_property
from crewai_tools.rag.misc import compute_sha256
if TYPE_CHECKING:
from crewai_tools.rag.data_types import DataType
class SourceContent:
def __init__(self, source: str | Path):
self.source = str(source)
def is_url(self) -> bool:
if not isinstance(self.source, str):
return False
try:
parsed_url = urlparse(self.source)
return bool(parsed_url.scheme and parsed_url.netloc)
except Exception:
return False
def path_exists(self) -> bool:
return os.path.exists(self.source)
@cached_property
def data_type(self) -> "DataType":
from crewai_tools.rag.data_types import DataTypes
return DataTypes.from_content(self.source)
@cached_property
def source_ref(self) -> str:
""""
Returns the source reference for the content.
If the content is a URL or a local file, returns the source.
Otherwise, returns the hash of the content.
"""
if self.is_url() or self.path_exists():
return self.source
return compute_sha256(self.source)