mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 16:48:30 +00:00
git-subtree-dir: packages/tools git-subtree-split: 78317b9c127f18bd040c1d77e3c0840cdc9a5b38
47 lines
1.2 KiB
Python
47 lines
1.2 KiB
Python
import os
|
|
from urllib.parse import urlparse
|
|
from typing import TYPE_CHECKING
|
|
from pathlib import Path
|
|
from functools import cached_property
|
|
|
|
from crewai_tools.rag.misc import compute_sha256
|
|
|
|
if TYPE_CHECKING:
|
|
from crewai_tools.rag.data_types import DataType
|
|
|
|
|
|
class SourceContent:
|
|
def __init__(self, source: str | Path):
|
|
self.source = str(source)
|
|
|
|
def is_url(self) -> bool:
|
|
if not isinstance(self.source, str):
|
|
return False
|
|
try:
|
|
parsed_url = urlparse(self.source)
|
|
return bool(parsed_url.scheme and parsed_url.netloc)
|
|
except Exception:
|
|
return False
|
|
|
|
def path_exists(self) -> bool:
|
|
return os.path.exists(self.source)
|
|
|
|
@cached_property
|
|
def data_type(self) -> "DataType":
|
|
from crewai_tools.rag.data_types import DataTypes
|
|
|
|
return DataTypes.from_content(self.source)
|
|
|
|
@cached_property
|
|
def source_ref(self) -> str:
|
|
""""
|
|
Returns the source reference for the content.
|
|
If the content is a URL or a local file, returns the source.
|
|
Otherwise, returns the hash of the content.
|
|
"""
|
|
|
|
if self.is_url() or self.path_exists():
|
|
return self.source
|
|
|
|
return compute_sha256(self.source)
|