mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-12 09:38:31 +00:00
- Merged upstream changes from the crewAI-tools main branch
- Resolved conflicts due to the monorepo structure (crewai_tools -> src/crewai_tools)
- Removed deprecated embedchain adapters
- Added new RAG loaders and crewai_rag_adapter
- Consolidated dependencies in pyproject.toml

Fixed critical linting issues:
- Added ClassVar annotations for mutable class attributes
- Added timeouts to requests calls (30s default)
- Fixed exception handling with proper 'from' clauses
- Added noqa comments for public API functions (backward compatibility)
- Updated ruff config to ignore expected patterns:
  - F401 in __init__ files (intentional re-exports)
  - S101 in test files (assertions are expected)
  - S607 for subprocess calls (uv/pip commands are safe)

Remaining issues are from upstream code and will be addressed in separate PRs.
47 lines
1.2 KiB
Python
47 lines
1.2 KiB
Python
import os
|
|
from functools import cached_property
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
from urllib.parse import urlparse
|
|
|
|
from crewai_tools.rag.misc import compute_sha256
|
|
|
|
if TYPE_CHECKING:
|
|
from crewai_tools.rag.data_types import DataType
|
|
|
|
|
|
class SourceContent:
|
|
def __init__(self, source: str | Path):
|
|
self.source = str(source)
|
|
|
|
def is_url(self) -> bool:
|
|
if not isinstance(self.source, str):
|
|
return False
|
|
try:
|
|
parsed_url = urlparse(self.source)
|
|
return bool(parsed_url.scheme and parsed_url.netloc)
|
|
except Exception:
|
|
return False
|
|
|
|
def path_exists(self) -> bool:
|
|
return os.path.exists(self.source)
|
|
|
|
@cached_property
|
|
def data_type(self) -> "DataType":
|
|
from crewai_tools.rag.data_types import DataTypes
|
|
|
|
return DataTypes.from_content(self.source)
|
|
|
|
@cached_property
|
|
def source_ref(self) -> str:
|
|
""" "
|
|
Returns the source reference for the content.
|
|
If the content is a URL or a local file, returns the source.
|
|
Otherwise, returns the hash of the content.
|
|
"""
|
|
|
|
if self.is_url() or self.path_exists():
|
|
return self.source
|
|
|
|
return compute_sha256(self.source)
|