feat: add crewai-tools workspace and fix tests/dependencies

* feat: add crewai-tools workspace structure

* Squashed 'temp-crewai-tools/' content from commit 9bae5633

git-subtree-dir: temp-crewai-tools
git-subtree-split: 9bae56339096cb70f03873e600192bd2cd207ac9

* feat: configure crewai-tools workspace package with dependencies

* fix: apply ruff auto-formatting to crewai-tools code

* chore: update lockfile

* fix: don't allow tool tests yet

* fix: comment out extra pytest flags for now

* fix: remove conflicting conftest.py from crewai-tools tests

* fix: resolve dependency conflicts and test issues

- Pin vcrpy to 7.0.0 to fix pytest-recording compatibility
- Comment out types-requests to resolve urllib3 conflict
- Update requests requirement in crewai-tools to >=2.32.0
This commit is contained in:
Greyson LaLonde
2025-09-28 00:05:42 -04:00
committed by GitHub
parent c591c1ac87
commit 289b90f00a
304 changed files with 46489 additions and 376 deletions

View File

@@ -0,0 +1,67 @@
import re
from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
from crewai_tools.rag.source_content import SourceContent
class MDXLoader(BaseLoader):
def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
source_ref = source_content.source_ref
content = source_content.source
if source_content.is_url():
content = self._load_from_url(source_ref, kwargs)
elif source_content.path_exists():
content = self._load_from_file(source_ref)
return self._parse_mdx(content, source_ref)
def _load_from_url(self, url: str, kwargs: dict) -> str:
import requests
headers = kwargs.get(
"headers",
{
"Accept": "text/markdown, text/x-markdown, text/plain",
"User-Agent": "Mozilla/5.0 (compatible; crewai-tools MDXLoader)",
},
)
try:
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
return response.text
except Exception as e:
raise ValueError(f"Error fetching MDX from URL {url}: {e!s}")
def _load_from_file(self, path: str) -> str:
with open(path, "r", encoding="utf-8") as file:
return file.read()
def _parse_mdx(self, content: str, source_ref: str) -> LoaderResult:
cleaned_content = content
# Remove import statements
cleaned_content = re.sub(
r"^import\s+.*?\n", "", cleaned_content, flags=re.MULTILINE
)
# Remove export statements
cleaned_content = re.sub(
r"^export\s+.*?(?:\n|$)", "", cleaned_content, flags=re.MULTILINE
)
# Remove JSX tags (simple approach)
cleaned_content = re.sub(r"<[^>]+>", "", cleaned_content)
# Clean up extra whitespace
cleaned_content = re.sub(r"\n\s*\n\s*\n", "\n\n", cleaned_content)
cleaned_content = cleaned_content.strip()
metadata = {"format": "mdx"}
return LoaderResult(
content=cleaned_content,
source=source_ref,
metadata=metadata,
doc_id=self.generate_doc_id(source_ref=source_ref, content=cleaned_content),
)