Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-01-05 06:08:29 +00:00)

## Compare commits (10 commits)
| SHA1 |
|---|
| cc83c1ead5 |
| 7578901f6d |
| d1343b96ed |
| 42f2b4d551 |
| 0229390ad1 |
| f0fb349ddf |
| bf2e2a42da |
| 814c962196 |
| 2ebb2e845f |
| 7b550ebfe8 |
### .github/workflows/publish.yml (vendored, 4 lines changed)

```diff
@@ -7,7 +7,6 @@ on:
 jobs:
   build:
-    if: github.event.release.prerelease == true
     name: Build packages
     runs-on: ubuntu-latest
     permissions:
@@ -25,7 +24,7 @@ jobs:
       - name: Build packages
        run: |
-          uv build --prerelease="allow" --all-packages
+          uv build --all-packages
          rm dist/.gitignore

      - name: Upload artifacts
@@ -35,7 +34,6 @@ jobs:
          path: dist/

  publish:
-    if: github.event.release.prerelease == true
    name: Publish to PyPI
    needs: build
    runs-on: ubuntu-latest
```
@@ -3,19 +3,24 @@ repos:
|
||||
hooks:
|
||||
- id: ruff
|
||||
name: ruff
|
||||
entry: uv run ruff check
|
||||
entry: bash -c 'source .venv/bin/activate && uv run ruff check --config pyproject.toml "$@"' --
|
||||
language: system
|
||||
pass_filenames: true
|
||||
types: [python]
|
||||
exclude: ^lib/crewai/
|
||||
- id: ruff-format
|
||||
name: ruff-format
|
||||
entry: uv run ruff format
|
||||
entry: bash -c 'source .venv/bin/activate && uv run ruff format --config pyproject.toml "$@"' --
|
||||
language: system
|
||||
pass_filenames: true
|
||||
types: [python]
|
||||
exclude: ^lib/crewai/
|
||||
- id: mypy
|
||||
name: mypy
|
||||
entry: uv run mypy
|
||||
entry: bash -c 'source .venv/bin/activate && uv run mypy --config-file pyproject.toml "$@"' --
|
||||
language: system
|
||||
pass_filenames: true
|
||||
types: [python]
|
||||
exclude: ^lib/crewai/
|
||||
- repo: https://github.com/astral-sh/uv-pre-commit
|
||||
rev: 0.9.3
|
||||
hooks:
|
||||
- id: uv-lock
|
||||
|
||||
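A quick note on the `bash -c '… "$@"' --` pattern used in the new hook entries above: with `bash -c`, the first argument after the command string is bound to `$0`, so the trailing `--` is only a placeholder, and the filenames that pre-commit appends land in `"$@"`. A minimal sketch of the mechanism:

```bash
# "--" becomes $0; the remaining arguments populate "$@".
bash -c 'printf "checking: %s\n" "$@"' -- a.py b.py
# checking: a.py
# checking: b.py
```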
### docs.json (navigation)

```diff
@@ -134,6 +134,7 @@
         "group": "MCP Integration",
         "pages": [
           "en/mcp/overview",
+          "en/mcp/dsl-integration",
           "en/mcp/stdio",
           "en/mcp/sse",
           "en/mcp/streamable-http",
@@ -570,6 +571,7 @@
         "group": "Integração MCP",
         "pages": [
           "pt-BR/mcp/overview",
+          "pt-BR/mcp/dsl-integration",
           "pt-BR/mcp/stdio",
           "pt-BR/mcp/sse",
           "pt-BR/mcp/streamable-http",
@@ -825,6 +827,12 @@
         "group": "Triggers",
         "pages": [
           "pt-BR/enterprise/guides/automation-triggers",
+          "pt-BR/enterprise/guides/gmail-trigger",
+          "pt-BR/enterprise/guides/google-calendar-trigger",
+          "pt-BR/enterprise/guides/google-drive-trigger",
+          "pt-BR/enterprise/guides/outlook-trigger",
+          "pt-BR/enterprise/guides/onedrive-trigger",
+          "pt-BR/enterprise/guides/microsoft-teams-trigger",
           "pt-BR/enterprise/guides/slack-trigger",
           "pt-BR/enterprise/guides/hubspot-trigger",
           "pt-BR/enterprise/guides/salesforce-trigger",
@@ -983,6 +991,7 @@
         "group": "MCP 통합",
         "pages": [
           "ko/mcp/overview",
+          "ko/mcp/dsl-integration",
           "ko/mcp/stdio",
           "ko/mcp/sse",
           "ko/mcp/streamable-http",
@@ -1250,6 +1259,12 @@
         "group": "트리거",
         "pages": [
           "ko/enterprise/guides/automation-triggers",
+          "ko/enterprise/guides/gmail-trigger",
+          "ko/enterprise/guides/google-calendar-trigger",
+          "ko/enterprise/guides/google-drive-trigger",
+          "ko/enterprise/guides/outlook-trigger",
+          "ko/enterprise/guides/onedrive-trigger",
+          "ko/enterprise/guides/microsoft-teams-trigger",
           "ko/enterprise/guides/slack-trigger",
           "ko/enterprise/guides/hubspot-trigger",
           "ko/enterprise/guides/salesforce-trigger",
```
### Tools & Integrations doc (en)

````diff
@@ -57,6 +57,22 @@ Tools & Integrations is the central hub for connecting third‑party apps and ma
 uv add crewai-tools
 ```
 
+### Environment Variable Setup
+
+<Note>
+To use integrations with `Agent(apps=[])`, you must set the `CREWAI_PLATFORM_INTEGRATION_TOKEN` environment variable with your Enterprise Token.
+</Note>
+
+```bash
+export CREWAI_PLATFORM_INTEGRATION_TOKEN="your_enterprise_token"
+```
+
+Or add it to your `.env` file:
+
+```
+CREWAI_PLATFORM_INTEGRATION_TOKEN=your_enterprise_token
+```
+
 ### Usage Example
 
 <Tip>
````
### Automation Triggers guide (en)

````diff
@@ -117,27 +117,50 @@ Before wiring a trigger into production, make sure you:
 - Decide whether to pass trigger context automatically using `allow_crewai_trigger_context`
 - Set up monitoring—webhook logs, CrewAI execution history, and optional external alerting
 
-### Payload & Crew Examples Repository
+### Testing Triggers Locally with CLI
 
-We maintain a comprehensive repository with end-to-end trigger examples to help you build and test your automations:
+The CrewAI CLI provides powerful commands to help you develop and test trigger-driven automations without deploying to production.
 
-This repository contains:
+#### List Available Triggers
 
-- **Realistic payload samples** for every supported trigger integration
-- **Ready-to-run crew implementations** that parse each payload and turn it into a business workflow
-- **Multiple scenarios per integration** (e.g., new events, updates, deletions) so you can match the shape of your data
+View all available triggers for your connected integrations:
 
-| Integration | When it fires | Payload Samples | Crew Examples |
-| :-- | :-- | :-- | :-- |
-| Gmail | New messages, thread updates | [New alerts, thread updates](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail) | [`new-email-crew.py`, `gmail-alert-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail) |
-| Google Calendar | Event created / updated / started / ended / cancelled | [Event lifecycle payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_calendar) | [`calendar-event-crew.py`, `calendar-meeting-crew.py`, `calendar-working-location-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_calendar) |
-| Google Drive | File created / updated / deleted | [File lifecycle payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_drive) | [`drive-file-crew.py`, `drive-file-deletion-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_drive) |
-| Outlook | New email, calendar event removed | [Outlook payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/outlook) | [`outlook-message-crew.py`, `outlook-event-removal-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/outlook) |
-| OneDrive | File operations (create, update, share, delete) | [OneDrive payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/onedrive) | [`onedrive-file-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/onedrive) |
-| HubSpot | Record created / updated (contacts, companies, deals) | [HubSpot payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/hubspot) | [`hubspot-company-crew.py`, `hubspot-contact-crew.py`, `hubspot-record-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/hubspot) |
-| Microsoft Teams | Chat thread created | [Teams chat payload](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/microsoft-teams) | [`teams-chat-created-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/microsoft-teams) |
+```bash
+crewai triggers list
+```
+
+This command displays all triggers available based on your connected integrations, showing:
+- Integration name and connection status
+- Available trigger types
+- Trigger names and descriptions
+
+#### Simulate Trigger Execution
+
+Test your crew with realistic trigger payloads before deployment:
+
+```bash
+crewai triggers run <trigger_name>
+```
+
+For example:
+
+```bash
+crewai triggers run microsoft_onedrive/file_changed
+```
+
+This command:
+- Executes your crew locally
+- Passes a complete, realistic trigger payload
+- Simulates exactly how your crew will be called in production
+
+<Warning>
+**Important Development Notes:**
+- Use `crewai triggers run <trigger>` to simulate trigger execution during development
+- Using `crewai run` will NOT simulate trigger calls and won't pass the trigger payload
+- After deployment, your crew will be executed with the actual trigger payload
+- If your crew expects parameters that aren't in the trigger payload, execution may fail
+</Warning>
 
 Use these samples to understand payload shape, copy the matching crew, and then replace the test payload with your live trigger data.
 
 ### Triggers with Crew
````
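To make the Warning above concrete, here is a minimal sketch of a crew whose kickoff inputs include `crewai_trigger_payload`; the roles and task text are illustrative placeholders, not code from the repo:

```python
from crewai import Agent, Crew, Task

agent = Agent(
    role="Trigger Handler",
    goal="Summarize incoming trigger events",
    backstory="Processes webhook payloads delivered by CrewAI triggers",
)
task = Task(
    description="Summarize this trigger event: {crewai_trigger_payload}",
    expected_output="A short summary of the event",
    agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])

def handle(crewai_trigger_payload: dict | None = None):
    # `crewai triggers run <trigger>` supplies the payload; a plain
    # `crewai run` leaves it unset, so guard against None here.
    return crew.kickoff(inputs={"crewai_trigger_payload": crewai_trigger_payload or {}})
```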
````diff
@@ -241,15 +264,20 @@ def delegate_to_crew(self, crewai_trigger_payload: dict = None):
 ## Troubleshooting
 
 **Trigger not firing:**
-- Verify the trigger is enabled
-- Check integration connection status
+- Verify the trigger is enabled in your deployment's Triggers tab
+- Check integration connection status under Tools & Integrations
+- Ensure all required environment variables are properly configured
 
 **Execution failures:**
 - Check the execution logs for error details
-- If you are developing, make sure the inputs include the `crewai_trigger_payload` parameter with the correct payload
+- Use `crewai triggers run <trigger_name>` to test locally and see the exact payload structure
+- Verify your crew can handle the `crewai_trigger_payload` parameter
+- Ensure your crew doesn't expect parameters that aren't included in the trigger payload
+
+**Development issues:**
+- Always test with `crewai triggers run <trigger>` before deploying to see the complete payload
+- Remember that `crewai run` does NOT simulate trigger calls—use `crewai triggers run` instead
+- Use `crewai triggers list` to verify which triggers are available for your connected integrations
+- After deployment, your crew will receive the actual trigger payload, so test thoroughly locally first
 
 Automation triggers transform your CrewAI deployments into responsive, event-driven systems that can seamlessly integrate with your existing business processes and tools.
 
 <Card title="CrewAI AMP Trigger Examples" href="https://github.com/crewAIInc/crewai-enterprise-trigger-examples" icon="github">
 Check them out on GitHub!
 </Card>
````
### Gmail Trigger guide (en)

````diff
@@ -51,16 +51,25 @@ class GmailProcessingCrew:
 )
 ```
 
-The Gmail payload will be available via the standard context mechanisms. See the payload samples repository for structure and fields.
+The Gmail payload will be available via the standard context mechanisms.
 
-### Sample payloads & crews
+### Testing Locally
 
-The [CrewAI AMP Trigger Examples repository](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail) includes:
+Test your Gmail trigger integration locally using the CrewAI CLI:
 
-- `new-email-payload-1.json` / `new-email-payload-2.json` — production-style new message alerts with matching crews in `new-email-crew.py`
-- `thread-updated-sample-1.json` — follow-up messages on an existing thread, processed by `gmail-alert-crew.py`
+```bash
+# View all available triggers
+crewai triggers list
 
-Use these samples to validate your parsing logic locally before wiring the trigger to your live Gmail accounts.
+# Simulate a Gmail trigger with realistic payload
+crewai triggers run gmail/new_email
+```
+
+The `crewai triggers run` command will execute your crew with a complete Gmail payload, allowing you to test your parsing logic before deployment.
+
+<Warning>
+Use `crewai triggers run gmail/new_email` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew will automatically receive the trigger payload.
+</Warning>
 
 ## Monitoring Executions
@@ -70,16 +79,10 @@ Track history and performance of triggered runs:
   <img src="/images/enterprise/list-executions.png" alt="List of executions triggered by automation" />
 </Frame>
 
-## Payload Reference
-
-See the sample payloads and field descriptions:
-
-<Card title="Gmail samples in Trigger Examples Repo" href="https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail" icon="envelopes-bulk">
-Gmail samples in Trigger Examples Repo
-</Card>
-
 ## Troubleshooting
 
 - Ensure Gmail is connected in Tools & Integrations
 - Verify the Gmail Trigger is enabled on the Triggers tab
+- Test locally with `crewai triggers run gmail/new_email` to see the exact payload structure
 - Check the execution logs and confirm the payload is passed as `crewai_trigger_payload`
+- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
````
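To complement the Gmail guide above, here is a hedged sketch of extracting a couple of fields from the trigger payload before handing it to a crew; the key names are assumptions to check against the output of `crewai triggers run gmail/new_email`:

```python
def summarize_gmail_event(crewai_trigger_payload: dict) -> str:
    # Assumed keys; confirm them against a real payload from
    # `crewai triggers run gmail/new_email` before relying on this.
    message = crewai_trigger_payload.get("message", {})
    sender = message.get("from", "unknown sender")
    subject = message.get("subject", "(no subject)")
    return f"New email from {sender}: {subject}"

# Example with a minimal fake payload:
print(summarize_gmail_event({"message": {"from": "a@b.com", "subject": "Hi"}}))
```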
### Google Calendar Trigger guide (en)

````diff
@@ -39,16 +39,23 @@ print(result.raw)
 
 Use `crewai_trigger_payload` exactly as it is delivered by the trigger so the crew can extract the proper fields.
 
-## Sample payloads & crews
+## Testing Locally
 
-The [Google Calendar examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_calendar) show how to handle multiple event types:
+Test your Google Calendar trigger integration locally using the CrewAI CLI:
 
-- `new-event.json` → standard event creation handled by `calendar-event-crew.py`
-- `event-updated.json` / `event-started.json` / `event-ended.json` → in-flight updates processed by `calendar-meeting-crew.py`
-- `event-canceled.json` → cancellation workflow that alerts attendees via `calendar-meeting-crew.py`
-- Working location events use `calendar-working-location-crew.py` to extract on-site schedules
+```bash
+# View all available triggers
+crewai triggers list
 
-Each crew transforms raw event metadata (attendees, rooms, working locations) into the summaries your teams need.
+# Simulate a Google Calendar trigger with realistic payload
+crewai triggers run google_calendar/event_changed
+```
+
+The `crewai triggers run` command will execute your crew with a complete Calendar payload, allowing you to test your parsing logic before deployment.
+
+<Warning>
+Use `crewai triggers run google_calendar/event_changed` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew will automatically receive the trigger payload.
+</Warning>
 
 ## Monitoring Executions
@@ -61,5 +68,7 @@ The **Executions** list in the deployment dashboard tracks every triggered run a
 ## Troubleshooting
 
 - Ensure the correct Google account is connected and the trigger is enabled
+- Test locally with `crewai triggers run google_calendar/event_changed` to see the exact payload structure
 - Confirm your workflow handles all-day events (payloads use `start.date` and `end.date` instead of timestamps)
 - Check execution logs if reminders or attendee arrays are missing—calendar permissions can limit fields in the payload
+- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
````
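One concrete consequence of the all-day-event bullet above: Google Calendar payloads carry `start.date`/`end.date` for all-day events and `start.dateTime`/`end.dateTime` for timed ones. A small defensive helper (a sketch; verify field names against the payload from `crewai triggers run google_calendar/event_changed`):

```python
def event_window(event: dict) -> tuple[str | None, str | None]:
    # All-day events use start.date / end.date; timed events use
    # start.dateTime / end.dateTime, so fall back between the two.
    start, end = event.get("start", {}), event.get("end", {})
    return (
        start.get("dateTime") or start.get("date"),
        end.get("dateTime") or end.get("date"),
    )

# Example: an all-day event
print(event_window({"start": {"date": "2025-01-01"}, "end": {"date": "2025-01-02"}}))
```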
### Google Drive Trigger guide (en)

````diff
@@ -36,15 +36,23 @@ crew.kickoff({
 })
 ```
 
-## Sample payloads & crews
+## Testing Locally
 
-Explore the [Google Drive examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_drive) to cover different operations:
+Test your Google Drive trigger integration locally using the CrewAI CLI:
 
-- `new-file.json` → new uploads processed by `drive-file-crew.py`
-- `updated-file.json` → file edits and metadata changes handled by `drive-file-crew.py`
-- `deleted-file.json` → deletion events routed through `drive-file-deletion-crew.py`
+```bash
+# View all available triggers
+crewai triggers list
 
-Each crew highlights the file name, operation type, owner, permissions, and security considerations so downstream systems can respond appropriately.
+# Simulate a Google Drive trigger with realistic payload
+crewai triggers run google_drive/file_changed
+```
+
+The `crewai triggers run` command will execute your crew with a complete Drive payload, allowing you to test your parsing logic before deployment.
+
+<Warning>
+Use `crewai triggers run google_drive/file_changed` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew will automatically receive the trigger payload.
+</Warning>
 
 ## Monitoring Executions
@@ -57,5 +65,7 @@ Track history and performance of triggered runs with the **Executions** list in
 ## Troubleshooting
 
 - Verify Google Drive is connected and the trigger toggle is enabled
+- Test locally with `crewai triggers run google_drive/file_changed` to see the exact payload structure
 - If a payload is missing permission data, ensure the connected account has access to the file or folder
 - The trigger sends file IDs only; use the Drive API if you need to fetch binary content during the crew run
+- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
````
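The last troubleshooting bullet above notes that the trigger delivers file IDs only. If a crew step needs the file bytes, one option is the Google Drive API via `google-api-python-client`; this is a hedged sketch, not part of the trigger docs, and the credential setup plus the payload's exact file-ID field are assumptions to verify:

```python
import io

from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload

def download_drive_file(creds, file_id: str) -> bytes:
    # creds: any google-auth credentials object with a Drive read scope;
    # file_id: taken from the trigger payload (field name may vary).
    drive = build("drive", "v3", credentials=creds)
    request = drive.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        _, done = downloader.next_chunk()
    return buffer.getvalue()
```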
### HubSpot Trigger guide (en)

````diff
@@ -49,16 +49,4 @@ This guide provides a step-by-step process to set up HubSpot triggers for CrewAI
   </Step>
 </Steps>
 
-## Additional Resources
-
-### Sample payloads & crews
-
-You can jump-start development with the [HubSpot examples in the trigger repository](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/hubspot):
-
-- `record-created-contact.json`, `record-updated-contact.json` → contact lifecycle events handled by `hubspot-contact-crew.py`
-- `record-created-company.json`, `record-updated-company.json` → company enrichment flows in `hubspot-company-crew.py`
-- `record-created-deals.json`, `record-updated-deals.json` → deal pipeline automation in `hubspot-record-crew.py`
-
-Each crew demonstrates how to parse HubSpot record fields, enrich context, and return structured insights.
-
 For more detailed information on available actions and customization options, refer to the [HubSpot Workflows Documentation](https://knowledge.hubspot.com/workflows/create-workflows).
````
### Microsoft Teams Trigger guide (en)

````diff
@@ -37,16 +37,28 @@ print(result.raw)
 
 The crew parses thread metadata (subject, created time, roster) and generates an action plan for the receiving team.
 
-## Sample payloads & crews
+## Testing Locally
 
-The [Microsoft Teams examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/microsoft-teams) include:
+Test your Microsoft Teams trigger integration locally using the CrewAI CLI:
 
-- `chat-created.json` → chat creation payload processed by `teams-chat-created-crew.py`
+```bash
+# View all available triggers
+crewai triggers list
 
-The crew demonstrates how to extract participants, initial messages, tenant information, and compliance metadata from the Microsoft Graph webhook payload.
+# Simulate a Microsoft Teams trigger with realistic payload
+crewai triggers run microsoft_teams/teams_message_created
+```
+
+The `crewai triggers run` command will execute your crew with a complete Teams payload, allowing you to test your parsing logic before deployment.
+
+<Warning>
+Use `crewai triggers run microsoft_teams/teams_message_created` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew will automatically receive the trigger payload.
+</Warning>
 
 ## Troubleshooting
 
 - Ensure the Teams connection is active; it must be refreshed if the tenant revokes permissions
+- Test locally with `crewai triggers run microsoft_teams/teams_message_created` to see the exact payload structure
 - Confirm the webhook subscription in Microsoft 365 is still valid if payloads stop arriving
 - Review execution logs for payload shape mismatches—Graph notifications may omit fields when a chat is private or restricted
+- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
````
### OneDrive Trigger guide (en)

````diff
@@ -36,18 +36,28 @@ crew.kickoff({
 
 The crew inspects file metadata, user activity, and permission changes to produce a compliance-friendly summary.
 
-## Sample payloads & crews
+## Testing Locally
 
-The [OneDrive examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/onedrive) showcase how to:
+Test your OneDrive trigger integration locally using the CrewAI CLI:
 
-- Parse file metadata, size, and folder paths
-- Track who created and last modified the file
-- Highlight permission and external sharing changes
+```bash
+# View all available triggers
+crewai triggers list
 
-`onedrive-file-crew.py` bundles the analysis and summarization tasks so you can add remediation steps as needed.
+# Simulate a OneDrive trigger with realistic payload
+crewai triggers run microsoft_onedrive/file_changed
+```
+
+The `crewai triggers run` command will execute your crew with a complete OneDrive payload, allowing you to test your parsing logic before deployment.
+
+<Warning>
+Use `crewai triggers run microsoft_onedrive/file_changed` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew will automatically receive the trigger payload.
+</Warning>
 
 ## Troubleshooting
 
 - Ensure the connected account has permission to read the file metadata included in the webhook
+- Test locally with `crewai triggers run microsoft_onedrive/file_changed` to see the exact payload structure
 - If the trigger fires but the payload is missing `permissions`, confirm the site-level sharing settings allow Graph to return this field
 - For large tenants, filter notifications upstream so the crew only runs on relevant directories
+- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
````
### Outlook Trigger guide (en)

````diff
@@ -36,17 +36,28 @@ crew.kickoff({
 
 The crew extracts sender details, subject, body preview, and attachments before generating a structured response.
 
-## Sample payloads & crews
+## Testing Locally
 
-Review the [Outlook examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/outlook) for two common scenarios:
+Test your Outlook trigger integration locally using the CrewAI CLI:
 
-- `new-message.json` → new mail notifications parsed by `outlook-message-crew.py`
-- `event-removed.json` → calendar cleanup handled by `outlook-event-removal-crew.py`
+```bash
+# View all available triggers
+crewai triggers list
 
-Each crew demonstrates how to handle Microsoft Graph payloads, normalize headers, and keep humans in-the-loop with concise summaries.
+# Simulate an Outlook trigger with realistic payload
+crewai triggers run microsoft_outlook/email_received
+```
+
+The `crewai triggers run` command will execute your crew with a complete Outlook payload, allowing you to test your parsing logic before deployment.
+
+<Warning>
+Use `crewai triggers run microsoft_outlook/email_received` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew will automatically receive the trigger payload.
+</Warning>
 
 ## Troubleshooting
 
 - Verify the Outlook connector is still authorized; the subscription must be renewed periodically
+- Test locally with `crewai triggers run microsoft_outlook/email_received` to see the exact payload structure
 - If attachments are missing, confirm the webhook subscription includes the `includeResourceData` flag
 - Review execution logs when events fail to match—cancellation payloads lack attendee lists by design and the crew should account for that
+- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
````
### docs/en/mcp/dsl-integration.mdx (new file, 344 lines)
---
title: MCP DSL Integration
description: Learn how to use CrewAI's simple DSL syntax to integrate MCP servers directly with your agents using the mcps field.
icon: code
mode: "wide"
---

## Overview

CrewAI's MCP DSL (Domain Specific Language) integration provides the **simplest way** to connect your agents to MCP (Model Context Protocol) servers. Just add an `mcps` field to your agent and CrewAI handles all the complexity automatically.

<Info>
This is the **recommended approach** for most MCP use cases. For advanced scenarios requiring manual connection management, see [MCPServerAdapter](/en/mcp/overview#advanced-mcpserveradapter).
</Info>

## Basic Usage

Add MCP servers to your agent using the `mcps` field:

```python
from crewai import Agent

agent = Agent(
    role="Research Assistant",
    goal="Help with research and analysis tasks",
    backstory="Expert assistant with access to advanced research tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=your_key&profile=research"
    ]
)

# MCP tools are now automatically available!
# No need for manual connection management or tool configuration
```

## Supported Reference Formats

### External MCP Remote Servers

```python
# Basic HTTPS server
"https://api.example.com/mcp"

# Server with authentication
"https://mcp.exa.ai/mcp?api_key=your_key&profile=your_profile"

# Server with custom path
"https://services.company.com/api/v1/mcp"
```

### Specific Tool Selection

Use the `#` syntax to select specific tools from a server:

```python
# Get only the forecast tool from weather server
"https://weather.api.com/mcp#get_forecast"

# Get only the search tool from Exa
"https://mcp.exa.ai/mcp?api_key=your_key#web_search_exa"
```

### CrewAI AMP Marketplace

Access tools from the CrewAI AMP marketplace:

```python
# Full service with all tools
"crewai-amp:financial-data"

# Specific tool from AMP service
"crewai-amp:research-tools#pubmed_search"

# Multiple AMP services
mcps=[
    "crewai-amp:weather-insights",
    "crewai-amp:market-analysis",
    "crewai-amp:social-media-monitoring"
]
```

## Complete Example

Here's a complete example using multiple MCP servers:

```python
from crewai import Agent, Task, Crew, Process

# Create agent with multiple MCP sources
multi_source_agent = Agent(
    role="Multi-Source Research Analyst",
    goal="Conduct comprehensive research using multiple data sources",
    backstory="""Expert researcher with access to web search, weather data,
    financial information, and academic research tools""",
    mcps=[
        # External MCP servers
        "https://mcp.exa.ai/mcp?api_key=your_exa_key&profile=research",
        "https://weather.api.com/mcp#get_current_conditions",

        # CrewAI AMP marketplace
        "crewai-amp:financial-insights",
        "crewai-amp:academic-research#pubmed_search",
        "crewai-amp:market-intelligence#competitor_analysis"
    ]
)

# Create comprehensive research task
research_task = Task(
    description="""Research the impact of AI agents on business productivity.
    Include current weather impacts on remote work, financial market trends,
    and recent academic publications on AI agent frameworks.""",
    expected_output="""Comprehensive report covering:
    1. AI agent business impact analysis
    2. Weather considerations for remote work
    3. Financial market trends related to AI
    4. Academic research citations and insights
    5. Competitive landscape analysis""",
    agent=multi_source_agent
)

# Create and execute crew
research_crew = Crew(
    agents=[multi_source_agent],
    tasks=[research_task],
    process=Process.sequential,
    verbose=True
)

result = research_crew.kickoff()
print(f"Research completed with {len(multi_source_agent.mcps)} MCP data sources")
```

## Tool Naming and Organization

CrewAI automatically handles tool naming to prevent conflicts:

```python
# Original MCP server has tools: "search", "analyze"
# CrewAI creates tools: "mcp_exa_ai_search", "mcp_exa_ai_analyze"

agent = Agent(
    role="Tool Organization Demo",
    goal="Show how tool naming works",
    backstory="Demonstrates automatic tool organization",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=key",   # Tools: mcp_exa_ai_*
        "https://weather.service.com/mcp",      # Tools: weather_service_com_*
        "crewai-amp:financial-data"             # Tools: financial_data_*
    ]
)

# Each server's tools get unique prefixes based on the server name
# This prevents naming conflicts between different MCP servers
```

## Error Handling and Resilience

The MCP DSL is designed to be robust and user-friendly:

### Graceful Server Failures

```python
agent = Agent(
    role="Resilient Researcher",
    goal="Research despite server issues",
    backstory="Experienced researcher who adapts to available tools",
    mcps=[
        "https://primary-server.com/mcp",       # Primary data source
        "https://backup-server.com/mcp",        # Backup if primary fails
        "https://unreachable-server.com/mcp",   # Will be skipped with warning
        "crewai-amp:reliable-service"           # Reliable AMP service
    ]
)

# Agent will:
# 1. Successfully connect to working servers
# 2. Log warnings for failing servers
# 3. Continue with available tools
# 4. Not crash or hang on server failures
```

### Timeout Protection

All MCP operations have built-in timeouts:

- **Connection timeout**: 10 seconds
- **Tool execution timeout**: 30 seconds
- **Discovery timeout**: 15 seconds

```python
# These servers will timeout gracefully if unresponsive
mcps=[
    "https://slow-server.com/mcp",      # Will timeout after 10s if unresponsive
    "https://overloaded-api.com/mcp"    # Will timeout if discovery takes > 15s
]
```

## Performance Features

### Automatic Caching

Tool schemas are cached for 5 minutes to improve performance:

```python
# First agent creation - discovers tools from server
agent1 = Agent(role="First", goal="Test", backstory="Test",
               mcps=["https://api.example.com/mcp"])

# Second agent creation (within 5 minutes) - uses cached tool schemas
agent2 = Agent(role="Second", goal="Test", backstory="Test",
               mcps=["https://api.example.com/mcp"])  # Much faster!
```

### On-Demand Connections

Tool connections are established only when tools are actually used:

```python
# Agent creation is fast - no MCP connections made yet
agent = Agent(
    role="On-Demand Agent",
    goal="Use tools efficiently",
    backstory="Efficient agent that connects only when needed",
    mcps=["https://api.example.com/mcp"]
)

# MCP connection is made only when a tool is actually executed
# This minimizes connection overhead and improves startup performance
```

## Integration with Existing Features

MCP tools work seamlessly with other CrewAI features:

```python
from crewai.tools import BaseTool

class CustomTool(BaseTool):
    name: str = "custom_analysis"
    description: str = "Custom analysis tool"

    def _run(self, **kwargs):
        return "Custom analysis result"

agent = Agent(
    role="Full-Featured Agent",
    goal="Use all available tool types",
    backstory="Agent with comprehensive tool access",

    # All tool types work together
    tools=[CustomTool()],            # Custom tools
    apps=["gmail", "slack"],         # Platform integrations
    mcps=[                           # MCP servers
        "https://mcp.exa.ai/mcp?api_key=key",
        "crewai-amp:research-tools"
    ],

    verbose=True,
    max_iter=15
)
```

## Best Practices

### 1. Use Specific Tools When Possible

```python
# Good - only get the tools you need
mcps=["https://weather.api.com/mcp#get_forecast"]

# Less efficient - gets all tools from server
mcps=["https://weather.api.com/mcp"]
```

### 2. Handle Authentication Securely

```python
import os

# Store API keys in environment variables
exa_key = os.getenv("EXA_API_KEY")
exa_profile = os.getenv("EXA_PROFILE")

agent = Agent(
    role="Secure Agent",
    goal="Use MCP tools securely",
    backstory="Security-conscious agent",
    mcps=[f"https://mcp.exa.ai/mcp?api_key={exa_key}&profile={exa_profile}"]
)
```

### 3. Plan for Server Failures

```python
# Always include backup options
mcps=[
    "https://primary-api.com/mcp",    # Primary choice
    "https://backup-api.com/mcp",     # Backup option
    "crewai-amp:reliable-service"     # AMP fallback
]
```

### 4. Use Descriptive Agent Roles

```python
agent = Agent(
    role="Weather-Enhanced Market Analyst",
    goal="Analyze markets considering weather impacts",
    backstory="Financial analyst with access to weather data for agricultural market insights",
    mcps=[
        "https://weather.service.com/mcp#get_forecast",
        "crewai-amp:financial-data#stock_analysis"
    ]
)
```

## Troubleshooting

### Common Issues

**No tools discovered:**
```python
# Check your MCP server URL and authentication
# Verify the server is running and accessible
mcps=["https://mcp.example.com/mcp?api_key=valid_key"]
```

**Connection timeouts:**
```python
# Server may be slow or overloaded
# CrewAI will log warnings and continue with other servers
# Check server status or try backup servers
```

**Authentication failures:**
```python
# Verify API keys and credentials
# Check server documentation for required parameters
# Ensure query parameters are properly URL encoded
```

## Advanced: MCPServerAdapter

For complex scenarios requiring manual connection management, use the `MCPServerAdapter` class from `crewai-tools`. Using a Python context manager (`with` statement) is the recommended approach as it automatically handles starting and stopping the connection to the MCP server.
### MCP overview doc (en/mcp/overview)

````diff
@@ -8,14 +8,39 @@ mode: "wide"
 ## Overview
 
 The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP) provides a standardized way for AI agents to provide context to LLMs by communicating with external services, known as MCP Servers.
 The `crewai-tools` library extends CrewAI's capabilities by allowing you to seamlessly integrate tools from these MCP servers into your agents.
 This gives your crews access to a vast ecosystem of functionalities.
 
+CrewAI offers **two approaches** for MCP integration:
+
+### **Simple DSL Integration** (Recommended)
+
+Use the `mcps` field directly on agents for seamless MCP tool integration:
+
+```python
+from crewai import Agent
+
+agent = Agent(
+    role="Research Analyst",
+    goal="Research and analyze information",
+    backstory="Expert researcher with access to external tools",
+    mcps=[
+        "https://mcp.exa.ai/mcp?api_key=your_key",      # External MCP server
+        "https://api.weather.com/mcp#get_forecast",     # Specific tool from server
+        "crewai-amp:financial-data",                    # CrewAI AMP marketplace
+        "crewai-amp:research-tools#pubmed_search"       # Specific AMP tool
+    ]
+)
+# MCP tools are now automatically available to your agent!
+```
+
+### 🔧 **Advanced: MCPServerAdapter** (For Complex Scenarios)
+
+For advanced use cases requiring manual connection management, the `crewai-tools` library provides the `MCPServerAdapter` class.
+
 We currently support the following transport mechanisms:
 
 - **Stdio**: for local servers (communication via standard input/output between processes on the same machine)
 - **Server-Sent Events (SSE)**: for remote servers (unidirectional, real-time data streaming from server to client over HTTP)
 - **Streamable HTTP**: for remote servers (flexible, potentially bi-directional communication over HTTP, often utilizing SSE for server-to-client streams)
 - **Streamable HTTPS**: for remote servers (flexible, potentially bi-directional communication over HTTPS, often utilizing SSE for server-to-client streams)
 
 ## Video Tutorial
 Watch this video tutorial for a comprehensive guide on MCP integration with CrewAI:
@@ -31,17 +56,125 @@ Watch this video tutorial for a comprehensive guide on MCP integration with Crew
 
 ## Installation
 
-Before you start using MCP with `crewai-tools`, you need to install the `mcp` extra of the `crewai-tools` dependency with the following command:
+CrewAI MCP integration requires the `mcp` library:
 
 ```shell
+# For Simple DSL Integration (Recommended)
+uv add mcp
+
+# For Advanced MCPServerAdapter usage
 uv pip install 'crewai-tools[mcp]'
 ```
 
-## Key Concepts & Getting Started
+## Quick Start: Simple DSL Integration
 
-The `MCPServerAdapter` class from `crewai-tools` is the primary way to connect to an MCP server and make its tools available to your CrewAI agents. It supports different transport mechanisms and simplifies connection management.
+The easiest way to integrate MCP servers is using the `mcps` field on your agents:
 
-Using a Python context manager (`with` statement) is the **recommended approach** for `MCPServerAdapter`. It automatically handles starting and stopping the connection to the MCP server.
+```python
+from crewai import Agent, Task, Crew
+
+# Create agent with MCP tools
+research_agent = Agent(
+    role="Research Analyst",
+    goal="Find and analyze information using advanced search tools",
+    backstory="Expert researcher with access to multiple data sources",
+    mcps=[
+        "https://mcp.exa.ai/mcp?api_key=your_key&profile=your_profile",
+        "crewai-amp:weather-service#current_conditions"
+    ]
+)
+
+# Create task
+research_task = Task(
+    description="Research the latest developments in AI agent frameworks",
+    expected_output="Comprehensive research report with citations",
+    agent=research_agent
+)
+
+# Create and run crew
+crew = Crew(agents=[research_agent], tasks=[research_task])
+result = crew.kickoff()
+```
+
+That's it! The MCP tools are automatically discovered and available to your agent.
+
+## MCP Reference Formats
+
+The `mcps` field supports various reference formats for maximum flexibility:
+
+### External MCP Servers
+
+```python
+mcps=[
+    # Full server - get all available tools
+    "https://mcp.example.com/api",
+
+    # Specific tool from server using # syntax
+    "https://api.weather.com/mcp#get_current_weather",
+
+    # Server with authentication parameters
+    "https://mcp.exa.ai/mcp?api_key=your_key&profile=your_profile"
+]
+```
+
+### CrewAI AMP Marketplace
+
+```python
+mcps=[
+    # Full AMP MCP service - get all available tools
+    "crewai-amp:financial-data",
+
+    # Specific tool from AMP service using # syntax
+    "crewai-amp:research-tools#pubmed_search",
+
+    # Multiple AMP services
+    "crewai-amp:weather-service",
+    "crewai-amp:market-analysis"
+]
+```
+
+### Mixed References
+
+```python
+mcps=[
+    "https://external-api.com/mcp",                  # External server
+    "https://weather.service.com/mcp#forecast",      # Specific external tool
+    "crewai-amp:financial-insights",                 # AMP service
+    "crewai-amp:data-analysis#sentiment_tool"        # Specific AMP tool
+]
+```
+
+## Key Features
+
+- 🔄 **Automatic Tool Discovery**: Tools are automatically discovered and integrated
+- 🏷️ **Name Collision Prevention**: Server names are prefixed to tool names
+- ⚡ **Performance Optimized**: On-demand connections with schema caching
+- 🛡️ **Error Resilience**: Graceful handling of unavailable servers
+- ⏱️ **Timeout Protection**: Built-in timeouts prevent hanging connections
+- 📊 **Transparent Integration**: Works seamlessly with existing CrewAI features
+
+## Error Handling
+
+The MCP DSL integration is designed to be resilient:
+
+```python
+agent = Agent(
+    role="Resilient Agent",
+    goal="Continue working despite server issues",
+    backstory="Agent that handles failures gracefully",
+    mcps=[
+        "https://reliable-server.com/mcp",        # Will work
+        "https://unreachable-server.com/mcp",     # Will be skipped gracefully
+        "https://slow-server.com/mcp",            # Will timeout gracefully
+        "crewai-amp:working-service"              # Will work
+    ]
+)
+# Agent will use tools from working servers and log warnings for failing ones
+```
+
+## Advanced: MCPServerAdapter
+
+For complex scenarios requiring manual connection management, use the `MCPServerAdapter` class from `crewai-tools`. Using a Python context manager (`with` statement) is the recommended approach as it automatically handles starting and stopping the connection to the MCP server.
 
 ## Connection Configuration
````
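The overview above recommends the context-manager form of `MCPServerAdapter`. Here is a hedged sketch of that pattern; the URL is a placeholder, and the parameter dict shape should be checked against your installed `crewai-tools` version:

```python
from crewai import Agent
from crewai_tools import MCPServerAdapter

# Placeholder parameters for a remote Streamable HTTP server.
server_params = {
    "url": "https://mcp.example.com/mcp",
    "transport": "streamable-http",
}

with MCPServerAdapter(server_params) as mcp_tools:
    # The adapter connects on entry and disconnects on exit.
    print([tool.name for tool in mcp_tools])
    agent = Agent(
        role="MCP Tool User",
        goal="Use tools exposed by the MCP server",
        backstory="Demonstrates manual MCP connection management",
        tools=mcp_tools,
    )
```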
````diff
@@ -241,11 +374,19 @@ class CrewWithCustomTimeout:
 ## Explore MCP Integrations
 
 <CardGroup cols={2}>
+  <Card
+    title="Simple DSL Integration"
+    icon="code"
+    href="/en/mcp/dsl-integration"
+    color="#3B82F6"
+  >
+    **Recommended**: Use the simple `mcps=[]` field syntax for effortless MCP integration.
+  </Card>
   <Card
     title="Stdio Transport"
     icon="server"
     href="/en/mcp/stdio"
-    color="#3B82F6"
+    color="#10B981"
   >
     Connect to local MCP servers via standard input/output. Ideal for scripts and local executables.
   </Card>
@@ -253,7 +394,7 @@ class CrewWithCustomTimeout:
     title="SSE Transport"
     icon="wifi"
     href="/en/mcp/sse"
-    color="#10B981"
+    color="#F59E0B"
   >
     Integrate with remote MCP servers using Server-Sent Events for real-time data streaming.
   </Card>
@@ -261,7 +402,7 @@ class CrewWithCustomTimeout:
     title="Streamable HTTP Transport"
     icon="globe"
     href="/en/mcp/streamable-http"
-    color="#F59E0B"
+    color="#8B5CF6"
   >
     Utilize flexible Streamable HTTP for robust communication with remote MCP servers.
   </Card>
@@ -269,7 +410,7 @@ class CrewWithCustomTimeout:
     title="Connecting to Multiple Servers"
     icon="layer-group"
     href="/en/mcp/multiple-servers"
-    color="#8B5CF6"
+    color="#EF4444"
   >
     Aggregate tools from several MCP servers simultaneously using a single adapter.
   </Card>
@@ -277,7 +418,7 @@ class CrewWithCustomTimeout:
     title="Security Considerations"
     icon="lock"
     href="/en/mcp/security"
-    color="#EF4444"
+    color="#DC2626"
   >
     Review important security best practices for MCP integration to keep your agents safe.
   </Card>
````
### Tools & Integrations doc (ko)

````diff
@@ -57,6 +57,22 @@ mode: "wide"
 uv add crewai-tools
 ```
 
+### Environment Variable Setup
+
+<Note>
+To use integrations with `Agent(apps=[])`, you must set the `CREWAI_PLATFORM_INTEGRATION_TOKEN` environment variable to your Enterprise Token.
+</Note>
+
+```bash
+export CREWAI_PLATFORM_INTEGRATION_TOKEN="your_enterprise_token"
+```
+
+Or add it to your `.env` file:
+
+```
+CREWAI_PLATFORM_INTEGRATION_TOKEN=your_enterprise_token
+```
+
 ### Usage Example
 
 <Tip>
````
### Automation Triggers guide (ko)

````diff
@@ -110,19 +110,49 @@ CrewAI AMP triggers bring real-time events from the tools your team already uses
 - Have you decided whether to inject trigger context automatically with the `allow_crewai_trigger_context` option?
 - Have you set up monitoring (webhook logs, CrewAI execution history, external alerting)?
 
-### Payload & Crew Examples Repository
+### Testing Triggers Locally with the CLI
 
-| Integration | When it fires | Payload samples | Crew examples |
-| :-- | :-- | :-- | :-- |
-| Gmail | New mail, thread updates | [Gmail payload](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail) | [`new-email-crew.py`, `gmail-alert-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail) |
-| Google Calendar | Event created/updated/started/ended/cancelled | [Calendar payload](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_calendar) | [`calendar-event-crew.py`, `calendar-meeting-crew.py`, `calendar-working-location-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_calendar) |
-| Google Drive | File created/updated/deleted | [Drive payload](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_drive) | [`drive-file-crew.py`, `drive-file-deletion-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_drive) |
-| Outlook | New email, event removed | [Outlook payload](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/outlook) | [`outlook-message-crew.py`, `outlook-event-removal-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/outlook) |
-| OneDrive | File operations (create, update, share, delete) | [OneDrive payload](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/onedrive) | [`onedrive-file-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/onedrive) |
-| HubSpot | Record created/updated (contacts, companies, deals) | [HubSpot payload](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/hubspot) | [`hubspot-company-crew.py`, `hubspot-contact-crew.py`, `hubspot-record-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/hubspot) |
-| Microsoft Teams | Chat created | [Teams payload](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/microsoft-teams) | [`teams-chat-created-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/microsoft-teams) |
+The CrewAI CLI provides powerful commands for developing and testing trigger-driven automations before deploying to production.
 
-Use the sample payloads to validate your parsing logic, then copy the provided crews and swap in your real data.
+#### List Available Triggers
+
+View all triggers available for your connected integrations:
+
+```bash
+crewai triggers list
+```
+
+This command displays every trigger available for your connected integrations, showing:
+- Integration name and connection status
+- Available trigger types
+- Trigger names and descriptions
+
+#### Simulate Trigger Execution
+
+Test your crew with realistic trigger payloads before deployment:
+
+```bash
+crewai triggers run <trigger_name>
+```
+
+For example:
+
+```bash
+crewai triggers run microsoft_onedrive/file_changed
+```
+
+This command:
+- Runs your crew locally
+- Passes a complete, realistic trigger payload
+- Simulates exactly how your crew will be called in production
+
+<Warning>
+**Important Development Notes:**
+- Use `crewai triggers run <trigger>` to simulate trigger execution during development
+- `crewai run` does not simulate trigger calls and does not pass the trigger payload
+- After deployment, your crew runs with the actual trigger payload
+- If your crew expects parameters that are not in the trigger payload, execution may fail
+</Warning>
 
 ### Using Triggers with a Crew
@@ -191,17 +221,20 @@ def delegate_to_crew(self, crewai_trigger_payload: dict = None):
 ## Troubleshooting
 
 **Trigger not firing?**
-- Verify the trigger is enabled.
-- Check the integration connection status.
+- Verify the trigger is enabled in your deployment's Triggers tab
+- Check the integration connection status under Tools & Integrations
+- Ensure all required environment variables are configured correctly
 
 **Execution failures?**
-- Check the execution logs for error messages.
-- If you are developing, confirm that `crewai_trigger_payload` is being passed with the correct data.
+- Check the execution logs for error details
+- Use `crewai triggers run <trigger_name>` to test locally and inspect the exact payload structure
+- Verify your crew can handle the `crewai_trigger_payload` parameter
+- Make sure your crew does not expect parameters that are not included in the trigger payload
 
+**Development issues:**
+- Always test with `crewai triggers run <trigger>` before deploying to see the complete payload
+- `crewai run` does not simulate trigger calls; use `crewai triggers run` instead
+- Use `crewai triggers list` to check which triggers are available for your connected integrations
+- After deployment your crew receives the actual trigger payload, so test thoroughly locally first
 
 Triggers turn your CrewAI automations into event-driven systems that blend naturally into your existing business processes and tools.
 
 <Callout icon="github" title="Example Repository">
   <a href="https://github.com/crewAIInc/crewai-enterprise-trigger-examples">
     CrewAI AMP Trigger Examples
   </a>
 </Callout>
````
### Gmail Trigger guide (ko)

````diff
@@ -51,16 +51,25 @@ class GmailProcessingCrew:
 )
 ```
 
-The Gmail payload will be available via the standard context mechanisms. See the payload samples repository for structure and fields.
+The Gmail payload will be available via the standard context mechanisms.
 
-### Sample payloads & crews
+### Testing Locally
 
-The [CrewAI AMP Trigger Examples repository](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail) includes:
+Test your Gmail trigger integration locally using the CrewAI CLI:
 
-- `new-email-payload-1.json` / `new-email-payload-2.json` — production-style new message alerts with matching crews in `new-email-crew.py`
-- `thread-updated-sample-1.json` — follow-up messages on an existing thread, processed by `gmail-alert-crew.py`
+```bash
+# View all available triggers
+crewai triggers list
 
-Use these samples to validate your parsing logic locally before wiring the trigger to your live Gmail accounts.
+# Simulate a Gmail trigger with a realistic payload
+crewai triggers run gmail/new_email
+```
+
+The `crewai triggers run` command executes your crew with a complete Gmail payload so you can test your parsing logic before deployment.
+
+<Warning>
+During development, use `crewai triggers run gmail/new_email` (not `crewai run`). After deployment, your crew automatically receives the trigger payload.
+</Warning>
 
 ## Monitoring Executions
@@ -70,16 +79,10 @@ Track history and performance of triggered runs:
   <img src="/images/enterprise/list-executions.png" alt="List of executions triggered by automation" />
 </Frame>
 
-## Payload Reference
-
-See the sample payloads and field descriptions:
-
-<Card title="Gmail samples in Trigger Examples Repo" href="https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail" icon="envelopes-bulk">
-Gmail samples in Trigger Examples Repo
-</Card>
-
 ## Troubleshooting
 
 - Ensure Gmail is connected in Tools & Integrations
 - Verify the Gmail Trigger is enabled on the Triggers tab
+- Test locally with `crewai triggers run gmail/new_email` to see the exact payload structure
 - Check the execution logs and confirm the payload is passed as `crewai_trigger_payload`
+- Note: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
````
### Google Calendar Trigger guide (ko)

````diff
@@ -39,16 +39,23 @@ print(result.raw)
 
 Use `crewai_trigger_payload` exactly as it is delivered by the trigger so the crew can extract the proper fields.
 
-## Sample payloads & crews
+## Testing Locally
 
-The [Google Calendar examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_calendar) show how to handle multiple event types:
+Test your Google Calendar trigger integration locally using the CrewAI CLI:
 
-- `new-event.json` → standard event creation handled by `calendar-event-crew.py`
-- `event-updated.json` / `event-started.json` / `event-ended.json` → in-flight updates processed by `calendar-meeting-crew.py`
-- `event-canceled.json` → cancellation workflow that alerts attendees via `calendar-meeting-crew.py`
-- Working location events use `calendar-working-location-crew.py` to extract on-site schedules
+```bash
+# View all available triggers
+crewai triggers list
 
-Each crew transforms raw event metadata (attendees, rooms, working locations) into the summaries your teams need.
+# Simulate a Google Calendar trigger with a realistic payload
+crewai triggers run google_calendar/event_changed
+```
+
+The `crewai triggers run` command executes your crew with a complete Calendar payload so you can test your parsing logic before deployment.
+
+<Warning>
+During development, use `crewai triggers run google_calendar/event_changed` (not `crewai run`). After deployment, your crew automatically receives the trigger payload.
+</Warning>
 
 ## Monitoring Executions
@@ -61,5 +68,7 @@ The **Executions** list in the deployment dashboard tracks every triggered run a
 ## Troubleshooting
 
 - Ensure the correct Google account is connected and the trigger is enabled
+- Test locally with `crewai triggers run google_calendar/event_changed` to see the exact payload structure
 - Confirm your workflow handles all-day events (payloads use `start.date` and `end.date` instead of timestamps)
 - Check execution logs if reminders or attendee arrays are missing—calendar permissions can limit fields in the payload
+- Note: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
````
@@ -36,15 +36,23 @@ crew.kickoff({
})
```

## Sample payloads & crews
## Testing Locally

Explore the [Google Drive examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_drive) to cover different operations:
Test your Google Drive trigger integration locally using the CrewAI CLI:

- `new-file.json` → new uploads processed by `drive-file-crew.py`
- `updated-file.json` → file edits and metadata changes handled by `drive-file-crew.py`
- `deleted-file.json` → deletion events routed through `drive-file-deletion-crew.py`
```bash
# View all available triggers
crewai triggers list

Each crew highlights the file name, operation type, owner, permissions, and security considerations so downstream systems can respond appropriately.
# Simulate a Google Drive trigger with a realistic payload
crewai triggers run google_drive/file_changed
```

The `crewai triggers run` command runs your crew with a complete Drive payload so you can test your parsing logic before deployment.

<Warning>
During development, use `crewai triggers run google_drive/file_changed` (not `crewai run`) to simulate trigger execution. After deployment, your crew receives the trigger payload automatically.
</Warning>

## Monitoring Executions

@@ -57,5 +65,7 @@ Track history and performance of triggered runs with the **Executions** list in

## Troubleshooting

- Verify Google Drive is connected and the trigger toggle is enabled
- Test locally with `crewai triggers run google_drive/file_changed` to see the exact payload structure
- If a payload is missing permission data, ensure the connected account has access to the file or folder
- The trigger sends file IDs only; use the Drive API if you need to fetch binary content during the crew run (see the sketch below)
- Note: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
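Because the Drive trigger carries file IDs rather than content, a crew task that needs the bytes has to call the Drive API itself. A minimal sketch using the official `google-api-python-client` (credentials setup omitted):

```python
import io

from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload


def download_drive_file(creds, file_id: str) -> bytes:
    """Fetch binary content for a file ID taken from the trigger payload."""
    service = build("drive", "v3", credentials=creds)
    request = service.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        _, done = downloader.next_chunk()  # downloads the file in chunks
    return buffer.getvalue()
```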
@@ -49,6 +49,4 @@ mode: "wide"
</Step>
</Steps>

## Additional Resources

For more information about available actions and customization options, see the [HubSpot workflows documentation](https://knowledge.hubspot.com/workflows/create-workflows).
@@ -37,16 +37,28 @@ print(result.raw)

The crew parses thread metadata (subject, created time, roster) and generates an action plan for the receiving team.

## Sample payloads & crews
## Testing Locally

The [Microsoft Teams examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/microsoft-teams) include:
Test your Microsoft Teams trigger integration locally using the CrewAI CLI:

- `chat-created.json` → chat creation payload processed by `teams-chat-created-crew.py`
```bash
# View all available triggers
crewai triggers list

The crew demonstrates how to extract participants, initial messages, tenant information, and compliance metadata from the Microsoft Graph webhook payload.
# Simulate a Microsoft Teams trigger with a realistic payload
crewai triggers run microsoft_teams/teams_message_created
```

The `crewai triggers run` command runs your crew with a complete Teams payload so you can test your parsing logic before deployment.

<Warning>
During development, use `crewai triggers run microsoft_teams/teams_message_created` (not `crewai run`) to simulate trigger execution. After deployment, your crew receives the trigger payload automatically.
</Warning>

## Troubleshooting

- Ensure the Teams connection is active; it must be refreshed if the tenant revokes permissions
- Test locally with `crewai triggers run microsoft_teams/teams_message_created` to see the exact payload structure
- Confirm the webhook subscription in Microsoft 365 is still valid if payloads stop arriving
- Review execution logs for payload shape mismatches—Graph notifications may omit fields when a chat is private or restricted
- Note: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
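A rough sketch of pulling the fields mentioned above (participants, tenant, compliance metadata) out of a Microsoft Graph change notification. The envelope shape follows Graph webhook notifications; exact fields vary and, as the troubleshooting note says, may be omitted for private or restricted chats.

```python
def summarize_notifications(payload: dict) -> list[dict]:
    """Flatten a Graph change-notification envelope into the fields the
    example crew reports on. Key names are per Graph webhook notifications."""
    summaries = []
    for note in payload.get("value", []):
        summaries.append({
            "tenant_id": note.get("tenantId"),
            "change_type": note.get("changeType"),
            "resource": note.get("resource"),
            "resource_data": note.get("resourceData", {}),
        })
    return summaries
```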
@@ -36,18 +36,28 @@ crew.kickoff({

The crew inspects file metadata, user activity, and permission changes to produce a compliance-friendly summary.

## Sample payloads & crews
## Testing Locally

The [OneDrive examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/onedrive) showcase how to:
Test your OneDrive trigger integration locally using the CrewAI CLI:

- Parse file metadata, size, and folder paths
- Track who created and last modified the file
- Highlight permission and external sharing changes
```bash
# View all available triggers
crewai triggers list

`onedrive-file-crew.py` bundles the analysis and summarization tasks so you can add remediation steps as needed.
# Simulate a OneDrive trigger with a realistic payload
crewai triggers run microsoft_onedrive/file_changed
```

The `crewai triggers run` command runs your crew with a complete OneDrive payload so you can test your parsing logic before deployment.

<Warning>
During development, use `crewai triggers run microsoft_onedrive/file_changed` (not `crewai run`) to simulate trigger execution. After deployment, your crew receives the trigger payload automatically.
</Warning>

## Troubleshooting

- Ensure the connected account has permission to read the file metadata included in the webhook
- Test locally with `crewai triggers run microsoft_onedrive/file_changed` to see the exact payload structure
- If the trigger fires but the payload is missing `permissions`, confirm the site-level sharing settings allow Graph to return this field
- For large tenants, filter notifications upstream so the crew only runs on relevant directories
- Note: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
@@ -36,17 +36,28 @@ crew.kickoff({

The crew extracts sender details, subject, body preview, and attachments before generating a structured response.

## Sample payloads & crews
## Testing Locally

Review the [Outlook examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/outlook) for two common scenarios:
Test your Outlook trigger integration locally using the CrewAI CLI:

- `new-message.json` → new mail notifications parsed by `outlook-message-crew.py`
- `event-removed.json` → calendar cleanup handled by `outlook-event-removal-crew.py`
```bash
# View all available triggers
crewai triggers list

Each crew demonstrates how to handle Microsoft Graph payloads, normalize headers, and keep humans in the loop with concise summaries.
# Simulate an Outlook trigger with a realistic payload
crewai triggers run microsoft_outlook/email_received
```

The `crewai triggers run` command runs your crew with a complete Outlook payload so you can test your parsing logic before deployment.

<Warning>
During development, use `crewai triggers run microsoft_outlook/email_received` (not `crewai run`) to simulate trigger execution. After deployment, your crew receives the trigger payload automatically.
</Warning>

## Troubleshooting

- Verify the Outlook connector is still authorized; the subscription must be renewed periodically
- Test locally with `crewai triggers run microsoft_outlook/email_received` to see the exact payload structure
- If attachments are missing, confirm the webhook subscription includes the `includeResourceData` flag (see the sketch below)
- Review execution logs when events fail to match—cancellation payloads lack attendee lists by design and the crew should account for that
- Note: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
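On the `includeResourceData` point: that flag is set when the Microsoft Graph subscription is created. A simplified sketch follows; it is illustrative only (resource-data subscriptions additionally require an encryption certificate and tenant-side approval, both omitted here, and the expiry value is a placeholder).

```python
import requests

GRAPH_BASE = "https://graph.microsoft.com/v1.0"


def create_inbox_subscription(access_token: str, notification_url: str) -> dict:
    """Illustrative Graph subscription request that opts into resource data."""
    body = {
        "changeType": "created",
        "notificationUrl": notification_url,
        "resource": "me/mailFolders('inbox')/messages",
        "expirationDateTime": "2025-01-01T00:00:00Z",  # placeholder expiry
        "includeResourceData": True,
    }
    response = requests.post(
        f"{GRAPH_BASE}/subscriptions",
        json=body,
        headers={"Authorization": f"Bearer {access_token}"},
    )
    response.raise_for_status()
    return response.json()
```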
232
docs/ko/mcp/dsl-integration.mdx
Normal file
@@ -0,0 +1,232 @@

---
title: MCP DSL Integration
description: Learn how to use CrewAI's simple DSL syntax to integrate MCP servers directly with your agents via the mcps field.
icon: code
mode: "wide"
---

## Overview

CrewAI's MCP DSL (Domain Specific Language) integration provides the **simplest way** to connect your agents to MCP (Model Context Protocol) servers. Just add an `mcps` field to your agent and CrewAI handles all the complexity automatically.

<Info>
This is the **recommended approach** for most MCP use cases. For advanced scenarios that require manual connection management, see [MCPServerAdapter](/ko/mcp/overview#advanced-mcpserveradapter).
</Info>

## Basic Usage

Add MCP servers to your agent using the `mcps` field:

```python
from crewai import Agent

agent = Agent(
    role="Research Assistant",
    goal="Help with research and analysis tasks",
    backstory="Expert assistant with access to advanced research tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=your_key&profile=research"
    ]
)

# MCP tools are now automatically available!
# No manual connection management or tool configuration needed
```

## Supported Reference Formats

### External Remote MCP Servers

```python
# Basic HTTPS server
"https://api.example.com/mcp"

# Server with authentication
"https://mcp.exa.ai/mcp?api_key=your_key&profile=your_profile"

# Server with a custom path
"https://services.company.com/api/v1/mcp"
```

### Selecting Specific Tools

Use the `#` syntax to select specific tools from a server:

```python
# Get only the forecast tool from the weather server
"https://weather.api.com/mcp#get_forecast"

# Get only the search tool from Exa
"https://mcp.exa.ai/mcp?api_key=your_key#web_search_exa"
```

### CrewAI AMP Marketplace

Access tools from the CrewAI AMP marketplace:

```python
# Full service with all tools
"crewai-amp:financial-data"

# A specific tool from an AMP service
"crewai-amp:research-tools#pubmed_search"

# Multiple AMP services
mcps=[
    "crewai-amp:weather-insights",
    "crewai-amp:market-analysis",
    "crewai-amp:social-media-monitoring"
]
```

## Complete Example

Here is a complete example using multiple MCP servers:

```python
from crewai import Agent, Task, Crew, Process

# Create an agent with multiple MCP sources
multi_source_agent = Agent(
    role="Multi-Source Research Analyst",
    goal="Conduct comprehensive research using multiple data sources",
    backstory="""Expert researcher with access to web search, weather data,
    financial information, and academic research tools""",
    mcps=[
        # External MCP servers
        "https://mcp.exa.ai/mcp?api_key=your_exa_key&profile=research",
        "https://weather.api.com/mcp#get_current_conditions",

        # CrewAI AMP marketplace
        "crewai-amp:financial-insights",
        "crewai-amp:academic-research#pubmed_search",
        "crewai-amp:market-intelligence#competitor_analysis"
    ]
)

# Create a comprehensive research task
research_task = Task(
    description="""Research the impact of AI agents on business productivity.
    Include current weather impacts on remote work, financial market trends,
    and recent academic publications on AI agent frameworks.""",
    expected_output="""A comprehensive report covering:
    1. Analysis of AI agents' business impact
    2. Weather considerations for remote work
    3. AI-related financial market trends
    4. Academic research citations and insights
    5. Competitive landscape analysis""",
    agent=multi_source_agent
)

# Create and run the crew
research_crew = Crew(
    agents=[multi_source_agent],
    tasks=[research_task],
    process=Process.sequential,
    verbose=True
)

result = research_crew.kickoff()
print(f"Research completed with {len(multi_source_agent.mcps)} MCP data sources")
```

## Key Features

- 🔄 **Automatic Tool Discovery**: Tools are discovered and integrated automatically
- 🏷️ **Name Collision Prevention**: Server names are prefixed to tool names
- ⚡ **Performance Optimized**: Schema caching and on-demand connections
- 🛡️ **Error Resilience**: Graceful handling of unavailable servers
- ⏱️ **Timeout Protection**: Built-in timeouts prevent hung connections
- 📊 **Transparent Integration**: Works seamlessly with existing CrewAI features

## Error Handling

The MCP DSL integration is designed to be resilient:

```python
agent = Agent(
    role="Resilient Agent",
    goal="Keep working despite server issues",
    backstory="An agent that handles failures gracefully",
    mcps=[
        "https://reliable-server.com/mcp",      # Will work
        "https://unreachable-server.com/mcp",   # Skipped gracefully
        "https://slow-server.com/mcp",          # Times out gracefully
        "crewai-amp:working-service"            # Will work
    ]
)
# The agent uses tools from working servers and logs warnings for failed ones
```

## Performance Features

### Automatic Caching

Tool schemas are cached for 5 minutes to improve performance:

```python
# First agent creation - discovers tools from the server
agent1 = Agent(role="First", goal="Test", backstory="Test",
               mcps=["https://api.example.com/mcp"])

# Second agent creation (within 5 minutes) - uses cached tool schemas
agent2 = Agent(role="Second", goal="Test", backstory="Test",
               mcps=["https://api.example.com/mcp"])  # Much faster!
```

### On-Demand Connections

Tool connections are established only when tools are actually used:

```python
# Agent creation is fast - no MCP connections made yet
agent = Agent(
    role="On-Demand Agent",
    goal="Use tools efficiently",
    backstory="An efficient agent that connects only when needed",
    mcps=["https://api.example.com/mcp"]
)

# The MCP connection is made only when a tool is actually executed
# This minimizes connection overhead and improves startup performance
```

## Best Practices

### 1. Use Specific Tools When Possible

```python
# Good - fetch only the tools you need
mcps=["https://weather.api.com/mcp#get_forecast"]

# Less efficient - fetch all tools from the server
mcps=["https://weather.api.com/mcp"]
```

### 2. Handle Authentication Securely

```python
import os

# Store API keys in environment variables
exa_key = os.getenv("EXA_API_KEY")
exa_profile = os.getenv("EXA_PROFILE")

agent = Agent(
    role="Secure Agent",
    goal="Use MCP tools securely",
    backstory="A security-conscious agent",
    mcps=[f"https://mcp.exa.ai/mcp?api_key={exa_key}&profile={exa_profile}"]
)
```

### 3. Plan for Server Failures

```python
# Always include backup options
mcps=[
    "https://primary-api.com/mcp",    # Primary choice
    "https://backup-api.com/mcp",     # Backup option
    "crewai-amp:reliable-service"     # AMP fallback
]
```
@@ -8,12 +8,37 @@ mode: "wide"
## Overview

The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP) provides a standardized way for AI agents to give LLMs context by communicating with external services known as MCP Servers.
The `crewai-tools` library extends CrewAI's capabilities by letting you seamlessly integrate tools from these MCP servers into your agents.
This gives your crews access to a vast ecosystem of functionality.

CrewAI offers **two approaches** to MCP integration:

### 🚀 **New: Simple DSL Integration** (Recommended)

Use the `mcps` field directly on agents for seamless MCP tool integration:

```python
from crewai import Agent

agent = Agent(
    role="Research Analyst",
    goal="Research and analyze information",
    backstory="Expert researcher with access to external tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=your_key",   # External MCP server
        "https://api.weather.com/mcp#get_forecast",  # Specific tool from a server
        "crewai-amp:financial-data",                 # CrewAI AMP marketplace
        "crewai-amp:research-tools#pubmed_search"    # Specific AMP tool
    ]
)
# MCP tools are now automatically available to your agent!
```

### 🔧 **Advanced: MCPServerAdapter** (For Complex Scenarios)

For advanced use cases that require manual connection management, the `crewai-tools` library provides the `MCPServerAdapter` class (a minimal Stdio sketch follows the list below).

We currently support the following transport mechanisms:

- **Stdio**: for local servers (communication via standard input/output between processes on the same machine)
- **HTTPS**: for remote servers (secure communication over HTTPS)
- **Server-Sent Events (SSE)**: for remote servers (one-way, real-time data streaming from server to client over HTTP)
- **Streamable HTTP**: for remote servers (flexible, potentially bidirectional communication over HTTP, typically using SSE for server-to-client streams)
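For comparison with the DSL path above, a minimal `MCPServerAdapter` sketch over Stdio, following the pattern documented for crewai-tools (the server script name is illustrative):

```python
from crewai import Agent
from crewai_tools import MCPServerAdapter
from mcp import StdioServerParameters

server_params = StdioServerParameters(
    command="python",
    args=["my_local_mcp_server.py"],  # illustrative local MCP server
)

# The adapter opens the connection and exposes the server's tools
# for as long as the context manager is active.
with MCPServerAdapter(server_params) as mcp_tools:
    agent = Agent(
        role="Research Analyst",
        goal="Research and analyze information",
        backstory="Expert researcher with access to external tools",
        tools=mcp_tools,
    )
```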
@@ -57,6 +57,22 @@ Ferramentas & Integrações é o hub central para conectar aplicações de terce
uv add crewai-tools
```

### Environment variable setup

<Note>
To use integrations with `Agent(apps=[])`, you must set the `CREWAI_PLATFORM_INTEGRATION_TOKEN` environment variable to your Enterprise Token.
</Note>

```bash
export CREWAI_PLATFORM_INTEGRATION_TOKEN="seu_enterprise_token"
```

Or add it to your `.env` file:

```
CREWAI_PLATFORM_INTEGRATION_TOKEN=seu_enterprise_token
```

### Usage example

<Tip>
@@ -116,19 +116,49 @@ Antes de ativar em produção, confirme que você:
- Decided whether to use `allow_crewai_trigger_context` to inject context automatically
- Set up monitoring (webhooks, CrewAI execution histories, external alerts)

### Sample Payloads and Crews Repository
### Testing Triggers Locally with the CLI

| Integration | When it fires | Payload samples | Example crews |
| :-- | :-- | :-- | :-- |
| Gmail | New messages, thread updates | [Alert and thread payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail) | [`new-email-crew.py`, `gmail-alert-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail) |
| Google Calendar | Event created/updated/started/ended/canceled | [Event payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_calendar) | [`calendar-event-crew.py`, `calendar-meeting-crew.py`, `calendar-working-location-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_calendar) |
| Google Drive | File created/updated/deleted | [File payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_drive) | [`drive-file-crew.py`, `drive-file-deletion-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_drive) |
| Outlook | New email, event removed | [Outlook payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/outlook) | [`outlook-message-crew.py`, `outlook-event-removal-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/outlook) |
| OneDrive | File operations (create, update, share, delete) | [OneDrive payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/onedrive) | [`onedrive-file-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/onedrive) |
| HubSpot | Records created/updated (contacts, companies, deals) | [HubSpot payloads](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/hubspot) | [`hubspot-company-crew.py`, `hubspot-contact-crew.py`, `hubspot-record-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/hubspot) |
| Microsoft Teams | Chat created | [Teams payload](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/microsoft-teams) | [`teams-chat-created-crew.py`](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/microsoft-teams) |
The CrewAI CLI provides powerful commands to help you develop and test trigger-driven automations without deploying to production.

Use these samples to tune your parsing, copy the matching crew, and swap the test payload for real data.
#### List Available Triggers

View all triggers available for your connected integrations:

```bash
crewai triggers list
```

This command displays all triggers available based on your connected integrations, showing:
- Integration name and connection status
- Available trigger types
- Trigger names and descriptions

#### Simulate a Trigger Execution

Test your crew with realistic trigger payloads before deployment:

```bash
crewai triggers run <trigger_name>
```

For example:

```bash
crewai triggers run microsoft_onedrive/file_changed
```

This command:
- Runs your crew locally
- Passes a complete, realistic trigger payload
- Simulates exactly how your crew will be invoked in production

<Warning>
**Important Development Notes:**
- Use `crewai triggers run <trigger>` to simulate trigger execution during development
- Running `crewai run` will NOT simulate trigger invocations and will not pass the trigger payload
- After deployment, your crew runs with the real trigger payload
- If your crew expects parameters that are not in the trigger payload, execution may fail
</Warning>

### Triggers with Crews

@@ -203,17 +233,20 @@ def delegar_para_crew(self, crewai_trigger_payload: dict = None):
## Troubleshooting

**Trigger does not fire:**
- Check that it is enabled
- Check the connection status
- Verify the trigger is enabled on the Triggers tab of your deployment
- Check the connection status in Tools & Integrations
- Make sure all required environment variables are configured

**Execution failures:**
- Check the logs to understand the error
- During development, make sure `crewai_trigger_payload` is present with the correct payload
- Check the execution logs for error details
- Use `crewai triggers run <trigger_name>` to test locally and see the exact payload structure
- Verify your crew can process the `crewai_trigger_payload` parameter
- Make sure your crew does not expect parameters that are not included in the trigger payload

**Development issues:**
- Always test with `crewai triggers run <trigger>` before deploying to see the full payload
- Remember that `crewai run` does NOT simulate trigger invocations—use `crewai triggers run` instead
- Use `crewai triggers list` to check which triggers are available for your connected integrations
- After deployment, your crew receives the real trigger payload, so test thoroughly locally first

Triggers turn your CrewAI deployments into event-driven systems, integrating seamlessly with the processes and tools your team already uses.

<Callout icon="github" title="Examples in practice">
  <a href="https://github.com/crewAIInc/crewai-enterprise-trigger-examples">
    CrewAI AMP Trigger Examples repository
  </a>
</Callout>
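The `delegar_para_crew` hunk header above keeps the trigger payload optional so manual runs still work. An illustrative English rendering of that delegation pattern (the `self.crew()` accessor and crew construction are assumptions, not shown in the diff):

```python
def delegate_to_crew(self, crewai_trigger_payload: dict | None = None):
    """Forward the trigger payload to a crew when present, mirroring the
    optional-parameter signature shown in the hunk header above."""
    inputs = {}
    if crewai_trigger_payload is not None:
        inputs["crewai_trigger_payload"] = crewai_trigger_payload
    return self.crew().kickoff(inputs=inputs)
```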
@@ -51,16 +51,25 @@ class GmailProcessingCrew:
)
```

The Gmail payload will be available via the standard context mechanisms. See the payload samples repository for structure and fields.
The Gmail payload will be available via the standard context mechanisms.

### Sample payloads & crews
### Testing Locally

The [CrewAI AMP Trigger Examples repository](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail) includes:
Test your Gmail trigger integration locally using the CrewAI CLI:

- `new-email-payload-1.json` / `new-email-payload-2.json` — production-style new message alerts with matching crews in `new-email-crew.py`
- `thread-updated-sample-1.json` — follow-up messages on an existing thread, processed by `gmail-alert-crew.py`
```bash
# View all available triggers
crewai triggers list

Use these samples to validate your parsing logic locally before wiring the trigger to your live Gmail accounts.
# Simulate a Gmail trigger with a realistic payload
crewai triggers run gmail/new_email
```

The `crewai triggers run` command runs your crew with a complete Gmail payload so you can test your parsing logic before deployment.

<Warning>
Use `crewai triggers run gmail/new_email` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew receives the trigger payload automatically.
</Warning>

## Monitoring Executions

@@ -70,16 +79,10 @@ Track history and performance of triggered runs:
<img src="/images/enterprise/list-executions.png" alt="List of executions triggered by automation" />
</Frame>

## Payload Reference

See the sample payloads and field descriptions:

<Card title="Gmail samples in Trigger Examples Repo" href="https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/gmail" icon="envelopes-bulk">
Gmail samples in Trigger Examples Repo
</Card>

## Troubleshooting

- Ensure Gmail is connected in Tools & Integrations
- Verify the Gmail Trigger is enabled on the Triggers tab
- Test locally with `crewai triggers run gmail/new_email` to see the exact payload structure
- Check the execution logs and confirm the payload is passed as `crewai_trigger_payload`
- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
@@ -39,16 +39,23 @@ print(result.raw)

Use `crewai_trigger_payload` exactly as it is delivered by the trigger so the crew can extract the proper fields.

## Sample payloads & crews
## Testing Locally

The [Google Calendar examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_calendar) show how to handle multiple event types:
Test your Google Calendar trigger integration locally using the CrewAI CLI:

- `new-event.json` → standard event creation handled by `calendar-event-crew.py`
- `event-updated.json` / `event-started.json` / `event-ended.json` → in-flight updates processed by `calendar-meeting-crew.py`
- `event-canceled.json` → cancellation workflow that alerts attendees via `calendar-meeting-crew.py`
- Working location events use `calendar-working-location-crew.py` to extract on-site schedules
```bash
# View all available triggers
crewai triggers list

Each crew transforms raw event metadata (attendees, rooms, working locations) into the summaries your teams need.
# Simulate a Google Calendar trigger with a realistic payload
crewai triggers run google_calendar/event_changed
```

The `crewai triggers run` command runs your crew with a complete Calendar payload so you can test your parsing logic before deployment.

<Warning>
Use `crewai triggers run google_calendar/event_changed` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew receives the trigger payload automatically.
</Warning>

## Monitoring Executions

@@ -61,5 +68,7 @@ The **Executions** list in the deployment dashboard tracks every triggered run a

## Troubleshooting

- Ensure the correct Google account is connected and the trigger is enabled
- Test locally with `crewai triggers run google_calendar/event_changed` to see the exact payload structure
- Confirm your workflow handles all-day events (payloads use `start.date` and `end.date` instead of timestamps)
- Check execution logs if reminders or attendee arrays are missing—calendar permissions can limit fields in the payload
- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
@@ -36,15 +36,23 @@ crew.kickoff({
})
```

## Sample payloads & crews
## Testing Locally

Explore the [Google Drive examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/google_drive) to cover different operations:
Test your Google Drive trigger integration locally using the CrewAI CLI:

- `new-file.json` → new uploads processed by `drive-file-crew.py`
- `updated-file.json` → file edits and metadata changes handled by `drive-file-crew.py`
- `deleted-file.json` → deletion events routed through `drive-file-deletion-crew.py`
```bash
# View all available triggers
crewai triggers list

Each crew highlights the file name, operation type, owner, permissions, and security considerations so downstream systems can respond appropriately.
# Simulate a Google Drive trigger with a realistic payload
crewai triggers run google_drive/file_changed
```

The `crewai triggers run` command runs your crew with a complete Drive payload so you can test your parsing logic before deployment.

<Warning>
Use `crewai triggers run google_drive/file_changed` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew receives the trigger payload automatically.
</Warning>

## Monitoring Executions

@@ -57,5 +65,7 @@ Track history and performance of triggered runs with the **Executions** list in

## Troubleshooting

- Verify Google Drive is connected and the trigger toggle is enabled
- Test locally with `crewai triggers run google_drive/file_changed` to see the exact payload structure
- If a payload is missing permission data, ensure the connected account has access to the file or folder
- The trigger sends file IDs only; use the Drive API if you need to fetch binary content during the crew run
- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
@@ -37,16 +37,28 @@ print(result.raw)

The crew parses thread metadata (subject, created time, roster) and generates an action plan for the receiving team.

## Sample payloads & crews
## Testing Locally

The [Microsoft Teams examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/microsoft-teams) include:
Test your Microsoft Teams trigger integration locally using the CrewAI CLI:

- `chat-created.json` → chat creation payload processed by `teams-chat-created-crew.py`
```bash
# View all available triggers
crewai triggers list

The crew demonstrates how to extract participants, initial messages, tenant information, and compliance metadata from the Microsoft Graph webhook payload.
# Simulate a Microsoft Teams trigger with a realistic payload
crewai triggers run microsoft_teams/teams_message_created
```

The `crewai triggers run` command runs your crew with a complete Teams payload so you can test your parsing logic before deployment.

<Warning>
Use `crewai triggers run microsoft_teams/teams_message_created` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew receives the trigger payload automatically.
</Warning>

## Troubleshooting

- Ensure the Teams connection is active; it must be refreshed if the tenant revokes permissions
- Test locally with `crewai triggers run microsoft_teams/teams_message_created` to see the exact payload structure
- Confirm the webhook subscription in Microsoft 365 is still valid if payloads stop arriving
- Review execution logs for payload shape mismatches—Graph notifications may omit fields when a chat is private or restricted
- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
@@ -36,18 +36,28 @@ crew.kickoff({

The crew inspects file metadata, user activity, and permission changes to produce a compliance-friendly summary.

## Sample payloads & crews
## Testing Locally

The [OneDrive examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/onedrive) showcase how to:
Test your OneDrive trigger integration locally using the CrewAI CLI:

- Parse file metadata, size, and folder paths
- Track who created and last modified the file
- Highlight permission and external sharing changes
```bash
# View all available triggers
crewai triggers list

`onedrive-file-crew.py` bundles the analysis and summarization tasks so you can add remediation steps as needed.
# Simulate a OneDrive trigger with a realistic payload
crewai triggers run microsoft_onedrive/file_changed
```

The `crewai triggers run` command runs your crew with a complete OneDrive payload so you can test your parsing logic before deployment.

<Warning>
Use `crewai triggers run microsoft_onedrive/file_changed` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew receives the trigger payload automatically.
</Warning>

## Troubleshooting

- Ensure the connected account has permission to read the file metadata included in the webhook
- Test locally with `crewai triggers run microsoft_onedrive/file_changed` to see the exact payload structure
- If the trigger fires but the payload is missing `permissions`, confirm the site-level sharing settings allow Graph to return this field
- For large tenants, filter notifications upstream so the crew only runs on relevant directories
- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
@@ -36,17 +36,28 @@ crew.kickoff({

The crew extracts sender details, subject, body preview, and attachments before generating a structured response.

## Sample payloads & crews
## Testing Locally

Review the [Outlook examples](https://github.com/crewAIInc/crewai-enterprise-trigger-examples/tree/main/outlook) for two common scenarios:
Test your Outlook trigger integration locally using the CrewAI CLI:

- `new-message.json` → new mail notifications parsed by `outlook-message-crew.py`
- `event-removed.json` → calendar cleanup handled by `outlook-event-removal-crew.py`
```bash
# View all available triggers
crewai triggers list

Each crew demonstrates how to handle Microsoft Graph payloads, normalize headers, and keep humans in the loop with concise summaries.
# Simulate an Outlook trigger with a realistic payload
crewai triggers run microsoft_outlook/email_received
```

The `crewai triggers run` command runs your crew with a complete Outlook payload so you can test your parsing logic before deployment.

<Warning>
Use `crewai triggers run microsoft_outlook/email_received` (not `crewai run`) to simulate trigger execution during development. After deployment, your crew receives the trigger payload automatically.
</Warning>

## Troubleshooting

- Verify the Outlook connector is still authorized; the subscription must be renewed periodically
- Test locally with `crewai triggers run microsoft_outlook/email_received` to see the exact payload structure
- If attachments are missing, confirm the webhook subscription includes the `includeResourceData` flag
- Review execution logs when events fail to match—cancellation payloads lack attendee lists by design and the crew should account for that
- Remember: use `crewai triggers run` (not `crewai run`) to simulate trigger execution
232
docs/pt-BR/mcp/dsl-integration.mdx
Normal file
@@ -0,0 +1,232 @@

---
title: MCP DSL Integration
description: Learn how to use CrewAI's simple DSL syntax to integrate MCP servers directly with your agents via the mcps field.
icon: code
mode: "wide"
---

## Overview

CrewAI's MCP DSL (Domain Specific Language) integration offers the **simplest way** to connect your agents to MCP (Model Context Protocol) servers. Just add an `mcps` field to your agent and CrewAI handles all the complexity automatically.

<Info>
This is the **recommended approach** for most MCP use cases. For advanced scenarios that require manual connection management, see [MCPServerAdapter](/pt-BR/mcp/overview#advanced-mcpserveradapter).
</Info>

## Basic Usage

Add MCP servers to your agent using the `mcps` field:

```python
from crewai import Agent

agent = Agent(
    role="Research Assistant",
    goal="Help with research and analysis tasks",
    backstory="Expert assistant with access to advanced research tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=sua_chave&profile=pesquisa"
    ]
)

# MCP tools are now automatically available!
# No manual connection management or tool configuration needed
```

## Supported Reference Formats

### External Remote MCP Servers

```python
# Basic HTTPS server
"https://api.example.com/mcp"

# Server with authentication
"https://mcp.exa.ai/mcp?api_key=sua_chave&profile=seu_perfil"

# Server with a custom path
"https://services.company.com/api/v1/mcp"
```

### Selecting Specific Tools

Use the `#` syntax to select specific tools from a server:

```python
# Get only the forecast tool from the weather server
"https://weather.api.com/mcp#get_forecast"

# Get only the search tool from Exa
"https://mcp.exa.ai/mcp?api_key=sua_chave#web_search_exa"
```

### CrewAI AMP Marketplace

Access tools from the CrewAI AMP marketplace:

```python
# Full service with all tools
"crewai-amp:financial-data"

# A specific tool from an AMP service
"crewai-amp:research-tools#pubmed_search"

# Multiple AMP services
mcps=[
    "crewai-amp:weather-insights",
    "crewai-amp:market-analysis",
    "crewai-amp:social-media-monitoring"
]
```

## Complete Example

Here is a complete example using multiple MCP servers:

```python
from crewai import Agent, Task, Crew, Process

# Create an agent with multiple MCP sources
agente_multi_fonte = Agent(
    role="Multi-Source Research Analyst",
    goal="Conduct comprehensive research using multiple data sources",
    backstory="""Expert researcher with access to web search, weather data,
    financial information, and academic research tools""",
    mcps=[
        # External MCP servers
        "https://mcp.exa.ai/mcp?api_key=sua_chave_exa&profile=pesquisa",
        "https://weather.api.com/mcp#get_current_conditions",

        # CrewAI AMP marketplace
        "crewai-amp:financial-insights",
        "crewai-amp:academic-research#pubmed_search",
        "crewai-amp:market-intelligence#competitor_analysis"
    ]
)

# Create a comprehensive research task
tarefa_pesquisa = Task(
    description="""Research the impact of AI agents on business productivity.
    Include current weather impacts on remote work, financial market trends,
    and recent academic publications on AI agent frameworks.""",
    expected_output="""A comprehensive report covering:
    1. Analysis of AI agents' business impact
    2. Weather considerations for remote work
    3. AI-related financial market trends
    4. Academic research citations and insights
    5. Competitive landscape analysis""",
    agent=agente_multi_fonte
)

# Create and run the crew
crew_pesquisa = Crew(
    agents=[agente_multi_fonte],
    tasks=[tarefa_pesquisa],
    process=Process.sequential,
    verbose=True
)

resultado = crew_pesquisa.kickoff()
print(f"Research completed with {len(agente_multi_fonte.mcps)} MCP data sources")
```

## Key Features

- 🔄 **Automatic Tool Discovery**: Tools are discovered and integrated automatically
- 🏷️ **Name Collision Prevention**: Server names are prefixed to tool names
- ⚡ **Performance Optimized**: On-demand connections with schema caching
- 🛡️ **Error Resilience**: Graceful handling of unavailable servers
- ⏱️ **Timeout Protection**: Built-in timeouts prevent hung connections
- 📊 **Transparent Integration**: Works seamlessly with existing CrewAI features

## Error Handling

The MCP DSL integration is designed to be resilient:

```python
agente = Agent(
    role="Resilient Agent",
    goal="Keep working despite server issues",
    backstory="An agent that handles failures gracefully",
    mcps=[
        "https://servidor-confiavel.com/mcp",     # Will work
        "https://servidor-inalcancavel.com/mcp",  # Skipped gracefully
        "https://servidor-lento.com/mcp",         # Times out gracefully
        "crewai-amp:servico-funcionando"          # Will work
    ]
)
# The agent uses tools from working servers and logs warnings for failed ones
```

## Performance Features

### Automatic Caching

Tool schemas are cached for 5 minutes to improve performance:

```python
# First agent creation - discovers tools from the server
agente1 = Agent(role="First", goal="Test", backstory="Test",
                mcps=["https://api.example.com/mcp"])

# Second agent creation (within 5 minutes) - uses cached tool schemas
agente2 = Agent(role="Second", goal="Test", backstory="Test",
                mcps=["https://api.example.com/mcp"])  # Much faster!
```

### On-Demand Connections

Tool connections are established only when tools are actually used:

```python
# Agent creation is fast - no MCP connections made yet
agente = Agent(
    role="On-Demand Agent",
    goal="Use tools efficiently",
    backstory="An efficient agent that connects only when needed",
    mcps=["https://api.example.com/mcp"]
)

# The MCP connection is made only when a tool is actually executed
# This minimizes connection overhead and improves startup performance
```

## Best Practices

### 1. Use Specific Tools When Possible

```python
# Good - fetch only the tools you need
mcps=["https://weather.api.com/mcp#get_forecast"]

# Less efficient - fetch all tools from the server
mcps=["https://weather.api.com/mcp"]
```

### 2. Handle Authentication Securely

```python
import os

# Store API keys in environment variables
exa_key = os.getenv("EXA_API_KEY")
exa_profile = os.getenv("EXA_PROFILE")

agente = Agent(
    role="Secure Agent",
    goal="Use MCP tools securely",
    backstory="A security-conscious agent",
    mcps=[f"https://mcp.exa.ai/mcp?api_key={exa_key}&profile={exa_profile}"]
)
```

### 3. Plan for Server Failures

```python
# Always include backup options
mcps=[
    "https://api-principal.com/mcp",   # Primary choice
    "https://api-backup.com/mcp",      # Backup option
    "crewai-amp:servico-confiavel"     # AMP fallback
]
```
@@ -8,12 +8,37 @@ mode: "wide"
## Overview

The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP) provides a standardized way for AI agents to give LLMs context by communicating with external services known as MCP Servers.
The `crewai-tools` library extends CrewAI's capabilities by letting you easily integrate tools from these MCP servers into your agents.
This gives your crews access to a vast ecosystem of functionality.

CrewAI offers **two approaches** to MCP integration:

### 🚀 **New: Simple DSL Integration** (Recommended)

Use the `mcps` field directly on agents for seamless MCP tool integration:

```python
from crewai import Agent

agent = Agent(
    role="Research Analyst",
    goal="Research and analyze information",
    backstory="Expert researcher with access to external tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=sua_chave",  # External MCP server
        "https://api.weather.com/mcp#get_forecast",  # Specific tool from a server
        "crewai-amp:financial-data",                 # CrewAI AMP marketplace
        "crewai-amp:research-tools#pubmed_search"    # Specific AMP tool
    ]
)
# MCP tools are now automatically available to your agent!
```

### 🔧 **Advanced: MCPServerAdapter** (For Complex Scenarios)

For advanced use cases that require manual connection management, the `crewai-tools` library provides the `MCPServerAdapter` class.

We currently support the following transport mechanisms:

- **Stdio**: for local servers (communication via standard input/output between processes on the same machine)
- **HTTPS**: for remote servers (secure communication over HTTPS)
- **Server-Sent Events (SSE)**: for remote servers (one-way, real-time data streaming from server to client over HTTP)
- **Streamable HTTP**: for remote servers (flexible, potentially bidirectional communication over HTTP, generally using SSE for server-to-client streams)
@@ -12,10 +12,9 @@ dependencies = [
    "pytube>=15.0.0",
    "requests>=2.32.5",
    "docker>=7.1.0",
    "crewai==1.0.0a4",
    "crewai==1.0.0",
    "lancedb>=0.5.4",
    "tiktoken>=0.8.0",
    "stagehand>=0.4.1",
    "beautifulsoup4>=4.13.4",
    "pypdf>=5.9.0",
    "python-docx>=1.2.0",
@@ -43,9 +43,6 @@ from crewai_tools.tools.contextualai_rerank_tool.contextual_rerank_tool import (
from crewai_tools.tools.couchbase_tool.couchbase_tool import (
    CouchbaseFTSVectorSearchTool,
)
from crewai_tools.tools.crewai_enterprise_tools.crewai_enterprise_tools import (
    CrewaiEnterpriseTools,
)
from crewai_tools.tools.crewai_platform_tools.crewai_platform_tools import (
    CrewaiPlatformTools,
)
@@ -214,7 +211,6 @@ __all__ = [
    "ContextualAIQueryTool",
    "ContextualAIRerankTool",
    "CouchbaseFTSVectorSearchTool",
    "CrewaiEnterpriseTools",
    "CrewaiPlatformTools",
    "DOCXSearchTool",
    "DallETool",
@@ -291,4 +287,4 @@ __all__ = [
    "ZapierActionTools",
]

__version__ = "1.0.0a4"
__version__ = "1.0.0"
@@ -3,13 +3,16 @@

import hashlib
from pathlib import Path
from typing import Any, TypeAlias, TypedDict
import uuid

from crewai.rag.config.types import RagConfigType
from crewai.rag.config.utils import get_rag_client
from crewai.rag.core.base_client import BaseClient
from crewai.rag.factory import create_client
from crewai.rag.qdrant.config import QdrantConfig
from crewai.rag.types import BaseRecord, SearchResult
from pydantic import PrivateAttr
from qdrant_client.models import VectorParams
from typing_extensions import Unpack

from crewai_tools.rag.data_types import DataType
@@ -52,7 +55,11 @@ class CrewAIRagAdapter(Adapter):
            self._client = create_client(self.config)
        else:
            self._client = get_rag_client()
        self._client.get_or_create_collection(collection_name=self.collection_name)
        collection_params: dict[str, Any] = {"collection_name": self.collection_name}
        if isinstance(self.config, QdrantConfig) and self.config.vectors_config:
            if isinstance(self.config.vectors_config, VectorParams):
                collection_params["vectors_config"] = self.config.vectors_config
        self._client.get_or_create_collection(**collection_params)

    def query(
        self,
@@ -76,6 +83,8 @@ class CrewAIRagAdapter(Adapter):
            if similarity_threshold is not None
            else self.similarity_threshold
        )
        if self._client is None:
            raise ValueError("Client is not initialized")

        results: list[SearchResult] = self._client.search(
            collection_name=self.collection_name,
@@ -201,9 +210,10 @@ class CrewAIRagAdapter(Adapter):
                    if isinstance(arg, dict):
                        file_metadata.update(arg.get("metadata", {}))

                chunk_id = hashlib.sha256(
                chunk_hash = hashlib.sha256(
                    f"{file_result.doc_id}_{chunk_idx}_{file_chunk}".encode()
                ).hexdigest()
                chunk_id = str(uuid.UUID(chunk_hash[:32]))

                documents.append(
                    {
@@ -251,9 +261,10 @@ class CrewAIRagAdapter(Adapter):
                if isinstance(arg, dict):
                    chunk_metadata.update(arg.get("metadata", {}))

                chunk_id = hashlib.sha256(
                chunk_hash = hashlib.sha256(
                    f"{loader_result.doc_id}_{i}_{chunk}".encode()
                ).hexdigest()
                chunk_id = str(uuid.UUID(chunk_hash[:32]))

                documents.append(
                    {
@@ -264,6 +275,8 @@ class CrewAIRagAdapter(Adapter):
        )

        if documents:
            if self._client is None:
                raise ValueError("Client is not initialized")
            self._client.add_documents(
                collection_name=self.collection_name, documents=documents
            )
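The `chunk_id` change above is worth spelling out: the raw SHA-256 hex digest is replaced by a UUID built from the digest's first 32 hex characters (128 bits), presumably because vector stores such as Qdrant only accept UUIDs or integers as point IDs. Extracted as a standalone helper for clarity:

```python
import hashlib
import uuid


def deterministic_chunk_id(doc_id: str, chunk_idx: int, chunk: str) -> str:
    """Stable chunk ID in UUID form, mirroring the diff above."""
    chunk_hash = hashlib.sha256(f"{doc_id}_{chunk_idx}_{chunk}".encode()).hexdigest()
    # Fold the first 128 bits of the digest into a valid UUID string.
    return str(uuid.UUID(chunk_hash[:32]))
```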
@@ -1,7 +1,7 @@
import json
import os
import re
from typing import Any, Literal, Optional, Union, cast, get_origin
from typing import Any, Literal, Optional, Union, _SpecialForm, cast, get_origin
import warnings

from crewai.tools import BaseTool
@@ -41,7 +41,7 @@ class EnterpriseActionTool(BaseTool):
        action_schema: dict[str, Any],
        enterprise_api_base_url: str | None = None,
    ):
        self._model_registry = {}
        self._model_registry = {}  # type: ignore[var-annotated]
        self._base_name = self._sanitize_name(name)

        schema_props, required = self._extract_schema_info(action_schema)
@@ -67,7 +67,7 @@ class EnterpriseActionTool(BaseTool):
        # Create the model
        if field_definitions:
            try:
                args_schema = create_model(
                args_schema = create_model(  # type: ignore[call-overload]
                    f"{self._base_name}Schema", **field_definitions
                )
            except Exception:
@@ -110,7 +110,9 @@ class EnterpriseActionTool(BaseTool):
        )
        return schema_props, required

    def _process_schema_type(self, schema: dict[str, Any], type_name: str) -> type[Any]:
    def _process_schema_type(
        self, schema: dict[str, Any], type_name: str
    ) -> type[Any] | _SpecialForm:
        """Process a JSON schema and return appropriate Python type."""
        if "anyOf" in schema:
            any_of_types = schema["anyOf"]
@@ -139,7 +141,7 @@ class EnterpriseActionTool(BaseTool):
        if json_type == "array":
            items_schema = schema.get("items", {"type": "string"})
            item_type = self._process_schema_type(items_schema, f"{type_name}Item")
            return list[item_type]
            return list[item_type]  # type: ignore[valid-type]

        if json_type == "object":
            return self._create_nested_model(schema, type_name)
@@ -174,18 +176,20 @@ class EnterpriseActionTool(BaseTool):
                prop_type = str

            field_definitions[prop_name] = self._create_field_definition(
                prop_type, is_required, prop_desc
                prop_type,
                is_required,
                prop_desc,  # type: ignore[arg-type]
            )

        try:
            nested_model = create_model(full_model_name, **field_definitions)
            nested_model = create_model(full_model_name, **field_definitions)  # type: ignore[call-overload]
            self._model_registry[full_model_name] = nested_model
            return nested_model
        except Exception:
            return dict

    def _create_field_definition(
        self, field_type: type[Any], is_required: bool, description: str
        self, field_type: type[Any] | _SpecialForm, is_required: bool, description: str
    ) -> tuple:
        """Create Pydantic field definition based on type and requirement."""
        if is_required:
@@ -276,7 +280,7 @@ class EnterpriseActionKitToolAdapter:
    ):
        """Initialize the adapter with an enterprise action token."""
        self._set_enterprise_action_token(enterprise_action_token)
        self._actions_schema = {}
        self._actions_schema = {}  # type: ignore[var-annotated]
        self._tools = None
        self.enterprise_api_base_url = (
            enterprise_api_base_url or get_enterprise_api_base_url()
@@ -2,8 +2,11 @@ from collections.abc import Callable
from pathlib import Path
from typing import Any

from lancedb import DBConnection as LanceDBConnection, connect as lancedb_connect
from lancedb.table import Table as LanceDBTable
from lancedb import (  # type: ignore[import-untyped]
    DBConnection as LanceDBConnection,
    connect as lancedb_connect,
)
from lancedb.table import Table as LanceDBTable  # type: ignore[import-untyped]
from openai import Client as OpenAIClient
from pydantic import Field, PrivateAttr

@@ -37,7 +40,7 @@ class LanceDBAdapter(Adapter):

        super().model_post_init(__context)

    def query(self, question: str) -> str:
    def query(self, question: str) -> str:  # type: ignore[override]
        query = self.embedding_function([question])[0]
        results = (
            self._table.search(query, vector_column_name=self.vector_column_name)

@@ -27,7 +27,7 @@ class RAGAdapter(Adapter):
            embedding_config=embedding_config,
        )

    def query(self, question: str) -> str:
    def query(self, question: str) -> str:  # type: ignore[override]
        return self._adapter.query(question)

    def add(
@@ -1,3 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from typing import Generic, TypeVar
|
||||
|
||||
@@ -29,7 +31,7 @@ class ToolCollection(list, Generic[T]):
|
||||
def _build_name_cache(self) -> None:
|
||||
self._name_cache = {tool.name.lower(): tool for tool in self}
|
||||
|
||||
def __getitem__(self, key: int | str) -> T:
|
||||
def __getitem__(self, key: int | str) -> T: # type: ignore[override]
|
||||
if isinstance(key, str):
|
||||
return self._name_cache[key.lower()]
|
||||
return super().__getitem__(key)
|
||||
@@ -38,11 +40,11 @@ class ToolCollection(list, Generic[T]):
|
||||
super().append(tool)
|
||||
self._name_cache[tool.name.lower()] = tool
|
||||
|
||||
def extend(self, tools: list[T]) -> None:
|
||||
def extend(self, tools: list[T]) -> None: # type: ignore[override]
|
||||
super().extend(tools)
|
||||
self._build_name_cache()
|
||||
|
||||
def insert(self, index: int, tool: T) -> None:
|
||||
def insert(self, index: int, tool: T) -> None: # type: ignore[override]
|
||||
super().insert(index, tool)
|
||||
self._name_cache[tool.name.lower()] = tool
|
||||
|
||||
@@ -51,13 +53,13 @@ class ToolCollection(list, Generic[T]):
|
||||
if tool.name.lower() in self._name_cache:
|
||||
del self._name_cache[tool.name.lower()]
|
||||
|
||||
def pop(self, index: int = -1) -> T:
|
||||
def pop(self, index: int = -1) -> T: # type: ignore[override]
|
||||
tool = super().pop(index)
|
||||
if tool.name.lower() in self._name_cache:
|
||||
del self._name_cache[tool.name.lower()]
|
||||
return tool
|
||||
|
||||
def filter_by_names(self, names: list[str] | None = None) -> "ToolCollection[T]":
|
||||
def filter_by_names(self, names: list[str] | None = None) -> ToolCollection[T]:
|
||||
if names is None:
|
||||
return self
|
||||
|
||||
@@ -69,7 +71,7 @@ class ToolCollection(list, Generic[T]):
|
||||
]
|
||||
)
|
||||
|
||||
def filter_where(self, func: Callable[[T], bool]) -> "ToolCollection[T]":
|
||||
def filter_where(self, func: Callable[[T], bool]) -> ToolCollection[T]:
|
||||
return ToolCollection([tool for tool in self if func(tool)])
|
||||
|
||||
def clear(self) -> None:
|
||||
|
||||
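With the overrides above, a ToolCollection can be indexed by tool name as well as by position, and the lowercased name cache stays in sync through append/extend/insert/pop. A minimal sketch of the behavior; SearchTool and ScrapeTool are hypothetical, only their name attributes matter here:

    tools = ToolCollection()
    tools.append(SearchTool(name="Search"))      # cache entry: "search"
    tools.append(ScrapeTool(name="Scrape"))      # cache entry: "scrape"
    tools["SEARCH"]                              # case-insensitive name lookup
    tools[1]                                     # integer keys still delegate to list
    subset = tools.filter_by_names(["Scrape"])   # returns a new ToolCollection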
@@ -1,12 +1,15 @@
 import logging
 import os
+from typing import Final, Literal

 from crewai.tools import BaseTool
 from pydantic import Field, create_model
 import requests


-ACTIONS_URL = "https://actions.zapier.com/api/v2/ai-actions"
+ACTIONS_URL: Final[Literal["https://actions.zapier.com/api/v2/ai-actions"]] = (
+    "https://actions.zapier.com/api/v2/ai-actions"
+)

 logger = logging.getLogger(__name__)

@@ -55,8 +58,6 @@ class ZapierActionTool(BaseTool):
 class ZapierActionsAdapter:
     """Adapter for Zapier Actions."""

-    api_key: str
-
     def __init__(self, api_key: str | None = None):
         self.api_key = api_key or os.getenv("ZAPIER_API_KEY")
         if not self.api_key:
@@ -77,7 +78,7 @@ class ZapierActionsAdapter:

         return response.json()

-    def tools(self) -> list[BaseTool]:
+    def tools(self) -> list[ZapierActionTool]:
         """Convert Zapier actions to BaseTool instances."""
         actions_response = self.get_zapier_actions()
         tools = []
@@ -91,12 +92,12 @@ class ZapierActionsAdapter:
             )

             params = action.get("params", {})
-            args_fields = {}
-
-            args_fields["instructions"] = (
-                str,
-                Field(description="Instructions for how to execute this action"),
-            )
+            args_fields = {
+                "instructions": (
+                    str,
+                    Field(description="Instructions for how to execute this action"),
+                )
+            }

             for param_name, param_info in params.items():
                 field_type = (
@@ -112,7 +113,7 @@ class ZapierActionsAdapter:
                     Field(description=field_description),
                 )

-            args_schema = create_model(f"{tool_name.title()}Schema", **args_fields)
+            args_schema = create_model(f"{tool_name.title()}Schema", **args_fields)  # type: ignore[call-overload]

             tool = ZapierActionTool(
                 name=tool_name,
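A rough sketch of how the adapter is used after these changes; the exact action payload returned by the Zapier API (and therefore each generated schema) is an assumption here:

    adapter = ZapierActionsAdapter()        # reads ZAPIER_API_KEY if no key is passed
    tools = adapter.tools()                 # now typed as list[ZapierActionTool]
    for tool in tools:
        print(tool.name, tool.args_schema)  # one dynamically built schema per action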
@@ -1,10 +1,10 @@
-from .bedrock import (
+from crewai_tools.aws.bedrock import (
     BedrockInvokeAgentTool,
     BedrockKBRetrieverTool,
     create_browser_toolkit,
     create_code_interpreter_toolkit,
 )
-from .s3 import S3ReaderTool, S3WriterTool
+from crewai_tools.aws.s3 import S3ReaderTool, S3WriterTool


 __all__ = [

@@ -1,7 +1,9 @@
-from .agents.invoke_agent_tool import BedrockInvokeAgentTool
-from .browser import create_browser_toolkit
-from .code_interpreter import create_code_interpreter_toolkit
-from .knowledge_base.retriever_tool import BedrockKBRetrieverTool
+from crewai_tools.aws.bedrock.agents.invoke_agent_tool import BedrockInvokeAgentTool
+from crewai_tools.aws.bedrock.browser import create_browser_toolkit
+from crewai_tools.aws.bedrock.code_interpreter import create_code_interpreter_toolkit
+from crewai_tools.aws.bedrock.knowledge_base.retriever_tool import (
+    BedrockKBRetrieverTool,
+)


 __all__ = [

@@ -1,4 +1,4 @@
-from .invoke_agent_tool import BedrockInvokeAgentTool
+from crewai_tools.aws.bedrock.agents.invoke_agent_tool import BedrockInvokeAgentTool


 __all__ = ["BedrockInvokeAgentTool"]

@@ -7,7 +7,10 @@ from crewai.tools import BaseTool
 from dotenv import load_dotenv
 from pydantic import BaseModel, Field

-from ..exceptions import BedrockAgentError, BedrockValidationError
+from crewai_tools.aws.bedrock.exceptions import (
+    BedrockAgentError,
+    BedrockValidationError,
+)


 # Load environment variables from .env file
@@ -24,9 +27,9 @@ class BedrockInvokeAgentTool(BaseTool):
     name: str = "Bedrock Agent Invoke Tool"
     description: str = "An agent responsible for policy analysis."
     args_schema: type[BaseModel] = BedrockInvokeAgentToolInput
-    agent_id: str = None
-    agent_alias_id: str = None
-    session_id: str = None
+    agent_id: str | None = None
+    agent_alias_id: str | None = None
+    session_id: str | None = None
     enable_trace: bool = False
     end_session: bool = False
     package_dependencies: list[str] = Field(default_factory=lambda: ["boto3"])
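The switch from str = None to str | None = None matters because None is not a valid str: the old annotations are type errors under mypy, which this change resolves without altering runtime behavior. A hedged sketch; the ID values are placeholders:

    tool = BedrockInvokeAgentTool(
        agent_id="AGENT123",        # hypothetical agent ID
        agent_alias_id="ALIAS456",  # hypothetical alias ID
    )
    assert tool.session_id is None  # consistent with the Optional annotation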
@@ -1,4 +1,7 @@
-from .browser_toolkit import BrowserToolkit, create_browser_toolkit
+from crewai_tools.aws.bedrock.browser.browser_toolkit import (
+    BrowserToolkit,
+    create_browser_toolkit,
+)


 __all__ = ["BrowserToolkit", "create_browser_toolkit"]

@@ -9,8 +9,10 @@ from urllib.parse import urlparse
 from crewai.tools import BaseTool
 from pydantic import BaseModel, Field

-from .browser_session_manager import BrowserSessionManager
-from .utils import aget_current_page, get_current_page
+from crewai_tools.aws.bedrock.browser.browser_session_manager import (
+    BrowserSessionManager,
+)
+from crewai_tools.aws.bedrock.browser.utils import aget_current_page, get_current_page


 logger = logging.getLogger(__name__)
@@ -80,9 +82,9 @@ class CurrentWebPageToolInput(BaseModel):
 class BrowserBaseTool(BaseTool):
     """Base class for browser tools."""

-    def __init__(self, session_manager: BrowserSessionManager):
+    def __init__(self, session_manager: BrowserSessionManager):  # type: ignore[call-arg]
         """Initialize with a session manager."""
-        super().__init__()
+        super().__init__()  # type: ignore[call-arg]
         self._session_manager = session_manager

         if self._is_in_asyncio_loop() and hasattr(self, "_arun"):
@@ -91,7 +93,7 @@ class BrowserBaseTool(BaseTool):
             # Override _run to use _arun when in an asyncio loop
             def patched_run(*args, **kwargs):
                 try:
-                    import nest_asyncio
+                    import nest_asyncio  # type: ignore[import-untyped]

                     loop = asyncio.get_event_loop()
                     nest_asyncio.apply(loop)
@@ -101,7 +103,7 @@ class BrowserBaseTool(BaseTool):
                 except Exception as e:
                     return f"Error in patched _run: {e!s}"

-            self._run = patched_run
+            self._run = patched_run  # type: ignore[method-assign]

     async def get_async_page(self, thread_id: str) -> Any:
         """Get or create a page for the specified thread."""
@@ -356,7 +358,7 @@ class ExtractHyperlinksTool(BrowserBaseTool):
             for link in soup.find_all("a", href=True):
                 text = link.get_text().strip()
                 href = link["href"]
-                if href.startswith(("http", "https")):
+                if href.startswith(("http", "https")):  # type: ignore[union-attr]
                     links.append({"text": text, "url": href})

             if not links:
@@ -388,7 +390,7 @@ class ExtractHyperlinksTool(BrowserBaseTool):
             for link in soup.find_all("a", href=True):
                 text = link.get_text().strip()
                 href = link["href"]
-                if href.startswith(("http", "https")):
+                if href.startswith(("http", "https")):  # type: ignore[union-attr]
                     links.append({"text": text, "url": href})

             if not links:

@@ -1,4 +1,4 @@
-from .code_interpreter_toolkit import (
+from crewai_tools.aws.bedrock.code_interpreter.code_interpreter_toolkit import (
     CodeInterpreterToolkit,
     create_code_interpreter_toolkit,
 )

@@ -1,4 +1,6 @@
-from .retriever_tool import BedrockKBRetrieverTool
+from crewai_tools.aws.bedrock.knowledge_base.retriever_tool import (
+    BedrockKBRetrieverTool,
+)


 __all__ = ["BedrockKBRetrieverTool"]

@@ -6,7 +6,10 @@ from crewai.tools import BaseTool
 from dotenv import load_dotenv
 from pydantic import BaseModel, Field

-from ..exceptions import BedrockKnowledgeBaseError, BedrockValidationError
+from crewai_tools.aws.bedrock.exceptions import (
+    BedrockKnowledgeBaseError,
+    BedrockValidationError,
+)


 # Load environment variables from .env file
@@ -27,7 +30,7 @@ class BedrockKBRetrieverTool(BaseTool):
         "Retrieves information from an Amazon Bedrock Knowledge Base given a query"
     )
     args_schema: type[BaseModel] = BedrockKBRetrieverToolInput
-    knowledge_base_id: str = None
+    knowledge_base_id: str = None  # type: ignore[assignment]
     number_of_results: int | None = 5
     retrieval_configuration: dict[str, Any] | None = None
     guardrail_configuration: dict[str, Any] | None = None
@@ -55,7 +58,7 @@ class BedrockKBRetrieverTool(BaseTool):
         super().__init__(**kwargs)

         # Get knowledge_base_id from environment variable if not provided
-        self.knowledge_base_id = knowledge_base_id or os.getenv("BEDROCK_KB_ID")
+        self.knowledge_base_id = knowledge_base_id or os.getenv("BEDROCK_KB_ID")  # type: ignore[assignment]
         self.number_of_results = number_of_results
         self.guardrail_configuration = guardrail_configuration
         self.next_token = next_token
@@ -239,7 +242,7 @@ class BedrockKBRetrieverTool(BaseTool):
         if results:
             response_object["results"] = results
         else:
-            response_object["message"] = "No results found for the given query."
+            response_object["message"] = "No results found for the given query."  # type: ignore[assignment]

         if "nextToken" in response:
             response_object["nextToken"] = response["nextToken"]
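A short usage sketch for the retriever tool after this change; the knowledge base ID is a placeholder:

    tool = BedrockKBRetrieverTool(knowledge_base_id="KB12345", number_of_results=3)
    # or rely on the environment variable fallback instead:
    # export BEDROCK_KB_ID=KB12345
    tool = BedrockKBRetrieverTool()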
@@ -1,2 +1,2 @@
-from .reader_tool import S3ReaderTool as S3ReaderTool
-from .writer_tool import S3WriterTool as S3WriterTool
+from crewai_tools.aws.s3.reader_tool import S3ReaderTool as S3ReaderTool
+from crewai_tools.aws.s3.writer_tool import S3WriterTool as S3WriterTool

@@ -17,14 +17,15 @@ class LoaderResult(BaseModel):


 class BaseLoader(ABC):
-    def __init__(self, config: dict[str, Any] | None = None):
+    def __init__(self, config: dict[str, Any] | None = None) -> None:
         self.config = config or {}

     @abstractmethod
     def load(self, content: SourceContent, **kwargs) -> LoaderResult: ...

+    @staticmethod
     def generate_doc_id(
-        self, source_ref: str | None = None, content: str | None = None
+        source_ref: str | None = None, content: str | None = None
     ) -> str:
         """Generate a unique document id based on the source reference and content.
         If the source reference is not provided, the content is used as the source reference.

@@ -10,7 +10,7 @@ class RecursiveCharacterTextSplitter:
         chunk_overlap: int = 200,
         separators: list[str] | None = None,
         keep_separator: bool = True,
-    ):
+    ) -> None:
         """Initialize the RecursiveCharacterTextSplitter.

         Args:
@@ -36,6 +36,14 @@ class RecursiveCharacterTextSplitter:
         ]

     def split_text(self, text: str) -> list[str]:
+        """Split the input text into chunks.
+
+        Args:
+            text: The text to split.
+
+        Returns:
+            A list of text chunks.
+        """
         return self._split_text(text, self._separators)

     def _split_text(self, text: str, separators: list[str]) -> list[str]:
@@ -99,8 +107,8 @@ class RecursiveCharacterTextSplitter:

     def _merge_splits(self, splits: list[str], separator: str) -> list[str]:
         """Merge splits into chunks with proper overlap."""
-        docs = []
-        current_doc = []
+        docs: list[str] = []
+        current_doc: list[str] = []
         total = 0

         for split in splits:
@@ -152,7 +160,7 @@ class BaseChunker:
         chunk_overlap: int = 200,
         separators: list[str] | None = None,
         keep_separator: bool = True,
-    ):
+    ) -> None:
         """Initialize the Chunker.

         Args:
@@ -169,6 +177,14 @@ class BaseChunker:
         )

     def chunk(self, text: str) -> list[str]:
+        """Chunk the input text into smaller pieces.
+
+        Args:
+            text: The text to chunk.
+
+        Returns:
+            A list of text chunks.
+        """
         if not text or not text.strip():
             return []
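The new docstrings describe the public entry points of the splitter and chunker. A minimal sketch of how they behave, assuming long_text is any large string:

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(long_text)   # list[str] with overlapping boundaries

    chunker = BaseChunker(chunk_size=1000, chunk_overlap=200)
    assert chunker.chunk("   ") == []         # empty/whitespace input short-circuits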
@@ -4,12 +4,12 @@ from typing import Any
 from uuid import uuid4

 import chromadb
-import litellm
 from pydantic import BaseModel, Field, PrivateAttr

 from crewai_tools.rag.base_loader import BaseLoader
 from crewai_tools.rag.chunkers.base_chunker import BaseChunker
 from crewai_tools.rag.data_types import DataType
+from crewai_tools.rag.embedding_service import EmbeddingService
 from crewai_tools.rag.misc import compute_sha256
 from crewai_tools.rag.source_content import SourceContent
 from crewai_tools.tools.rag.rag_tool import Adapter
@@ -18,31 +18,6 @@ from crewai_tools.tools.rag.rag_tool import Adapter
 logger = logging.getLogger(__name__)


-class EmbeddingService:
-    def __init__(self, model: str = "text-embedding-3-small", **kwargs):
-        self.model = model
-        self.kwargs = kwargs
-
-    def embed_text(self, text: str) -> list[float]:
-        try:
-            response = litellm.embedding(model=self.model, input=[text], **self.kwargs)
-            return response.data[0]["embedding"]
-        except Exception as e:
-            logger.error(f"Error generating embedding: {e}")
-            raise
-
-    def embed_batch(self, texts: list[str]) -> list[list[float]]:
-        if not texts:
-            return []
-
-        try:
-            response = litellm.embedding(model=self.model, input=texts, **self.kwargs)
-            return [data["embedding"] for data in response.data]
-        except Exception as e:
-            logger.error(f"Error generating batch embeddings: {e}")
-            raise
-
-
 class Document(BaseModel):
     id: str = Field(default_factory=lambda: str(uuid4()))
     content: str
@@ -54,6 +29,7 @@ class Document(BaseModel):
 class RAG(Adapter):
     collection_name: str = "crewai_knowledge_base"
     persist_directory: str | None = None
+    embedding_provider: str = "openai"
     embedding_model: str = "text-embedding-3-large"
     summarize: bool = False
     top_k: int = 5
@@ -79,7 +55,9 @@ class RAG(Adapter):
             )

             self._embedding_service = EmbeddingService(
-                model=self.embedding_model, **self.embedding_config
+                provider=self.embedding_provider,
+                model=self.embedding_model,
+                **self.embedding_config,
             )
         except Exception as e:
             logger.error(f"Failed to initialize ChromaDB: {e}")
@@ -181,7 +159,7 @@ class RAG(Adapter):
         except Exception as e:
             logger.error(f"Failed to add documents to ChromaDB: {e}")

-    def query(self, question: str, where: dict[str, Any] | None = None) -> str:
+    def query(self, question: str, where: dict[str, Any] | None = None) -> str:  # type: ignore
         try:
             question_embedding = self._embedding_service.embed_text(question)

@@ -240,8 +218,9 @@ class RAG(Adapter):
             logger.error(f"Failed to get collection info: {e}")
             return {"error": str(e)}

+    @staticmethod
     def _get_data_type(
-        self, content: SourceContent, data_type: str | DataType | None = None
+        content: SourceContent, data_type: str | DataType | None = None
    ) -> DataType:
        try:
            if isinstance(data_type, str):
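With the new embedding_provider field, the provider is forwarded to EmbeddingService alongside the model name instead of being implied by litellm. A sketch of configuring the adapter, leaving the remaining fields at their defaults:

    rag = RAG(
        embedding_provider="openai",
        embedding_model="text-embedding-3-large",
    )
    answer = rag.query("What does the knowledge base say about pricing?")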
@@ -116,7 +116,7 @@ class DataTypes:
         if isinstance(content, str):
             try:
                 url = urlparse(content)
-                is_url = (url.scheme and url.netloc) or url.scheme == "file"
+                is_url = bool(url.scheme and url.netloc) or url.scheme == "file"
             except Exception:  # noqa: S110
                 pass

lib/crewai-tools/src/crewai_tools/rag/embedding_service.py (new file, 511 lines)
@@ -0,0 +1,511 @@
"""
Enhanced embedding service that leverages CrewAI's existing embedding providers.
This replaces the litellm-based EmbeddingService with a more flexible architecture.
"""

from __future__ import annotations

import logging
import os
from typing import Any

from pydantic import BaseModel, Field


logger = logging.getLogger(__name__)


class EmbeddingConfig(BaseModel):
    """Configuration for embedding providers."""

    provider: str = Field(description="Embedding provider name")
    model: str = Field(description="Model name to use")
    api_key: str | None = Field(default=None, description="API key for the provider")
    timeout: float | None = Field(
        default=30.0, description="Request timeout in seconds"
    )
    max_retries: int = Field(default=3, description="Maximum number of retries")
    batch_size: int = Field(
        default=100, description="Batch size for processing multiple texts"
    )
    extra_config: dict[str, Any] = Field(
        default_factory=dict, description="Additional provider-specific configuration"
    )

class EmbeddingService:
    """
    Enhanced embedding service that uses CrewAI's existing embedding providers.

    Supports multiple providers:
    - openai: OpenAI embeddings (text-embedding-3-small, text-embedding-3-large, etc.)
    - voyageai: Voyage AI embeddings (voyage-2, voyage-large-2, etc.)
    - cohere: Cohere embeddings (embed-english-v3.0, embed-multilingual-v3.0, etc.)
    - google-generativeai: Google Gemini embeddings (models/embedding-001, etc.)
    - google-vertex: Google Vertex embeddings (models/embedding-001, etc.)
    - huggingface: Hugging Face embeddings (sentence-transformers/all-MiniLM-L6-v2, etc.)
    - jina: Jina embeddings (jina-embeddings-v2-base-en, etc.)
    - ollama: Ollama embeddings (nomic-embed-text, etc.)
    - roboflow: Roboflow embeddings (roboflow-embeddings-v2-base-en, etc.)
    - watsonx: Watson X embeddings (ibm/slate-125m-english-rtrvr, etc.)
    - custom: Custom embeddings (embedding_callable, etc.)
    - sentence-transformer: Sentence Transformers embeddings (all-MiniLM-L6-v2, etc.)
    - text2vec: Text2Vec embeddings (text2vec-base-en, etc.)
    - openclip: OpenClip embeddings (openclip-large-v2, etc.)
    - instructor: Instructor embeddings (hkunlp/instructor-large, etc.)
    - onnx: ONNX embeddings (onnx-large-v2, etc.)
    """

    def __init__(
        self,
        provider: str = "openai",
        model: str = "text-embedding-3-small",
        api_key: str | None = None,
        **kwargs: Any,
    ):
        """
        Initialize the embedding service.

        Args:
            provider: The embedding provider to use
            model: The model name
            api_key: API key (if not provided, will look for environment variables)
            **kwargs: Additional configuration options
        """
        self.config = EmbeddingConfig(
            provider=provider,
            model=model,
            api_key=api_key or self._get_default_api_key(provider),
            **kwargs,
        )

        self._embedding_function = None
        self._initialize_embedding_function()

    @staticmethod
    def _get_default_api_key(provider: str) -> str | None:
        """Get default API key from environment variables."""
        env_key_map = {
            "azure": "AZURE_OPENAI_API_KEY",
            "amazon-bedrock": "AWS_ACCESS_KEY_ID",  # or AWS_PROFILE
            "cohere": "COHERE_API_KEY",
            "google-generativeai": "GOOGLE_API_KEY",
            "google-vertex": "GOOGLE_APPLICATION_CREDENTIALS",
            "huggingface": "HUGGINGFACE_API_KEY",
            "jina": "JINA_API_KEY",
            "ollama": None,  # Ollama typically runs locally without API key
            "openai": "OPENAI_API_KEY",
            "roboflow": "ROBOFLOW_API_KEY",
            "voyageai": "VOYAGE_API_KEY",
            "watsonx": "WATSONX_API_KEY",
        }

        env_key = env_key_map.get(provider)
        if env_key:
            return os.getenv(env_key)
        return None

    def _initialize_embedding_function(self):
        """Initialize the embedding function using CrewAI's factory."""
        try:
            from crewai.rag.embeddings.factory import build_embedder

            # Build the configuration for CrewAI's factory
            config = self._build_provider_config()

            # Create the embedding function
            self._embedding_function = build_embedder(config)

            logger.info(
                f"Initialized {self.config.provider} embedding service with model "
                f"{self.config.model}"
            )

        except ImportError as e:
            raise ImportError(
                f"CrewAI embedding providers not available. "
                f"Make sure crewai is installed: {e}"
            ) from e
        except Exception as e:
            logger.error(f"Failed to initialize embedding function: {e}")
            raise RuntimeError(
                f"Failed to initialize {self.config.provider} embedding service: {e}"
            ) from e

    def _build_provider_config(self) -> dict[str, Any]:
        """Build configuration dictionary for CrewAI's embedding factory."""
        base_config = {"provider": self.config.provider, "config": {}}

        # Provider-specific configuration mapping
        if self.config.provider == "openai":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "azure":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "voyageai":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model": self.config.model,
                "max_retries": self.config.max_retries,
                "timeout": self.config.timeout,
                **self.config.extra_config,
            }
        elif self.config.provider == "cohere":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider in ["google-generativeai", "google-vertex"]:
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "amazon-bedrock":
            base_config["config"] = {
                "aws_access_key_id": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "huggingface":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "jina":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "ollama":
            base_config["config"] = {
                "model": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "sentence-transformer":
            base_config["config"] = {
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "instructor":
            base_config["config"] = {
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "onnx":
            base_config["config"] = {
                **self.config.extra_config,
            }
        elif self.config.provider == "roboflow":
            base_config["config"] = {
                "api_key": self.config.api_key,
                **self.config.extra_config,
            }
        elif self.config.provider == "openclip":
            base_config["config"] = {
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "text2vec":
            base_config["config"] = {
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "watsonx":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "custom":
            # Custom provider requires embedding_callable in extra_config
            base_config["config"] = {
                **self.config.extra_config,
            }
        else:
            # Generic configuration for any unlisted providers
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model": self.config.model,
                **self.config.extra_config,
            }

        return base_config

    def embed_text(self, text: str) -> list[float]:
        """
        Generate embedding for a single text.

        Args:
            text: Text to embed

        Returns:
            List of floats representing the embedding

        Raises:
            RuntimeError: If embedding generation fails
        """
        if not text or not text.strip():
            logger.warning("Empty text provided for embedding")
            return []

        try:
            # Use ChromaDB's embedding function interface
            embeddings = self._embedding_function([text])  # type: ignore
            return embeddings[0] if embeddings else []

        except Exception as e:
            logger.error(f"Error generating embedding for text: {e}")
            raise RuntimeError(f"Failed to generate embedding: {e}") from e

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings for multiple texts.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors

        Raises:
            RuntimeError: If embedding generation fails
        """
        if not texts:
            return []

        # Filter out empty texts
        valid_texts = [text for text in texts if text and text.strip()]
        if not valid_texts:
            logger.warning("No valid texts provided for batch embedding")
            return []

        try:
            # Process in batches to avoid API limits
            all_embeddings = []

            for i in range(0, len(valid_texts), self.config.batch_size):
                batch = valid_texts[i : i + self.config.batch_size]
                batch_embeddings = self._embedding_function(batch)  # type: ignore
                all_embeddings.extend(batch_embeddings)

            return all_embeddings

        except Exception as e:
            logger.error(f"Error generating batch embeddings: {e}")
            raise RuntimeError(f"Failed to generate batch embeddings: {e}") from e

    def get_embedding_dimension(self) -> int | None:
        """
        Get the dimension of embeddings produced by this service.

        Returns:
            Embedding dimension or None if unknown
        """
        # Try to get dimension by generating a test embedding
        try:
            test_embedding = self.embed_text("test")
            return len(test_embedding) if test_embedding else None
        except Exception:
            logger.warning("Could not determine embedding dimension")
            return None

    def validate_connection(self) -> bool:
        """
        Validate that the embedding service is working correctly.

        Returns:
            True if the service is working, False otherwise
        """
        try:
            test_embedding = self.embed_text("test connection")
            return len(test_embedding) > 0
        except Exception as e:
            logger.error(f"Connection validation failed: {e}")
            return False

    def get_service_info(self) -> dict[str, Any]:
        """
        Get information about the current embedding service.

        Returns:
            Dictionary with service information
        """
        return {
            "provider": self.config.provider,
            "model": self.config.model,
            "embedding_dimension": self.get_embedding_dimension(),
            "batch_size": self.config.batch_size,
            "is_connected": self.validate_connection(),
        }

    @classmethod
    def list_supported_providers(cls) -> list[str]:
        """
        List all supported embedding providers.

        Returns:
            List of supported provider names
        """
        return [
            "azure",
            "amazon-bedrock",
            "cohere",
            "custom",
            "google-generativeai",
            "google-vertex",
            "huggingface",
            "instructor",
            "jina",
            "ollama",
            "onnx",
            "openai",
            "openclip",
            "roboflow",
            "sentence-transformer",
            "text2vec",
            "voyageai",
            "watsonx",
        ]

    @classmethod
    def create_openai_service(
        cls,
        model: str = "text-embedding-3-small",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create an OpenAI embedding service."""
        return cls(provider="openai", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_voyage_service(
        cls, model: str = "voyage-2", api_key: str | None = None, **kwargs: Any
    ) -> EmbeddingService:
        """Create a Voyage AI embedding service."""
        return cls(provider="voyageai", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_cohere_service(
        cls,
        model: str = "embed-english-v3.0",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Cohere embedding service."""
        return cls(provider="cohere", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_gemini_service(
        cls,
        model: str = "models/embedding-001",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Google Gemini embedding service."""
        return cls(
            provider="google-generativeai", model=model, api_key=api_key, **kwargs
        )

    @classmethod
    def create_azure_service(
        cls,
        model: str = "text-embedding-ada-002",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create an Azure OpenAI embedding service."""
        return cls(provider="azure", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_bedrock_service(
        cls,
        model: str = "amazon.titan-embed-text-v1",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create an Amazon Bedrock embedding service."""
        return cls(provider="amazon-bedrock", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_huggingface_service(
        cls,
        model: str = "sentence-transformers/all-MiniLM-L6-v2",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Hugging Face embedding service."""
        return cls(provider="huggingface", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_sentence_transformer_service(
        cls,
        model: str = "all-MiniLM-L6-v2",
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Sentence Transformers embedding service (local)."""
        return cls(provider="sentence-transformer", model=model, **kwargs)

    @classmethod
    def create_ollama_service(
        cls,
        model: str = "nomic-embed-text",
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create an Ollama embedding service (local)."""
        return cls(provider="ollama", model=model, **kwargs)

    @classmethod
    def create_jina_service(
        cls,
        model: str = "jina-embeddings-v2-base-en",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Jina AI embedding service."""
        return cls(provider="jina", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_instructor_service(
        cls,
        model: str = "hkunlp/instructor-large",
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create an Instructor embedding service."""
        return cls(provider="instructor", model=model, **kwargs)

    @classmethod
    def create_watsonx_service(
        cls,
        model: str = "ibm/slate-125m-english-rtrvr",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Watson X embedding service."""
        return cls(provider="watsonx", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_custom_service(
        cls,
        embedding_callable: Any,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a custom embedding service with your own embedding function."""
        return cls(
            provider="custom",
            model="custom",
            extra_config={"embedding_callable": embedding_callable},
            **kwargs,
        )
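Taken together, the service can be constructed directly or through one of the provider-specific factories. A hedged usage sketch; it assumes the relevant API key is present in the environment, and note that get_service_info performs a test embedding under the hood:

    service = EmbeddingService(provider="openai", model="text-embedding-3-small")
    vector = service.embed_text("hello world")        # -> list[float]
    vectors = service.embed_batch(["a", "b", "c"])    # batched by config.batch_size

    local = EmbeddingService.create_ollama_service()  # local provider, no API key
    print(service.get_service_info())                 # provider, model, dimension, ...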
@@ -2,41 +2,30 @@ import csv
 from io import StringIO

 from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
+from crewai_tools.rag.loaders.utils import load_from_url
 from crewai_tools.rag.source_content import SourceContent


 class CSVLoader(BaseLoader):
-    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         source_ref = source_content.source_ref

         content_str = source_content.source
         if source_content.is_url():
-            content_str = self._load_from_url(content_str, kwargs)
+            content_str = load_from_url(
+                content_str,
+                kwargs,
+                accept_header="text/csv, application/csv, text/plain",
+                loader_name="CSVLoader",
+            )
         elif source_content.path_exists():
             content_str = self._load_from_file(content_str)

         return self._parse_csv(content_str, source_ref)

-    def _load_from_url(self, url: str, kwargs: dict) -> str:
-        import requests
-
-        headers = kwargs.get(
-            "headers",
-            {
-                "Accept": "text/csv, application/csv, text/plain",
-                "User-Agent": "Mozilla/5.0 (compatible; crewai-tools CSVLoader)",
-            },
-        )
-
-        try:
-            response = requests.get(url, headers=headers, timeout=30)
-            response.raise_for_status()
-            return response.text
-        except Exception as e:
-            raise ValueError(f"Error fetching CSV from URL {url}: {e!s}") from e
-
-    def _load_from_file(self, path: str) -> str:
-        with open(path, "r", encoding="utf-8") as file:
+    @staticmethod
+    def _load_from_file(path: str) -> str:
+        with open(path, encoding="utf-8") as file:
             return file.read()

     def _parse_csv(self, content: str, source_ref: str) -> LoaderResult:

@@ -6,11 +6,11 @@ from crewai_tools.rag.source_content import SourceContent


 class DirectoryLoader(BaseLoader):
-    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         """Load and process all files from a directory recursively.

         Args:
-            source: Directory path or URL to a directory listing
+            source_content: Directory path or URL to a directory listing
             **kwargs: Additional options:
                 - recursive: bool (default True) - Whether to search recursively
                 - include_extensions: list - Only include files with these extensions
@@ -33,16 +33,16 @@ class DirectoryLoader(BaseLoader):
         return self._process_directory(source_ref, kwargs)

     def _process_directory(self, dir_path: str, kwargs: dict) -> LoaderResult:
-        recursive = kwargs.get("recursive", True)
-        include_extensions = kwargs.get("include_extensions", None)
-        exclude_extensions = kwargs.get("exclude_extensions", None)
-        max_files = kwargs.get("max_files", None)
+        recursive: bool = kwargs.get("recursive", True)
+        include_extensions: list[str] | None = kwargs.get("include_extensions", None)
+        exclude_extensions: list[str] | None = kwargs.get("exclude_extensions", None)
+        max_files: int | None = kwargs.get("max_files", None)

         files = self._find_files(
             dir_path, recursive, include_extensions, exclude_extensions
         )

-        if max_files and len(files) > max_files:
+        if max_files is not None and len(files) > max_files:
             files = files[:max_files]

         all_contents = []
@@ -115,8 +115,8 @@ class DirectoryLoader(BaseLoader):

         return sorted(files)

+    @staticmethod
     def _should_include_file(
-        self,
         filename: str,
         include_ext: list[str] | None = None,
         exclude_ext: list[str] | None = None,
@@ -141,7 +141,8 @@ class DirectoryLoader(BaseLoader):

         return True

-    def _process_single_file(self, file_path: str) -> LoaderResult:
+    @staticmethod
+    def _process_single_file(file_path: str) -> LoaderResult:
         from crewai_tools.rag.data_types import DataTypes

         data_type = DataTypes.from_content(Path(file_path))

@@ -12,7 +12,7 @@ from crewai_tools.rag.source_content import SourceContent
 class DocsSiteLoader(BaseLoader):
     """Loader for documentation websites."""

-    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         """Load content from a documentation site.

         Args:
@@ -40,7 +40,6 @@ class DocsSiteLoader(BaseLoader):
         title = soup.find("title")
         title_text = title.get_text(strip=True) if title else "Documentation"

-        main_content = None
         for selector in [
             "main",
             "article",
@@ -82,8 +81,10 @@ class DocsSiteLoader(BaseLoader):
         if nav:
             links = nav.find_all("a", href=True)
             for link in links[:20]:
-                href = link["href"]
-                if not href.startswith(("http://", "https://", "mailto:", "#")):
+                href = link.get("href", "")
+                if isinstance(href, str) and not href.startswith(
+                    ("http://", "https://", "mailto:", "#")
+                ):
                     full_url = urljoin(docs_url, href)
                     nav_links.append(f"- {link.get_text(strip=True)}: {full_url}")

@@ -1,12 +1,15 @@
 import os
 import tempfile
+from typing import Any
+
+import requests

 from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
 from crewai_tools.rag.source_content import SourceContent


 class DOCXLoader(BaseLoader):
-    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         try:
             from docx import Document as DocxDocument
         except ImportError as e:
@@ -29,9 +32,8 @@ class DOCXLoader(BaseLoader):
             f"Source must be a valid file path or URL, got: {source_content.source}"
         )

-    def _download_from_url(self, url: str, kwargs: dict) -> str:
-        import requests
-
+    @staticmethod
+    def _download_from_url(url: str, kwargs: dict) -> str:
         headers = kwargs.get(
             "headers",
             {
@@ -49,13 +51,13 @@ class DOCXLoader(BaseLoader):
             temp_file.write(response.content)
             return temp_file.name
         except Exception as e:
-            raise ValueError(f"Error fetching DOCX from URL {url}: {e!s}") from e
+            raise ValueError(f"Error fetching content from URL {url}: {e!s}") from e

     def _load_from_file(
         self,
         file_path: str,
         source_ref: str,
-        DocxDocument,  # noqa: N803
+        DocxDocument: Any,  # noqa: N803
     ) -> LoaderResult:
         try:
             doc = DocxDocument(file_path)

@@ -9,7 +9,7 @@ from crewai_tools.rag.source_content import SourceContent
 class GithubLoader(BaseLoader):
     """Loader for GitHub repository content."""

-    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         """Load content from a GitHub repository.

         Args:
@@ -54,9 +54,7 @@ class GithubLoader(BaseLoader):
         try:
             readme = repo.get_readme()
             all_content.append("README:")
-            all_content.append(
-                readme.decoded_content.decode("utf-8", errors="ignore")
-            )
+            all_content.append(readme.decoded_content.decode(errors="ignore"))
             all_content.append("")
         except GithubException:
             pass

@@ -1,52 +1,30 @@
 import json

 from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
+from crewai_tools.rag.loaders.utils import load_from_url
 from crewai_tools.rag.source_content import SourceContent


 class JSONLoader(BaseLoader):
-    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         source_ref = source_content.source_ref
         content = source_content.source

         if source_content.is_url():
-            content = self._load_from_url(source_ref, kwargs)
+            content = load_from_url(
+                source_ref,
+                kwargs,
+                accept_header="application/json",
+                loader_name="JSONLoader",
+            )
         elif source_content.path_exists():
             content = self._load_from_file(source_ref)

         return self._parse_json(content, source_ref)

-    def _load_from_url(self, url: str, kwargs: dict) -> str:
-        import requests
-
-        headers = kwargs.get(
-            "headers",
-            {
-                "Accept": "application/json",
-                "User-Agent": "Mozilla/5.0 (compatible; crewai-tools JSONLoader)",
-            },
-        )
-
-        try:
-            response = requests.get(url, headers=headers, timeout=30)
-            response.raise_for_status()
-            return (
-                response.text
-                if not self._is_json_response(response)
-                else json.dumps(response.json(), indent=2)
-            )
-        except Exception as e:
-            raise ValueError(f"Error fetching JSON from URL {url}: {e!s}") from e
-
-    def _is_json_response(self, response) -> bool:
-        try:
-            response.json()
-            return True
-        except ValueError:
-            return False
-
-    def _load_from_file(self, path: str) -> str:
-        with open(path, "r", encoding="utf-8") as file:
+    @staticmethod
+    def _load_from_file(path: str) -> str:
+        with open(path, encoding="utf-8") as file:
             return file.read()

     def _parse_json(self, content: str, source_ref: str) -> LoaderResult:

@@ -1,61 +1,55 @@
 import re
+from typing import Final

 from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
+from crewai_tools.rag.loaders.utils import load_from_url
 from crewai_tools.rag.source_content import SourceContent


+_IMPORT_PATTERN: Final[re.Pattern[str]] = re.compile(r"^import\s+.*?\n", re.MULTILINE)
+_EXPORT_PATTERN: Final[re.Pattern[str]] = re.compile(
+    r"^export\s+.*?(?:\n|$)", re.MULTILINE
+)
+_JSX_TAG_PATTERN: Final[re.Pattern[str]] = re.compile(r"<[^>]+>")
+_EXTRA_NEWLINES_PATTERN: Final[re.Pattern[str]] = re.compile(r"\n\s*\n\s*\n")
+
+
 class MDXLoader(BaseLoader):
-    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         source_ref = source_content.source_ref
         content = source_content.source

         if source_content.is_url():
-            content = self._load_from_url(source_ref, kwargs)
+            content = load_from_url(
+                source_ref,
+                kwargs,
+                accept_header="text/markdown, text/x-markdown, text/plain",
+                loader_name="MDXLoader",
+            )
         elif source_content.path_exists():
             content = self._load_from_file(source_ref)

         return self._parse_mdx(content, source_ref)

-    def _load_from_url(self, url: str, kwargs: dict) -> str:
-        import requests
-
-        headers = kwargs.get(
-            "headers",
-            {
-                "Accept": "text/markdown, text/x-markdown, text/plain",
-                "User-Agent": "Mozilla/5.0 (compatible; crewai-tools MDXLoader)",
-            },
-        )
-
-        try:
-            response = requests.get(url, headers=headers, timeout=30)
-            response.raise_for_status()
-            return response.text
-        except Exception as e:
-            raise ValueError(f"Error fetching MDX from URL {url}: {e!s}") from e
-
-    def _load_from_file(self, path: str) -> str:
-        with open(path, "r", encoding="utf-8") as file:
+    @staticmethod
+    def _load_from_file(path: str) -> str:
+        with open(path, encoding="utf-8") as file:
             return file.read()

     def _parse_mdx(self, content: str, source_ref: str) -> LoaderResult:
         cleaned_content = content

         # Remove import statements
-        cleaned_content = re.sub(
-            r"^import\s+.*?\n", "", cleaned_content, flags=re.MULTILINE
-        )
+        cleaned_content = _IMPORT_PATTERN.sub("", cleaned_content)

         # Remove export statements
-        cleaned_content = re.sub(
-            r"^export\s+.*?(?:\n|$)", "", cleaned_content, flags=re.MULTILINE
-        )
+        cleaned_content = _EXPORT_PATTERN.sub("", cleaned_content)

         # Remove JSX tags (simple approach)
-        cleaned_content = re.sub(r"<[^>]+>", "", cleaned_content)
+        cleaned_content = _JSX_TAG_PATTERN.sub("", cleaned_content)

         # Clean up extra whitespace
-        cleaned_content = re.sub(r"\n\s*\n\s*\n", "\n\n", cleaned_content)
+        cleaned_content = _EXTRA_NEWLINES_PATTERN.sub("\n\n", cleaned_content)
         cleaned_content = cleaned_content.strip()

         metadata = {"format": "mdx"}
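Moving the four patterns to module level means they are compiled once at import time instead of on every load() call. A rough sketch of the cleanup pipeline on a tiny MDX snippet (output shown approximately):

    text = "import X from 'x'\nexport const a = 1\n<Note>hi</Note>\n\n\nBody"
    text = _IMPORT_PATTERN.sub("", text)              # strip import lines
    text = _EXPORT_PATTERN.sub("", text)              # strip export lines
    text = _JSX_TAG_PATTERN.sub("", text)             # strip JSX tags
    text = _EXTRA_NEWLINES_PATTERN.sub("\n\n", text)  # roughly "hi\n\nBody"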
@@ -1,8 +1,10 @@
 """MySQL database loader."""

+from typing import Any
 from urllib.parse import urlparse

-import pymysql
+from pymysql import Error, connect
+from pymysql.cursors import DictCursor

 from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
 from crewai_tools.rag.source_content import SourceContent
@@ -11,7 +13,7 @@ from crewai_tools.rag.source_content import SourceContent
 class MySQLLoader(BaseLoader):
     """Loader for MySQL database content."""

-    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source: SourceContent, **kwargs: Any) -> LoaderResult:  # type: ignore[override]
         """Load content from a MySQL database table.

         Args:
@@ -40,14 +42,14 @@ class MySQLLoader(BaseLoader):
             "password": parsed.password,
             "database": parsed.path.lstrip("/") if parsed.path else None,
             "charset": "utf8mb4",
-            "cursorclass": pymysql.cursors.DictCursor,
+            "cursorclass": DictCursor,
         }

         if not connection_params["database"]:
             raise ValueError("Database name is required in the URI")

         try:
-            connection = pymysql.connect(**connection_params)
+            connection = connect(**connection_params)
             try:
                 with connection.cursor() as cursor:
                     cursor.execute(query)
@@ -94,7 +96,7 @@ class MySQLLoader(BaseLoader):
                 )
             finally:
                 connection.close()
-        except pymysql.Error as e:
+        except Error as e:
             raise ValueError(f"MySQL database error: {e}") from e
         except Exception as e:
             raise ValueError(f"Failed to load data from MySQL: {e}") from e

@@ -11,7 +11,7 @@ from crewai_tools.rag.source_content import SourceContent
 class PDFLoader(BaseLoader):
     """Loader for PDF files."""

-    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         """Load and extract text from a PDF file.

         Args:
@@ -28,7 +28,7 @@ class PDFLoader(BaseLoader):
             import pypdf
         except ImportError:
             try:
-                import PyPDF2 as pypdf  # noqa: N813
+                import PyPDF2 as pypdf  # type: ignore[import-not-found,no-redef]  # noqa: N813
             except ImportError as e:
                 raise ImportError(
                     "PDF support requires pypdf or PyPDF2. Install with: uv add pypdf"

@@ -2,7 +2,7 @@

 from urllib.parse import urlparse

-import psycopg2
+from psycopg2 import Error, connect
 from psycopg2.extras import RealDictCursor

 from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
@@ -12,7 +12,7 @@ from crewai_tools.rag.source_content import SourceContent
 class PostgresLoader(BaseLoader):
     """Loader for PostgreSQL database content."""

-    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         """Load content from a PostgreSQL database table.

         Args:
@@ -47,7 +47,7 @@ class PostgresLoader(BaseLoader):
             raise ValueError("Database name is required in the URI")

         try:
-            connection = psycopg2.connect(**connection_params)
+            connection = connect(**connection_params)
             try:
                 with connection.cursor() as cursor:
                     cursor.execute(query)
@@ -94,7 +94,7 @@ class PostgresLoader(BaseLoader):
                 )
             finally:
                 connection.close()
-        except psycopg2.Error as e:
+        except Error as e:
             raise ValueError(f"PostgreSQL database error: {e}") from e
         except Exception as e:
             raise ValueError(f"Failed to load data from PostgreSQL: {e}") from e
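Both database loaders build their connection parameters from a URI via urlparse. A small sketch of what that parsing yields (the credentials are placeholders):

    from urllib.parse import urlparse

    parsed = urlparse("postgresql://user:secret@localhost:5432/mydb")
    parsed.hostname          # "localhost"
    parsed.port              # 5432
    parsed.path.lstrip("/")  # "mydb", the required database name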
@@ -3,14 +3,14 @@ from crewai_tools.rag.source_content import SourceContent


 class TextFileLoader(BaseLoader):
-    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         source_ref = source_content.source_ref
         if not source_content.path_exists():
             raise FileNotFoundError(
                 f"The following file does not exist: {source_content.source}"
             )

-        with open(source_content.source, "r", encoding="utf-8") as file:
+        with open(source_content.source, encoding="utf-8") as file:
             content = file.read()

         return LoaderResult(
@@ -21,7 +21,7 @@ class TextFileLoader(BaseLoader):


 class TextLoader(BaseLoader):
-    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         return LoaderResult(
             content=source_content.source,
             source=source_content.source_ref,
lib/crewai-tools/src/crewai_tools/rag/loaders/utils.py (new file, 36 lines)
@@ -0,0 +1,36 @@
"""Utility functions for RAG loaders."""


def load_from_url(
    url: str, kwargs: dict, accept_header: str = "*/*", loader_name: str = "Loader"
) -> str:
    """Load content from a URL.

    Args:
        url: The URL to fetch content from
        kwargs: Additional keyword arguments (can include 'headers' override)
        accept_header: The Accept header value for the request
        loader_name: The name of the loader for the User-Agent header

    Returns:
        The text content from the URL

    Raises:
        ValueError: If there's an error fetching the URL
    """
    import requests

    headers = kwargs.get(
        "headers",
        {
            "Accept": accept_header,
            "User-Agent": f"Mozilla/5.0 (compatible; crewai-tools {loader_name})",
        },
    )

    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.text
    except Exception as e:
        raise ValueError(f"Error fetching content from URL {url}: {e!s}") from e
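This helper centralizes the URL fetching that the CSV, JSON, MDX, and XML loaders previously duplicated. A usage sketch mirroring how those loaders call it (the URL is a placeholder):

    content = load_from_url(
        "https://example.com/data.json",
        kwargs={},
        accept_header="application/json",
        loader_name="JSONLoader",
    )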
@@ -1,4 +1,5 @@
 import re
+from typing import Final

 from bs4 import BeautifulSoup
 import requests
@@ -7,8 +8,12 @@ from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
 from crewai_tools.rag.source_content import SourceContent


+_SPACES_PATTERN: Final[re.Pattern[str]] = re.compile(r"[ \t]+")
+_NEWLINE_PATTERN: Final[re.Pattern[str]] = re.compile(r"\s+\n\s+")
+
+
 class WebPageLoader(BaseLoader):
-    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         url = source_content.source
         headers = kwargs.get(
             "headers",
@@ -29,8 +34,8 @@ class WebPageLoader(BaseLoader):
                 script.decompose()

         text = soup.get_text(" ")
-        text = re.sub("[ \t]+", " ", text)
-        text = re.sub("\\s+\n\\s+", "\n", text)
+        text = _SPACES_PATTERN.sub(" ", text)
+        text = _NEWLINE_PATTERN.sub("\n", text)
         text = text.strip()

         title = (
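As in the MDX loader, the whitespace patterns are now compiled once at module level. A quick sketch of what the two substitutions do to extracted page text:

    raw = "Hello \t  world   \n   next line"
    text = _SPACES_PATTERN.sub(" ", raw)     # collapse runs of spaces/tabs
    text = _NEWLINE_PATTERN.sub("\n", text)  # trim whitespace hugging newlines
    # -> "Hello world\nnext line"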
@@ -1,49 +1,48 @@
-import xml.etree.ElementTree as ET
+from typing import Any
+from xml.etree.ElementTree import ParseError, fromstring, parse
 
 from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
+from crewai_tools.rag.loaders.utils import load_from_url
 from crewai_tools.rag.source_content import SourceContent
 
 
 class XMLLoader(BaseLoader):
-    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source_content: SourceContent, **kwargs: Any) -> LoaderResult:  # type: ignore[override]
         """Load and parse XML content from various sources.
 
         Args:
             source_content: SourceContent: The source content to load.
             **kwargs: Additional keyword arguments for loading from URL.
 
         Returns:
             LoaderResult: The result of loading and parsing the XML content.
         """
         source_ref = source_content.source_ref
         content = source_content.source
 
         if source_content.is_url():
-            content = self._load_from_url(source_ref, kwargs)
+            content = load_from_url(
+                source_ref,
+                kwargs,
+                accept_header="application/xml, text/xml, text/plain",
+                loader_name="XMLLoader",
+            )
         elif source_content.path_exists():
             content = self._load_from_file(source_ref)
 
         return self._parse_xml(content, source_ref)
 
-    def _load_from_url(self, url: str, kwargs: dict) -> str:
-        import requests
-
-        headers = kwargs.get(
-            "headers",
-            {
-                "Accept": "application/xml, text/xml, text/plain",
-                "User-Agent": "Mozilla/5.0 (compatible; crewai-tools XMLLoader)",
-            },
-        )
-
-        try:
-            response = requests.get(url, headers=headers, timeout=30)
-            response.raise_for_status()
-            return response.text
-        except Exception as e:
-            raise ValueError(f"Error fetching XML from URL {url}: {e!s}") from e
-
-    def _load_from_file(self, path: str) -> str:
-        with open(path, "r", encoding="utf-8") as file:
+    @staticmethod
+    def _load_from_file(path: str) -> str:
+        with open(path, encoding="utf-8") as file:
             return file.read()
 
     def _parse_xml(self, content: str, source_ref: str) -> LoaderResult:
         try:
             if content.strip().startswith("<"):
-                root = ET.fromstring(content)  # noqa: S314
+                root = fromstring(content)  # noqa: S314
             else:
-                root = ET.parse(source_ref).getroot()  # noqa: S314
+                root = parse(source_ref).getroot()  # noqa: S314
 
             text_parts = []
             for text_content in root.itertext():
@@ -52,7 +51,7 @@ class XMLLoader(BaseLoader):
 
             text = "\n".join(text_parts)
             metadata = {"format": "xml", "root_tag": root.tag}
-        except ET.ParseError as e:
+        except ParseError as e:
             text = content
             metadata = {"format": "xml", "parse_error": str(e)}
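For context on the XMLLoader refactor: xml.etree.ElementTree.fromstring parses XML already held in memory, while parse reads from a path, so the string-vs-file fallback above is unchanged; only the ET alias is gone. A small standalone sketch of the same parse-with-fallback idea:

    from xml.etree.ElementTree import ParseError, fromstring

    def xml_to_text(content: str) -> str:
        # Flatten an XML string into newline-joined text; fall back to raw content.
        try:
            root = fromstring(content)
            # itertext() yields every text node in document order.
            return "\n".join(t.strip() for t in root.itertext() if t.strip())
        except ParseError:
            return content

    print(xml_to_text("<a><b>hello</b><c>world</c></a>"))  # hello / world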
@@ -10,7 +10,7 @@ from crewai_tools.rag.source_content import SourceContent
 class YoutubeChannelLoader(BaseLoader):
     """Loader for YouTube channels."""
 
-    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         """Load and extract content from a YouTube channel.
 
         Args:
@@ -24,7 +24,7 @@ class YoutubeChannelLoader(BaseLoader):
             ValueError: If the URL is not a valid YouTube channel URL
         """
         try:
-            from pytube import Channel
+            from pytube import Channel  # type: ignore[import-untyped]
         except ImportError as e:
             raise ImportError(
                 "YouTube channel support requires pytube. Install with: uv add pytube"
@@ -89,7 +89,6 @@ class YoutubeChannelLoader(BaseLoader):
             try:
                 api = YouTubeTranscriptApi()
                 transcript_list = api.list(video_id)
-                transcript = None
 
                 try:
                     transcript = transcript_list.find_transcript(["en"])
@@ -101,7 +100,7 @@ class YoutubeChannelLoader(BaseLoader):
                         )
                     )
                 except Exception:
-                    transcript = next(iter(transcript_list), None)
+                    transcript = next(iter(transcript_list))
 
                 if transcript:
                     transcript_data = transcript.fetch()
@@ -148,7 +147,8 @@ class YoutubeChannelLoader(BaseLoader):
             doc_id=self.generate_doc_id(source_ref=channel_url, content=content),
         )
 
-    def _extract_video_id(self, url: str) -> str | None:
+    @staticmethod
+    def _extract_video_id(url: str) -> str | None:
         """Extract video ID from YouTube URL."""
         patterns = [
             r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([^&\n?#]+)",
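The transcript logic above drops the transcript = None pre-assignment and the None default in next(), so a video with no transcript at all now raises instead of silently yielding None. A hedged sketch of the selection flow, assuming the instance-based youtube-transcript-api interface that the diff itself uses:

    from youtube_transcript_api import YouTubeTranscriptApi

    def fetch_english_or_first(video_id: str):
        api = YouTubeTranscriptApi()
        transcript_list = api.list(video_id)
        try:
            transcript = transcript_list.find_transcript(["en"])
        except Exception:
            # No English transcript: take whichever is listed first.
            # Without a default, next() raises StopIteration when empty.
            transcript = next(iter(transcript_list))
        return transcript.fetch()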
@@ -11,7 +11,7 @@ from crewai_tools.rag.source_content import SourceContent
 class YoutubeVideoLoader(BaseLoader):
     """Loader for YouTube videos."""
 
-    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         """Load and extract transcript from a YouTube video.
 
         Args:
@@ -48,7 +48,6 @@ class YoutubeVideoLoader(BaseLoader):
             api = YouTubeTranscriptApi()
             transcript_list = api.list(video_id)
 
-            transcript = None
             try:
                 transcript = transcript_list.find_transcript(["en"])
             except Exception:
@@ -72,7 +71,7 @@ class YoutubeVideoLoader(BaseLoader):
         content = " ".join(text_content)
 
         try:
-            from pytube import YouTube
+            from pytube import YouTube  # type: ignore[import-untyped]
 
             yt = YouTube(video_url)
             metadata["title"] = yt.title
@@ -103,7 +102,8 @@ class YoutubeVideoLoader(BaseLoader):
             doc_id=self.generate_doc_id(source_ref=video_url, content=content),
         )
 
-    def _extract_video_id(self, url: str) -> str | None:
+    @staticmethod
+    def _extract_video_id(url: str) -> str | None:
         """Extract video ID from various YouTube URL formats."""
         patterns = [
             r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([^&\n?#]+)",
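Both _extract_video_id helpers read nothing from self, which is what makes the @staticmethod conversion above safe. A standalone sketch built around the same URL pattern shown in the diff:

    import re

    _VIDEO_ID = re.compile(
        r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([^&\n?#]+)"
    )

    def extract_video_id(url: str) -> str | None:
        match = _VIDEO_ID.search(url)
        return match.group(1) if match else None

    print(extract_video_id("https://youtu.be/dQw4w9WgXcQ"))  # dQw4w9WgXcQ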
@@ -3,7 +3,15 @@ from typing import Any
 
 
 def compute_sha256(content: str) -> str:
-    return hashlib.sha256(content.encode("utf-8")).hexdigest()
+    """Compute the SHA-256 hash of the given content.
+
+    Args:
+        content: The content to hash.
+
+    Returns:
+        The SHA-256 hash of the content as a hexadecimal string.
+    """
+    return hashlib.sha256(content.encode()).hexdigest()
 
 
 def sanitize_metadata_for_chromadb(metadata: dict[str, Any]) -> dict[str, Any]:
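Dropping the explicit "utf-8" argument is behavior-preserving, since str.encode() defaults to UTF-8. For reference:

    import hashlib

    content = "hello"
    assert content.encode() == content.encode("utf-8")
    print(hashlib.sha256(content.encode()).hexdigest())
    # 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824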
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from functools import cached_property
 import os
 from pathlib import Path
@@ -28,7 +30,7 @@ class SourceContent:
         return os.path.exists(self.source)
 
     @cached_property
-    def data_type(self) -> "DataType":
+    def data_type(self) -> DataType:
         from crewai_tools.rag.data_types import DataTypes
 
         return DataTypes.from_content(self.source)
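With from __future__ import annotations (PEP 563), annotations are stored as strings and never evaluated at runtime, which is why the quoted forward reference "DataType" can become a bare name even though DataType is only imported inside the method. A minimal sketch of the mechanism (illustrative class names):

    from __future__ import annotations

    from functools import cached_property

    class Lazy:
        @cached_property
        def value(self) -> ExpensiveType:  # resolved lazily; no quotes needed
            return ExpensiveType()

    class ExpensiveType:  # defined after use, fine under deferred annotations
        pass

    print(Lazy().value)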
@@ -32,9 +32,6 @@ from crewai_tools.tools.contextualai_rerank_tool.contextual_rerank_tool import (
 from crewai_tools.tools.couchbase_tool.couchbase_tool import (
     CouchbaseFTSVectorSearchTool,
 )
-from crewai_tools.tools.crewai_enterprise_tools.crewai_enterprise_tools import (
-    CrewaiEnterpriseTools,
-)
 from crewai_tools.tools.crewai_platform_tools.crewai_platform_tools import (
     CrewaiPlatformTools,
 )
@@ -199,7 +196,6 @@ __all__ = [
     "ContextualAIQueryTool",
     "ContextualAIRerankTool",
     "CouchbaseFTSVectorSearchTool",
-    "CrewaiEnterpriseTools",
     "CrewaiPlatformTools",
     "DOCXSearchTool",
     "DallETool",
@@ -4,6 +4,7 @@ from typing import Any
 
 from crewai.tools import BaseTool, EnvVar
 from openai import OpenAI
+from openai.types.chat import ChatCompletion
 from pydantic import BaseModel, Field
 
 
@@ -30,7 +31,7 @@ class AIMindTool(BaseTool):
     )
     args_schema: type[BaseModel] = AIMindToolInputSchema
     api_key: str | None = None
-    datasources: list[dict[str, Any]] | None = None
+    datasources: list[dict[str, Any]] = Field(default_factory=list)
     mind_name: str | None = None
     package_dependencies: list[str] = Field(default_factory=lambda: ["minds-sdk"])
     env_vars: list[EnvVar] = Field(
@@ -87,10 +88,15 @@ class AIMindTool(BaseTool):
             base_url=AIMindToolConstants.MINDS_API_BASE_URL, api_key=self.api_key
         )
 
+        if self.mind_name is None:
+            raise ValueError("Mind name is not set.")
+
         completion = openai_client.chat.completions.create(
             model=self.mind_name,
             messages=[{"role": "user", "content": query}],
             stream=False,
         )
+        if not isinstance(completion, ChatCompletion):
+            raise ValueError("Invalid response from AI-Mind")
 
         return completion.choices[0].message.content
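Replacing the None default on datasources with Field(default_factory=list) removes the Optional from the type and gives each instance its own fresh list. A reduced sketch:

    from pydantic import BaseModel, Field

    class Tool(BaseModel):
        # default_factory runs once per instance, so a mutable default is safe.
        datasources: list[dict] = Field(default_factory=list)

    a, b = Tool(), Tool()
    a.datasources.append({"db": "postgres"})
    print(b.datasources)  # [] - not shared with a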
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import os
 from typing import TYPE_CHECKING, Any
 
@@ -49,7 +51,7 @@ class ApifyActorsTool(BaseTool):
         print(f"URL: {result['metadata']['url']}")
         print(f"Content: {result.get('markdown', 'N/A')[:100]}...")
     """
-    actor_tool: "_ApifyActorsTool" = Field(description="Apify Actor Tool")
+    actor_tool: _ApifyActorsTool = Field(description="Apify Actor Tool")
     package_dependencies: list[str] = Field(default_factory=lambda: ["langchain-apify"])
 
     def __init__(self, actor_name: str, *args: Any, **kwargs: Any) -> None:
@@ -36,14 +36,9 @@ class ArxivPaperTool(BaseTool):
     model_config = ConfigDict(extra="allow")
     package_dependencies: list[str] = Field(default_factory=lambda: ["pydantic"])
     env_vars: list[EnvVar] = Field(default_factory=list)
-
-    def __init__(
-        self, download_pdfs=False, save_dir="./arxiv_pdfs", use_title_as_filename=False
-    ):
-        super().__init__()
-        self.download_pdfs = download_pdfs
-        self.save_dir = save_dir
-        self.use_title_as_filename = use_title_as_filename
+    download_pdfs: bool = False
+    save_dir: str = "./arxiv_pdfs"
+    use_title_as_filename: bool = False
 
     def _run(self, search_query: str, max_results: int = 5) -> str:
         try:
@@ -70,7 +65,7 @@ class ArxivPaperTool(BaseTool):
                 filename = f"{filename_base[:500]}.pdf"
                 save_path = Path(save_dir) / filename
 
-                self.download_pdf(paper["pdf_url"], save_path)
+                self.download_pdf(paper["pdf_url"], save_path)  # type: ignore[arg-type]
                 time.sleep(self.SLEEP_DURATION)
 
             results = [self._format_paper_result(p) for p in papers]
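Since BaseTool is a pydantic model, the three typed class fields above replace the hand-written __init__ while keeping the same keyword-argument call signature, and add validation for free. Roughly (sketched against plain pydantic):

    from pydantic import BaseModel

    class ArxivLike(BaseModel):
        # Field declarations generate __init__, defaults and validation included.
        download_pdfs: bool = False
        save_dir: str = "./arxiv_pdfs"
        use_title_as_filename: bool = False

    tool = ArxivLike(download_pdfs=True)
    print(tool.download_pdfs, tool.save_dir)  # True ./arxiv_pdfs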
@@ -1,4 +1,4 @@
-import datetime
+from datetime import datetime
 import os
 import time
 from typing import Any, ClassVar
@@ -1,6 +1,8 @@
-from .brightdata_dataset import BrightDataDatasetTool
-from .brightdata_serp import BrightDataSearchTool
-from .brightdata_unlocker import BrightDataWebUnlockerTool
+from crewai_tools.tools.brightdata_tool.brightdata_dataset import BrightDataDatasetTool
+from crewai_tools.tools.brightdata_tool.brightdata_serp import BrightDataSearchTool
+from crewai_tools.tools.brightdata_tool.brightdata_unlocker import (
+    BrightDataWebUnlockerTool,
+)
 
 
 __all__ = ["BrightDataDatasetTool", "BrightDataSearchTool", "BrightDataWebUnlockerTool"]
@@ -72,6 +72,6 @@ class BrowserbaseLoadTool(BaseTool):
         self.proxy = proxy
 
     def _run(self, url: str):
-        return self.browserbase.load_url(
+        return self.browserbase.load_url(  # type: ignore[union-attr]
             url, self.text_content, self.session_id, self.proxy
         )
@@ -1,8 +1,7 @@
 from pydantic import BaseModel, Field
 
 from crewai_tools.rag.data_types import DataType
-
-from ..rag.rag_tool import RagTool
+from crewai_tools.tools.rag.rag_tool import RagTool
 
 
 class FixedCodeDocsSearchToolSchema(BaseModel):
@@ -38,7 +37,7 @@ class CodeDocsSearchTool(RagTool):
     def add(self, docs_url: str) -> None:
         super().add(docs_url, data_type=DataType.DOCS_SITE)
 
-    def _run(
+    def _run(  # type: ignore[override]
         self,
         search_query: str,
         docs_url: str | None = None,
@@ -7,18 +7,30 @@ potentially unsafe operations and importing restricted modules.
 
 import importlib.util
 import os
+import subprocess
 from types import ModuleType
-from typing import Any, ClassVar
+from typing import Any, ClassVar, TypedDict
 
 from crewai.tools import BaseTool
-from docker import DockerClient, from_env as docker_from_env
-from docker.errors import ImageNotFound, NotFound
-from docker.models.containers import Container
+from docker import (  # type: ignore[import-untyped]
+    DockerClient,
+    from_env as docker_from_env,
+)
+from docker.errors import ImageNotFound, NotFound  # type: ignore[import-untyped]
+from docker.models.containers import Container  # type: ignore[import-untyped]
 from pydantic import BaseModel, Field
+from typing_extensions import Unpack
 
 from crewai_tools.printer import Printer
 
 
+class RunKwargs(TypedDict, total=False):
+    """Keyword arguments for the _run method."""
+
+    code: str
+    libraries_used: list[str]
+
+
 class CodeInterpreterSchema(BaseModel):
     """Schema for defining inputs to the CodeInterpreterTool.
 
@@ -115,14 +127,14 @@ class SandboxPython:
         return safe_builtins
 
     @staticmethod
-    def exec(code: str, locals: dict[str, Any]) -> None:
+    def exec(code: str, locals_: dict[str, Any]) -> None:
         """Executes Python code in a restricted environment.
 
         Args:
             code: The Python code to execute as a string.
-            locals: A dictionary that will be used for local variable storage.
+            locals_: A dictionary that will be used for local variable storage.
         """
-        exec(code, {"__builtins__": SandboxPython.safe_builtins()}, locals)  # noqa: S102
+        exec(code, {"__builtins__": SandboxPython.safe_builtins()}, locals_)  # noqa: S102
 
 
 class CodeInterpreterTool(BaseTool):
@@ -148,8 +160,13 @@ class CodeInterpreterTool(BaseTool):
 
         Returns:
             The directory path where the package is installed.
+
+        Raises:
+            RuntimeError: If the package cannot be found.
         """
         spec = importlib.util.find_spec("crewai_tools")
+        if spec is None or spec.origin is None:
+            raise RuntimeError("Cannot find crewai_tools package installation path")
         return os.path.dirname(spec.origin)
 
     def _verify_docker_image(self) -> None:
@@ -189,7 +206,7 @@ class CodeInterpreterTool(BaseTool):
             rm=True,
         )
 
-    def _run(self, **kwargs) -> str:
+    def _run(self, **kwargs: Unpack[RunKwargs]) -> str:
         """Runs the code interpreter tool with the provided arguments.
 
         Args:
@@ -198,14 +215,18 @@ class CodeInterpreterTool(BaseTool):
         Returns:
             The output of the executed code as a string.
         """
-        code = kwargs.get("code", self.code)
-        libraries_used = kwargs.get("libraries_used", [])
+        code: str | None = kwargs.get("code", self.code)
+        libraries_used: list[str] = kwargs.get("libraries_used", [])
+
+        if not code:
+            return "No code provided to execute."
+
         if self.unsafe_mode:
             return self.run_code_unsafe(code, libraries_used)
         return self.run_code_safety(code, libraries_used)
 
-    def _install_libraries(self, container: Container, libraries: list[str]) -> None:
+    @staticmethod
+    def _install_libraries(container: Container, libraries: list[str]) -> None:
         """Installs required Python libraries in the Docker container.
 
         Args:
@@ -245,7 +266,8 @@ class CodeInterpreterTool(BaseTool):
             volumes={current_path: {"bind": "/workspace", "mode": "rw"}},  # type: ignore
         )
 
-    def _check_docker_available(self) -> bool:
+    @staticmethod
+    def _check_docker_available() -> bool:
         """Checks if Docker is available and running on the system.
 
         Attempts to run the 'docker info' command to verify Docker availability.
@@ -254,7 +276,6 @@ class CodeInterpreterTool(BaseTool):
         Returns:
             True if Docker is available and running, False otherwise.
         """
-        import subprocess
 
         try:
             subprocess.run(
@@ -319,7 +340,8 @@ class CodeInterpreterTool(BaseTool):
             return f"Something went wrong while running the code: \n{exec_result.output.decode('utf-8')}"
         return exec_result.output.decode("utf-8")
 
-    def run_code_in_restricted_sandbox(self, code: str) -> str:
+    @staticmethod
+    def run_code_in_restricted_sandbox(code: str) -> str:
         """Runs Python code in a restricted sandbox environment.
 
         Executes the code with restricted access to potentially dangerous modules and
@@ -333,14 +355,15 @@ class CodeInterpreterTool(BaseTool):
             or an error message if execution failed.
         """
         Printer.print("Running code in restricted sandbox", color="yellow")
-        exec_locals = {}
+        exec_locals: dict[str, Any] = {}
         try:
-            SandboxPython.exec(code=code, locals=exec_locals)
+            SandboxPython.exec(code=code, locals_=exec_locals)
             return exec_locals.get("result", "No result variable found.")
         except Exception as e:
             return f"An error occurred: {e!s}"
 
-    def run_code_unsafe(self, code: str, libraries_used: list[str]) -> str:
+    @staticmethod
+    def run_code_unsafe(code: str, libraries_used: list[str]) -> str:
         """Runs code directly on the host machine without any safety restrictions.
 
         WARNING: This mode is unsafe and should only be used in trusted environments
@@ -361,7 +384,7 @@ class CodeInterpreterTool(BaseTool):
 
         # Execute the code
         try:
-            exec_locals = {}
+            exec_locals: dict[str, Any] = {}
             exec(code, {}, exec_locals)  # noqa: S102
             return exec_locals.get("result", "No result variable found.")
         except Exception as e:
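The Unpack[RunKwargs] annotation above lets mypy check the keys and value types flowing through **kwargs instead of treating them as Any. A self-contained sketch:

    from typing import TypedDict

    from typing_extensions import Unpack

    class RunKwargs(TypedDict, total=False):
        code: str
        libraries_used: list[str]

    def run(**kwargs: Unpack[RunKwargs]) -> str:
        code: str | None = kwargs.get("code")
        libraries: list[str] = kwargs.get("libraries_used", [])
        if not code:
            return "No code provided to execute."
        return f"would run {len(code)} chars with {libraries}"

    print(run(code="print(1)"))   # OK
    # run(code=123) is flagged by mypy: int is not assignable to str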
@@ -124,5 +124,5 @@ class ComposioTool(BaseTool):
 
         return [
             cls.from_action(action=action, **kwargs)
-            for action in toolset.find_actions_by_tags(*apps, tags=tags)
+            for action in toolset.find_actions_by_tags(*apps, tags=tags)  # type: ignore[arg-type]
         ]
@@ -82,7 +82,7 @@ class ContextualAIQueryTool(BaseTool):
         if loop and loop.is_running():
             # Already inside an event loop
             try:
-                import nest_asyncio
+                import nest_asyncio  # type: ignore[import-untyped]
 
                 nest_asyncio.apply(loop)
                 loop.run_until_complete(
@@ -4,10 +4,13 @@ from typing import Any
 
 
 try:
-    from couchbase.cluster import Cluster
-    from couchbase.options import SearchOptions
-    import couchbase.search as search
-    from couchbase.vector_search import VectorQuery, VectorSearch
+    from couchbase.cluster import Cluster  # type: ignore[import-untyped]
+    from couchbase.options import SearchOptions  # type: ignore[import-untyped]
+    import couchbase.search as search  # type: ignore[import-untyped]
+    from couchbase.vector_search import (  # type: ignore[import-untyped]
+        VectorQuery,
+        VectorSearch,
+    )
 
     COUCHBASE_AVAILABLE = True
 except ImportError:
@@ -38,24 +41,31 @@ class CouchbaseFTSVectorSearchTool(BaseTool):
     name: str = "CouchbaseFTSVectorSearchTool"
     description: str = "A tool to search the Couchbase database for relevant information on internal documents."
     args_schema: type[BaseModel] = CouchbaseToolSchema
-    cluster: SkipValidation[Cluster | None] = None
-    collection_name: str | None = (None,)
-    scope_name: str | None = (None,)
-    bucket_name: str | None = (None,)
-    index_name: str | None = (None,)
+    cluster: SkipValidation[Cluster] = Field(
+        description="An instance of the Couchbase Cluster connected to the desired Couchbase server.",
+    )
+    collection_name: str = Field(
+        description="The name of the Couchbase collection to search",
+    )
+    scope_name: str = Field(
+        description="The name of the Couchbase scope containing the collection to search.",
+    )
+    bucket_name: str = Field(
+        description="The name of the Couchbase bucket to search",
+    )
+    index_name: str = Field(
+        description="The name of the Couchbase index to search",
+    )
     embedding_key: str | None = Field(
         default="embedding",
         description="Name of the field in the search index that stores the vector",
     )
-    scoped_index: bool | None = (
-        Field(
-            default=True,
-            description="Specify whether the index is scoped. Is True by default.",
-        ),
-    )
+    scoped_index: bool = Field(
+        default=True,
+        description="Specify whether the index is scoped. Is True by default.",
+    )
     limit: int | None = Field(default=3)
     embedding_function: SkipValidation[Callable[[str], list[float]]] = Field(
         default=None,
         description="A function that takes a string and returns a list of floats. This is used to embed the query before searching the database.",
     )
 
@@ -112,6 +122,9 @@ class CouchbaseFTSVectorSearchTool(BaseTool):
                 " Please create the index before searching."
             )
         else:
+            if not self.cluster:
+                raise ValueError("Cluster instance must be provided")
+
             all_indexes = [
                 index.name for index in self.cluster.search_indexes().get_all_indexes()
             ]
@@ -140,24 +153,6 @@ class CouchbaseFTSVectorSearchTool(BaseTool):
         super().__init__(**kwargs)
         if COUCHBASE_AVAILABLE:
             try:
-                if not self.cluster:
-                    raise ValueError("Cluster instance must be provided")
-
-                if not self.bucket_name:
-                    raise ValueError("Bucket name must be provided")
-
-                if not self.scope_name:
-                    raise ValueError("Scope name must be provided")
-
-                if not self.collection_name:
-                    raise ValueError("Collection name must be provided")
-
-                if not self.index_name:
-                    raise ValueError("Index name must be provided")
-
-                if not self.embedding_function:
-                    raise ValueError("Embedding function must be provided")
-
                 self._bucket = self.cluster.bucket(self.bucket_name)
                 self._scope = self._bucket.scope(self.scope_name)
                 self._collection = self._scope.collection(self.collection_name)
|
||||
"""Crewai Enterprise Tools."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
|
||||
from crewai_tools.adapters.enterprise_adapter import EnterpriseActionKitToolAdapter
|
||||
from crewai_tools.adapters.tool_collection import ToolCollection
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def CrewaiEnterpriseTools( # noqa: N802
|
||||
enterprise_token: str | None = None,
|
||||
actions_list: list[str] | None = None,
|
||||
enterprise_action_kit_project_id: str | None = None,
|
||||
enterprise_action_kit_project_url: str | None = None,
|
||||
) -> ToolCollection[BaseTool]:
|
||||
"""Factory function that returns crewai enterprise tools.
|
||||
|
||||
Args:
|
||||
enterprise_token: The token for accessing enterprise actions.
|
||||
If not provided, will try to use CREWAI_ENTERPRISE_TOOLS_TOKEN env var.
|
||||
actions_list: Optional list of specific tool names to include.
|
||||
If provided, only tools with these names will be returned.
|
||||
enterprise_action_kit_project_id: Optional ID of the Enterprise Action Kit project.
|
||||
enterprise_action_kit_project_url: Optional URL of the Enterprise Action Kit project.
|
||||
|
||||
Returns:
|
||||
A ToolCollection of BaseTool instances for enterprise actions
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"CrewaiEnterpriseTools will be removed in v1.0.0. Considering use `Agent(apps=[...])` instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
if enterprise_token is None or enterprise_token == "":
|
||||
enterprise_token = os.environ.get("CREWAI_ENTERPRISE_TOOLS_TOKEN")
|
||||
if not enterprise_token:
|
||||
logger.warning("No enterprise token provided")
|
||||
|
||||
adapter_kwargs = {"enterprise_action_token": enterprise_token}
|
||||
|
||||
if enterprise_action_kit_project_id is not None:
|
||||
adapter_kwargs["enterprise_action_kit_project_id"] = (
|
||||
enterprise_action_kit_project_id
|
||||
)
|
||||
if enterprise_action_kit_project_url is not None:
|
||||
adapter_kwargs["enterprise_action_kit_project_url"] = (
|
||||
enterprise_action_kit_project_url
|
||||
)
|
||||
|
||||
adapter = EnterpriseActionKitToolAdapter(**adapter_kwargs)
|
||||
all_tools = adapter.tools()
|
||||
parsed_actions_list = _parse_actions_list(actions_list)
|
||||
|
||||
# Filter tools based on the provided list
|
||||
return ToolCollection(all_tools).filter_by_names(parsed_actions_list)
|
||||
|
||||
|
||||
# ENTERPRISE INJECTION ONLY
|
||||
def _parse_actions_list(actions_list: list[str] | None) -> list[str] | None:
|
||||
"""Parse a string representation of a list of tool names to a list of tool names.
|
||||
|
||||
Args:
|
||||
actions_list: A string representation of a list of tool names.
|
||||
|
||||
Returns:
|
||||
A list of tool names.
|
||||
"""
|
||||
if actions_list is not None:
|
||||
return actions_list
|
||||
|
||||
actions_list_from_env = os.environ.get("CREWAI_ENTERPRISE_TOOLS_ACTIONS_LIST")
|
||||
if actions_list_from_env is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
return json.loads(actions_list_from_env)
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(f"Failed to parse actions_list as JSON: {actions_list_from_env}")
|
||||
return None
|
||||
@@ -18,7 +18,7 @@ class CrewaiPlatformToolBuilder:
         apps: list[str],
     ):
         self._apps = apps
-        self._actions_schema = {}
+        self._actions_schema = {}  # type: ignore[var-annotated]
         self._tools = None
 
     def tools(self) -> list[BaseTool]:
@@ -24,4 +24,4 @@ def CrewaiPlatformTools(  # noqa: N802
     """
     builder = CrewaiPlatformToolBuilder(apps=apps)
 
-    return builder.tools()
+    return builder.tools()  # type: ignore
@@ -1,8 +1,7 @@
 from pydantic import BaseModel, Field
 
 from crewai_tools.rag.data_types import DataType
-
-from ..rag.rag_tool import RagTool
+from crewai_tools.tools.rag.rag_tool import RagTool
 
 
 class FixedCSVSearchToolSchema(BaseModel):
@@ -38,7 +37,7 @@ class CSVSearchTool(RagTool):
     def add(self, csv: str) -> None:
         super().add(csv, data_type=DataType.CSV)
 
-    def _run(
+    def _run(  # type: ignore[override]
         self,
         search_query: str,
         csv: str | None = None,
@@ -1,7 +1,8 @@
 import json
+from typing import Literal
 
 from crewai.tools import BaseTool, EnvVar
-from openai import OpenAI
+from openai import Omit, OpenAI
 from pydantic import BaseModel, Field
 
 
@@ -19,8 +20,22 @@ class DallETool(BaseTool):
     args_schema: type[BaseModel] = ImagePromptSchema
 
     model: str = "dall-e-3"
-    size: str = "1024x1024"
-    quality: str = "standard"
+    size: (
+        Literal[
+            "auto",
+            "1024x1024",
+            "1536x1024",
+            "1024x1536",
+            "256x256",
+            "512x512",
+            "1792x1024",
+            "1024x1792",
+        ]
+        | None
+    ) = "1024x1024"
+    quality: (
+        Literal["standard", "hd", "low", "medium", "high", "auto"] | None | Omit
+    ) = "standard"
     n: int = 1
 
     env_vars: list[EnvVar] = Field(
@@ -49,6 +64,9 @@ class DallETool(BaseTool):
             n=self.n,
         )
 
+        if not response or not response.data:
+            return "Failed to generate image."
+
         return json.dumps(
             {
                 "image_url": response.data[0].url,
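Typing size and quality with Literal moves the set of accepted values into the type, so bad strings are rejected statically by mypy and at runtime by pydantic validation. A reduced sketch (subset of the values above):

    from typing import Literal

    from pydantic import BaseModel

    class ImageConfig(BaseModel):
        size: Literal["1024x1024", "1792x1024", "1024x1792"] = "1024x1024"
        quality: Literal["standard", "hd"] = "standard"

    print(ImageConfig(size="1792x1024").size)
    # ImageConfig(size="640x480") raises a ValidationError at runtime
    # and is rejected by mypy before that.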
@@ -1,5 +1,8 @@
+from __future__ import annotations
+
 import os
-from typing import TYPE_CHECKING, Any, Optional
+import time
+from typing import TYPE_CHECKING, Any, TypeGuard, TypedDict
 
 from crewai.tools import BaseTool
 from pydantic import BaseModel, Field, model_validator
@@ -9,6 +12,28 @@ if TYPE_CHECKING:
     from databricks.sdk import WorkspaceClient
 
 
+class ExecutionContext(TypedDict, total=False):
+    catalog: str
+    schema: str
+
+
+def _has_data_array(result: Any) -> TypeGuard[Any]:
+    """Type guard to check if result has data_array attribute.
+
+    Args:
+        result: The result object to check.
+
+    Returns:
+        True if result.result.data_array exists and is not None.
+    """
+    return (
+        hasattr(result, "result")
+        and result.result is not None
+        and hasattr(result.result, "data_array")
+        and result.result.data_array is not None
+    )
+
+
 class DatabricksQueryToolSchema(BaseModel):
     """Input schema for DatabricksQueryTool."""
 
@@ -32,7 +57,7 @@ class DatabricksQueryToolSchema(BaseModel):
     )
 
     @model_validator(mode="after")
-    def validate_input(self) -> "DatabricksQueryToolSchema":
+    def validate_input(self) -> DatabricksQueryToolSchema:
         """Validate the input parameters."""
         # Ensure the query is not empty
         if not self.query or not self.query.strip():
@@ -72,7 +97,7 @@ class DatabricksQueryTool(BaseTool):
     default_schema: str | None = None
     default_warehouse_id: str | None = None
 
-    _workspace_client: Optional["WorkspaceClient"] = None
+    _workspace_client: WorkspaceClient | None = None
    package_dependencies: list[str] = Field(default_factory=lambda: ["databricks-sdk"])
 
     def __init__(
@@ -110,7 +135,7 @@ class DatabricksQueryTool(BaseTool):
         )
 
     @property
-    def workspace_client(self) -> "WorkspaceClient":
+    def workspace_client(self) -> WorkspaceClient:
         """Get or create a Databricks WorkspaceClient instance."""
         if self._workspace_client is None:
             try:
@@ -209,8 +234,12 @@ class DatabricksQueryTool(BaseTool):
         db_schema = validated_input.db_schema
         warehouse_id = validated_input.warehouse_id
 
+        if warehouse_id is None:
+            return "SQL warehouse ID must be provided either as a parameter or as a default."
+
         # Setup SQL context with catalog/schema if provided
-        context = {}
-
+        context: ExecutionContext = {}
         if catalog:
             context["catalog"] = catalog
         if db_schema:
@@ -231,7 +260,6 @@ class DatabricksQueryTool(BaseTool):
             return f"Error starting query execution: {execute_error!s}"
 
         # Poll for results with better error handling
-        import time
 
         result = None
         timeout = 300  # 5 minutes timeout
@@ -239,6 +267,9 @@ class DatabricksQueryTool(BaseTool):
         poll_count = 0
         previous_state = None  # Track previous state to detect changes
 
+        if statement_id is None:
+            return "Failed to retrieve statement ID after execution."
+
         while time.time() - start_time < timeout:
             poll_count += 1
             try:
@@ -248,7 +279,7 @@ class DatabricksQueryTool(BaseTool):
                 # Check if finished - be very explicit about state checking
                 if hasattr(result, "status") and hasattr(result.status, "state"):
                     state_value = str(
-                        result.status.state
+                        result.status.state  # type: ignore[union-attr]
                     )  # Convert to string to handle both string and enum
 
                     # Track state changes for debugging
@@ -265,16 +296,16 @@ class DatabricksQueryTool(BaseTool):
                             # First try direct access to error.message
                             if (
                                 hasattr(result.status, "error")
-                                and result.status.error
+                                and result.status.error  # type: ignore[union-attr]
                             ):
-                                if hasattr(result.status.error, "message"):
-                                    error_info = result.status.error.message
+                                if hasattr(result.status.error, "message"):  # type: ignore[union-attr]
+                                    error_info = result.status.error.message  # type: ignore[union-attr,assignment]
                                 # Some APIs may have a different structure
-                                elif hasattr(result.status.error, "error_message"):
-                                    error_info = result.status.error.error_message
+                                elif hasattr(result.status.error, "error_message"):  # type: ignore[union-attr]
+                                    error_info = result.status.error.error_message  # type: ignore[union-attr]
                                 # Last resort, try to convert the whole error object to string
                                 else:
-                                    error_info = str(result.status.error)
+                                    error_info = str(result.status.error)  # type: ignore[union-attr]
                         except Exception as err_extract_error:
                             # If all else fails, try to get any info we can
                             error_info = (
@@ -302,7 +333,7 @@ class DatabricksQueryTool(BaseTool):
                 return "Query completed but returned an invalid result structure"
 
             # Convert state to string for comparison
-            state_value = str(result.status.state)
+            state_value = str(result.status.state)  # type: ignore[union-attr]
             if not any(
                 state in state_value for state in ["SUCCEEDED", "FAILED", "CANCELED"]
             ):
@@ -323,7 +354,7 @@ class DatabricksQueryTool(BaseTool):
             if has_schema and has_result:
                 try:
                     # Get schema for column names
-                    columns = [col.name for col in result.manifest.schema.columns]
+                    columns = [col.name for col in result.manifest.schema.columns]  # type: ignore[union-attr]
 
                     # Debug info for schema
@@ -331,7 +362,7 @@ class DatabricksQueryTool(BaseTool):
                     all_columns = set(columns)
 
                     # Dump the raw structure of result data to help troubleshoot
-                    if hasattr(result.result, "data_array"):
+                    if _has_data_array(result):
                         # Add defensive check for None data_array
                         if result.result.data_array is None:
                             # Return empty result handling rather than trying to process null data
@@ -343,8 +374,7 @@ class DatabricksQueryTool(BaseTool):
 
                     # Only try to analyze sample if data_array exists and has content
                     if (
-                        hasattr(result.result, "data_array")
-                        and result.result.data_array
+                        _has_data_array(result)
                         and len(result.result.data_array) > 0
                         and len(result.result.data_array[0]) > 0
                     ):
@@ -385,17 +415,17 @@ class DatabricksQueryTool(BaseTool):
                         rows_with_single_item = 0
                         if (
                             hasattr(result.result, "data_array")
-                            and result.result.data_array
-                            and len(result.result.data_array) > 0
+                            and result.result.data_array  # type: ignore[union-attr]
+                            and len(result.result.data_array) > 0  # type: ignore[union-attr]
                         ):
                             sample_size_for_rows = (
-                                min(sample_size, len(result.result.data_array[0]))
+                                min(sample_size, len(result.result.data_array[0]))  # type: ignore[union-attr]
                                 if "sample_size" in locals()
-                                else min(20, len(result.result.data_array[0]))
+                                else min(20, len(result.result.data_array[0]))  # type: ignore[union-attr]
                             )
                             rows_with_single_item = sum(
-                                1
-                                for row in result.result.data_array[0][
+                                1  # type: ignore[misc]
+                                for row in result.result.data_array[0][  # type: ignore[union-attr]
                                     :sample_size_for_rows
                                 ]
                                 if isinstance(row, list) and len(row) == 1
@@ -424,13 +454,13 @@ class DatabricksQueryTool(BaseTool):
                     # We're dealing with data where the rows may be incorrectly structured
 
                     # Collect all values into a flat list
-                    all_values = []
+                    all_values: list[Any] = []
                     if (
                         hasattr(result.result, "data_array")
-                        and result.result.data_array
+                        and result.result.data_array  # type: ignore[union-attr]
                     ):
                         # Flatten all values into a single list
-                        for chunk in result.result.data_array:
+                        for chunk in result.result.data_array:  # type: ignore[union-attr]
                             for item in chunk:
                                 if isinstance(item, (list, tuple)):
                                     all_values.extend(item)
@@ -629,7 +659,7 @@ class DatabricksQueryTool(BaseTool):
                         # Fix titles that might still have issues
                         if (
                             isinstance(row.get("Title"), str)
-                            and len(row.get("Title")) <= 1
+                            and len(row.get("Title")) <= 1  # type: ignore[arg-type]
                         ):
                             # This is likely still a fragmented title - mark as potentially incomplete
                             row["Title"] = f"[INCOMPLETE] {row.get('Title')}"
@@ -645,11 +675,11 @@ class DatabricksQueryTool(BaseTool):
                     # Check different result structures
                     if (
                         hasattr(result.result, "data_array")
-                        and result.result.data_array
+                        and result.result.data_array  # type: ignore[union-attr]
                     ):
                         # Check if data appears to be malformed within chunks
                         for _chunk_idx, chunk in enumerate(
-                            result.result.data_array
+                            result.result.data_array  # type: ignore[union-attr]
                        ):
                             # Check if chunk might actually contain individual columns of a single row
                             # This is another way data might be malformed - check the first few values
@@ -756,10 +786,10 @@ class DatabricksQueryTool(BaseTool):
 
                             chunk_results.append(row_dict)
 
-                    elif hasattr(result.result, "data") and result.result.data:
+                    elif hasattr(result.result, "data") and result.result.data:  # type: ignore[union-attr]
                         # Alternative data structure
 
-                        for _row_idx, row in enumerate(result.result.data):
+                        for _row_idx, row in enumerate(result.result.data):  # type: ignore[union-attr]
                             # Debug info
 
                             # Safely create dictionary matching column names to values
@@ -803,12 +833,12 @@ class DatabricksQueryTool(BaseTool):
 
             # If we have no results but the query succeeded (e.g., for DDL statements)
             if not chunk_results and hasattr(result, "status"):
-                state_value = str(result.status.state)
+                state_value = str(result.status.state)  # type: ignore[union-attr]
                 if "SUCCEEDED" in state_value:
                     return "Query executed successfully (no results to display)"
 
             # Format and return results
-            return self._format_results(chunk_results)
+            return self._format_results(chunk_results)  # type: ignore[arg-type]
 
         except Exception as e:
             # Include more details in the error message to help with debugging
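_has_data_array centralizes the repeated hasattr checks, and its TypeGuard return type tells the checker that inside a guarded branch the value has the narrowed shape. A generic sketch of how TypeGuard works:

    from typing import Any, TypeGuard

    def is_str_list(values: list[Any]) -> TypeGuard[list[str]]:
        # Returning True promises the type checker that values is list[str].
        return all(isinstance(v, str) for v in values)

    def shout(values: list[Any]) -> None:
        if is_str_list(values):
            # Here mypy treats values as list[str], so .upper() type-checks.
            print(" ".join(v.upper() for v in values))

    shout(["poll", "status", "ok"])  # POLL STATUS OK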
@@ -35,7 +35,10 @@ class DirectoryReadTool(BaseTool):
         self,
         **kwargs: Any,
     ) -> Any:
-        directory = kwargs.get("directory", self.directory)
+        directory: str | None = kwargs.get("directory", self.directory)
+        if directory is None:
+            raise ValueError("Directory must be provided.")
+
         if directory[-1] == "/":
             directory = directory[:-1]
         files_list = [
@@ -1,8 +1,7 @@
 from pydantic import BaseModel, Field
 
 from crewai_tools.rag.data_types import DataType
-
-from ..rag.rag_tool import RagTool
+from crewai_tools.tools.rag.rag_tool import RagTool
 
 
 class FixedDirectorySearchToolSchema(BaseModel):
@@ -38,7 +37,7 @@ class DirectorySearchTool(RagTool):
     def add(self, directory: str) -> None:
         super().add(directory, data_type=DataType.DIRECTORY)
 
-    def _run(
+    def _run(  # type: ignore[override]
         self,
         search_query: str,
         directory: str | None = None,
@@ -3,8 +3,7 @@ from typing import Any
 from pydantic import BaseModel, Field
 
 from crewai_tools.rag.data_types import DataType
-
-from ..rag.rag_tool import RagTool
+from crewai_tools.tools.rag.rag_tool import RagTool
 
 
 class FixedDOCXSearchToolSchema(BaseModel):
@@ -46,7 +45,7 @@ class DOCXSearchTool(RagTool):
     def add(self, docx: str) -> None:
         super().add(docx, data_type=DataType.DOCX)
 
-    def _run(
+    def _run(  # type: ignore[override]
         self,
         search_query: str,
         docx: str | None = None,
@@ -1,17 +1,21 @@
+from __future__ import annotations
+
+from builtins import type as type_
 import os
-from typing import Any, Optional
+from typing import Any, TypedDict
 
 from crewai.tools import BaseTool, EnvVar
 from pydantic import BaseModel, ConfigDict, Field
+from typing_extensions import Required
 
 
-try:
-    from exa_py import Exa
+class SearchParams(TypedDict, total=False):
+    """Parameters for Exa search API."""
 
-    EXA_INSTALLED = True
-except ImportError:
-    Exa = Any
-    EXA_INSTALLED = False
+    type: Required[str | None]
+    start_published_date: str
+    end_published_date: str
+    include_domains: list[str]
 
 
 class EXABaseToolSchema(BaseModel):
@@ -31,8 +35,8 @@ class EXASearchTool(BaseTool):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     name: str = "EXASearchTool"
     description: str = "Search the internet using Exa"
-    args_schema: type[BaseModel] = EXABaseToolSchema
-    client: Optional["Exa"] = None
+    args_schema: type_[BaseModel] = EXABaseToolSchema
+    client: Any | None = None
     content: bool | None = False
     summary: bool | None = False
     type: str | None = "auto"
@@ -72,7 +76,9 @@ class EXASearchTool(BaseTool):
         super().__init__(
             **kwargs,
         )
-        if not EXA_INSTALLED:
+        try:
+            from exa_py import Exa
+        except ImportError as e:
             import click
 
             if click.confirm(
@@ -82,11 +88,16 @@ class EXASearchTool(BaseTool):
 
                 subprocess.run(["uv", "add", "exa_py"], check=True)  # noqa: S607
 
+                # Re-import after installation
+                from exa_py import Exa
             else:
                 raise ImportError(
                     "You are missing the 'exa_py' package. Would you like to install it?"
-                )
-        client_kwargs = {"api_key": self.api_key}
+                ) from e
+
+        client_kwargs: dict[str, str] = {}
+        if self.api_key:
+            client_kwargs["api_key"] = self.api_key
         if self.base_url:
             client_kwargs["base_url"] = self.base_url
         self.client = Exa(**client_kwargs)
@@ -104,7 +115,7 @@ class EXASearchTool(BaseTool):
         if self.client is None:
             raise ValueError("Client not initialized")
 
-        search_params = {
+        search_params: SearchParams = {
             "type": self.type,
         }
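SearchParams combines total=False (all keys optional by default) with Required[...] to pin down the one key that must always be present. Sketch:

    from typing import TypedDict

    from typing_extensions import Required

    class SearchParams(TypedDict, total=False):
        type: Required[str | None]   # key must exist, value may be None
        start_published_date: str    # optional
        include_domains: list[str]   # optional

    ok: SearchParams = {"type": "auto"}
    also_ok: SearchParams = {"type": None, "include_domains": ["example.com"]}
    # {"start_published_date": "2024-01-01"} alone fails: "type" is missing
    print(ok, also_ok)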
@@ -72,9 +72,9 @@ class FileCompressorTool(BaseTool):
                 "tar.xz": self._compress_tar,
             }
             if format == "zip":
-                format_compression[format](input_path, output_path)
+                format_compression[format](input_path, output_path)  # type: ignore[operator]
             else:
-                format_compression[format](input_path, output_path, format)
+                format_compression[format](input_path, output_path, format)  # type: ignore[operator]
 
             return f"Successfully compressed '{input_path}' into '{output_path}'"
         except FileNotFoundError:
@@ -84,7 +84,8 @@ class FileCompressorTool(BaseTool):
         except Exception as e:
             return f"An unexpected error occurred during compression: {e!s}"
 
-    def _generate_output_path(self, input_path: str, format: str) -> str:
+    @staticmethod
+    def _generate_output_path(input_path: str, format: str) -> str:
         """Generates output path based on input path and format."""
         if os.path.isfile(input_path):
             base_name = os.path.splitext(os.path.basename(input_path))[
@@ -94,7 +95,8 @@ class FileCompressorTool(BaseTool):
             base_name = os.path.basename(os.path.normpath(input_path))  # Directory name
         return os.path.join(os.getcwd(), f"{base_name}.{format}")
 
-    def _prepare_output(self, output_path: str, overwrite: bool) -> bool:
+    @staticmethod
+    def _prepare_output(output_path: str, overwrite: bool) -> bool:
         """Ensures output path is ready for writing."""
         output_dir = os.path.dirname(output_path)
         if output_dir and not os.path.exists(output_dir):
@@ -103,7 +105,8 @@ class FileCompressorTool(BaseTool):
             return False
         return True
 
-    def _compress_zip(self, input_path: str, output_path: str):
+    @staticmethod
+    def _compress_zip(input_path: str, output_path: str):
         """Compresses input into a zip archive."""
         with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zipf:
             if os.path.isfile(input_path):
@@ -115,7 +118,8 @@ class FileCompressorTool(BaseTool):
                         arcname = os.path.relpath(full_path, start=input_path)
                         zipf.write(full_path, arcname)
 
-    def _compress_tar(self, input_path: str, output_path: str, format: str):
+    @staticmethod
+    def _compress_tar(input_path: str, output_path: str, format: str):
         """Compresses input into a tar archive with the given format."""
         format_mode = {
             "tar": "w",
@@ -129,6 +133,6 @@ class FileCompressorTool(BaseTool):
 
         mode = format_mode[format]
 
-        with tarfile.open(output_path, mode) as tarf:
+        with tarfile.open(output_path, mode) as tarf:  # type: ignore[call-overload]
             arcname = os.path.basename(input_path)
             tarf.add(input_path, arcname=arcname)
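The # type: ignore[call-overload] on tarfile.open exists because the stubs overload on literal mode strings ("w", "w:gz", ...), and a mode fetched from a dict is just str. A hedged sketch of the same lookup (mode table assumed, using standard tarfile modes):

    import tarfile

    format_mode = {
        "tar": "w",
        "tar.gz": "w:gz",
        "tar.bz2": "w:bz2",
        "tar.xz": "w:xz",
    }

    def compress(input_path: str, output_path: str, format: str) -> None:
        mode = format_mode[format]  # typed as plain str, hence the ignore
        with tarfile.open(output_path, mode) as tarf:  # type: ignore[call-overload]
            tarf.add(input_path, arcname="payload")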
@@ -1,14 +1,16 @@
-from typing import TYPE_CHECKING, Any, Optional
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 from crewai.tools import BaseTool, EnvVar
 from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
 
 
 if TYPE_CHECKING:
-    from firecrawl import FirecrawlApp
+    from firecrawl import FirecrawlApp  # type: ignore[import-untyped]
 
 try:
-    from firecrawl import FirecrawlApp
+    from firecrawl import FirecrawlApp  # type: ignore[import-untyped]
 
     FIRECRAWL_AVAILABLE = True
 except ImportError:
@@ -59,7 +61,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
             },
         }
     )
-    _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
+    _firecrawl: FirecrawlApp | None = PrivateAttr(None)
     package_dependencies: list[str] = Field(default_factory=lambda: ["firecrawl-py"])
     env_vars: list[EnvVar] = Field(
         default_factory=lambda: [
@@ -114,7 +116,7 @@ try:
     # Only rebuild if the class hasn't been initialized yet
     if not hasattr(FirecrawlCrawlWebsiteTool, "_model_rebuilt"):
         FirecrawlCrawlWebsiteTool.model_rebuild()
-        FirecrawlCrawlWebsiteTool._model_rebuilt = True
+        FirecrawlCrawlWebsiteTool._model_rebuilt = True  # type: ignore[attr-defined]
 except ImportError:
     """
     When this tool is not used, then exception can be ignored.
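The Firecrawl import appears twice on purpose: once under TYPE_CHECKING so annotations resolve for the checker, and once in try/except as the runtime feature flag. A minimal sketch of the dual-import pattern (somepkg is a hypothetical optional dependency):

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from somepkg import Client  # seen only by the type checker

    try:
        from somepkg import Client
        SOMEPKG_AVAILABLE = True
    except ImportError:
        SOMEPKG_AVAILABLE = False

    def make_client() -> Client | None:
        # Annotation is lazy under __future__ annotations, so this runs
        # even when somepkg is absent.
        return Client() if SOMEPKG_AVAILABLE else None

    print(make_client())  # None when somepkg is not installed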
Some files were not shown because too many files have changed in this diff.