Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-05 22:28:29 +00:00

Compare commits: feat/bump-... 1.1.0 (16 commits)
Commits:

- d28daa26cd
- a850813f2b
- 5944a39629
- c594859ed0
- 2ee27efca7
- f6e13eb890
- e7b3ce27ca
- dba27cf8b5
- 6469f224f6
- f3a63be215
- 01d8c189f0
- cc83c1ead5
- 7578901f6d
- d1343b96ed
- 42f2b4d551
- 0229390ad1
.github/codeql/codeql-config.yml (vendored, 23 lines changed)
@@ -2,20 +2,27 @@ name: "CodeQL Config"

paths-ignore:
  # Ignore template files - these are boilerplate code that shouldn't be analyzed
  - "src/crewai/cli/templates/**"
  - "lib/crewai/src/crewai/cli/templates/**"
  # Ignore test cassettes - these are test fixtures/recordings
  - "tests/cassettes/**"
  - "lib/crewai/tests/cassettes/**"
  - "lib/crewai-tools/tests/cassettes/**"
  # Ignore cache and build artifacts
  - ".cache/**"
  # Ignore documentation build artifacts
  - "docs/.cache/**"

  # Ignore experimental code
  - "lib/crewai/src/crewai/experimental/a2a/**"

paths:
  # Include all Python source code
  - "src/**"
  # Include tests (but exclude cassettes)
  - "tests/**"
  # Include all Python source code from workspace packages
  - "lib/crewai/src/**"
  - "lib/crewai-tools/src/**"
  - "lib/devtools/src/**"
  # Include tests (but exclude cassettes via paths-ignore)
  - "lib/crewai/tests/**"
  - "lib/crewai-tools/tests/**"
  - "lib/devtools/tests/**"

# Configure specific queries or packs if needed
# queries:
#   - uses: security-and-quality
.github/workflows/publish.yml (vendored, 4 lines changed)
@@ -7,7 +7,6 @@ on:

jobs:
  build:
    if: github.event.release.prerelease == true
    name: Build packages
    runs-on: ubuntu-latest
    permissions:
@@ -25,7 +24,7 @@ jobs:

      - name: Build packages
        run: |
          uv build --prerelease="allow" --all-packages
          uv build --all-packages
          rm dist/.gitignore

      - name: Upload artifacts
@@ -35,7 +34,6 @@ jobs:
        path: dist/

  publish:
    if: github.event.release.prerelease == true
    name: Publish to PyPI
    needs: build
    runs-on: ubuntu-latest
@@ -3,19 +3,24 @@ repos:
    hooks:
      - id: ruff
        name: ruff
        entry: uv run ruff check
        entry: bash -c 'source .venv/bin/activate && uv run ruff check --config pyproject.toml "$@"' --
        language: system
        pass_filenames: true
        types: [python]
        exclude: ^lib/crewai/
      - id: ruff-format
        name: ruff-format
        entry: uv run ruff format
        entry: bash -c 'source .venv/bin/activate && uv run ruff format --config pyproject.toml "$@"' --
        language: system
        pass_filenames: true
        types: [python]
        exclude: ^lib/crewai/
      - id: mypy
        name: mypy
        entry: uv run mypy
        entry: bash -c 'source .venv/bin/activate && uv run mypy --config-file pyproject.toml "$@"' --
        language: system
        pass_filenames: true
        types: [python]
        exclude: ^lib/crewai/
  - repo: https://github.com/astral-sh/uv-pre-commit
    rev: 0.9.3
    hooks:
      - id: uv-lock
@@ -134,6 +134,7 @@
            "group": "MCP Integration",
            "pages": [
              "en/mcp/overview",
              "en/mcp/dsl-integration",
              "en/mcp/stdio",
              "en/mcp/sse",
              "en/mcp/streamable-http",
@@ -570,6 +571,7 @@
            "group": "Integração MCP",
            "pages": [
              "pt-BR/mcp/overview",
              "pt-BR/mcp/dsl-integration",
              "pt-BR/mcp/stdio",
              "pt-BR/mcp/sse",
              "pt-BR/mcp/streamable-http",
@@ -989,6 +991,7 @@
            "group": "MCP 통합",
            "pages": [
              "ko/mcp/overview",
              "ko/mcp/dsl-integration",
              "ko/mcp/stdio",
              "ko/mcp/sse",
              "ko/mcp/streamable-http",
@@ -7,7 +7,7 @@ mode: "wide"

## Overview

CrewAI integrates with multiple LLM providers through LiteLLM, giving you the flexibility to choose the right model for your specific use case. This guide will help you understand how to configure and use different LLM providers in your CrewAI projects.
CrewAI integrates with multiple LLM providers through each provider's native SDK, giving you the flexibility to choose the right model for your specific use case. This guide will help you understand how to configure and use different LLM providers in your CrewAI projects.

## What are LLMs?
@@ -113,44 +113,104 @@ In this section, you'll find detailed examples that help you select, configure,
<AccordionGroup>
<Accordion title="OpenAI">
Set the following environment variables in your `.env` file:
CrewAI provides native integration with OpenAI through the OpenAI Python SDK.

```toml Code
# Required
OPENAI_API_KEY=sk-...

# Optional
OPENAI_API_BASE=<custom-base-url>
OPENAI_ORGANIZATION=<your-org-id>
OPENAI_BASE_URL=<custom-base-url>
```

Example usage in your CrewAI project:
**Basic Usage:**
```python Code
from crewai import LLM

llm = LLM(
    model="openai/gpt-4",  # call model by provider/model_name
    temperature=0.8,
    max_tokens=150,
    model="openai/gpt-4o",
    api_key="your-api-key",  # Or set OPENAI_API_KEY
    temperature=0.7,
    max_tokens=4000
)
```

**Advanced Configuration:**
```python Code
from crewai import LLM

llm = LLM(
    model="openai/gpt-4o",
    api_key="your-api-key",
    base_url="https://api.openai.com/v1",  # Optional custom endpoint
    organization="org-...",  # Optional organization ID
    project="proj_...",  # Optional project ID
    temperature=0.7,
    max_tokens=4000,
    max_completion_tokens=4000,  # For newer models
    top_p=0.9,
    frequency_penalty=0.1,
    presence_penalty=0.1,
    stop=["END"],
    seed=42
    seed=42,  # For reproducible outputs
    stream=True,  # Enable streaming
    timeout=60.0,  # Request timeout in seconds
    max_retries=3,  # Maximum retry attempts
    logprobs=True,  # Return log probabilities
    top_logprobs=5,  # Number of most likely tokens
    reasoning_effort="medium"  # For o1 models: low, medium, high
)
```

OpenAI is one of the leading providers of LLMs with a wide range of models and features.
**Structured Outputs:**
```python Code
from pydantic import BaseModel
from crewai import LLM

class ResponseFormat(BaseModel):
    name: str
    age: int
    summary: str

llm = LLM(
    model="openai/gpt-4o",
    response_format=ResponseFormat,  # Pydantic model used as the response schema
)
```

**Supported Environment Variables:**
- `OPENAI_API_KEY`: Your OpenAI API key (required)
- `OPENAI_BASE_URL`: Custom base URL for the OpenAI API (optional)

**Features:**
- Native function calling support (except o1 models)
- Structured outputs with JSON schema
- Streaming support for real-time responses
- Token usage tracking
- Stop sequences support (except o1 models)
- Log probabilities for token-level insights
- Reasoning effort control for o1 models

**Supported Models:**

| Model | Context Window | Best For |
|---------------------|------------------|-----------------------------------------------|
| GPT-4 | 8,192 tokens | High-accuracy tasks, complex reasoning |
| GPT-4 Turbo | 128,000 tokens | Long-form content, document analysis |
| GPT-4o & GPT-4o-mini | 128,000 tokens | Cost-effective large context processing |
| o3-mini | 200,000 tokens | Fast reasoning, complex reasoning |
| o1-mini | 128,000 tokens | Fast reasoning, complex reasoning |
| o1-preview | 128,000 tokens | Fast reasoning, complex reasoning |
| o1 | 200,000 tokens | Fast reasoning, complex reasoning |
| gpt-4.1 | 1M tokens | Latest model with enhanced capabilities |
| gpt-4.1-mini | 1M tokens | Efficient version with large context |
| gpt-4.1-nano | 1M tokens | Ultra-efficient variant |
| gpt-4o | 128,000 tokens | Optimized for speed and intelligence |
| gpt-4o-mini | 200,000 tokens | Cost-effective with large context |
| gpt-4-turbo | 128,000 tokens | Long-form content, document analysis |
| gpt-4 | 8,192 tokens | High-accuracy tasks, complex reasoning |
| o1 | 200,000 tokens | Advanced reasoning, complex problem-solving |
| o1-preview | 128,000 tokens | Preview of reasoning capabilities |
| o1-mini | 128,000 tokens | Efficient reasoning model |
| o3-mini | 200,000 tokens | Lightweight reasoning model |
| o4-mini | 200,000 tokens | Next-gen efficient reasoning |

**Note:** To use OpenAI, install the required dependencies:
```bash
uv add "crewai[openai]"
```
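As a usage sketch (assuming the structured-output `llm` defined above; the role, goal, and backstory strings are illustrative), the configured LLM can be handed straight to an agent:
```python Code
from crewai import Agent

# Hypothetical agent that returns ResponseFormat-shaped output
profile_agent = Agent(
    role="Profile Extractor",
    goal="Extract structured person profiles from raw text",
    backstory="Agent that answers with structured JSON",
    llm=llm,  # the LLM configured with response_format above
)
```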
</Accordion>

<Accordion title="Meta-Llama">
@@ -187,69 +247,186 @@ In this section, you'll find detailed examples that help you select, configure,
</Accordion>

<Accordion title="Anthropic">
CrewAI provides native integration with Anthropic through the Anthropic Python SDK.

```toml Code
# Required
ANTHROPIC_API_KEY=sk-ant-...

# Optional
ANTHROPIC_API_BASE=<custom-base-url>
```

Example usage in your CrewAI project:
**Basic Usage:**
```python Code
from crewai import LLM

llm = LLM(
    model="anthropic/claude-3-sonnet-20240229-v1:0",
    temperature=0.7
    model="anthropic/claude-3-5-sonnet-20241022",
    api_key="your-api-key",  # Or set ANTHROPIC_API_KEY
    max_tokens=4096  # Required for Anthropic
)
```

**Advanced Configuration:**
```python Code
from crewai import LLM

llm = LLM(
    model="anthropic/claude-3-5-sonnet-20241022",
    api_key="your-api-key",
    base_url="https://api.anthropic.com",  # Optional custom endpoint
    temperature=0.7,
    max_tokens=4096,  # Required parameter
    top_p=0.9,
    stop_sequences=["END", "STOP"],  # Anthropic uses stop_sequences
    stream=True,  # Enable streaming
    timeout=60.0,  # Request timeout in seconds
    max_retries=3  # Maximum retry attempts
)
```

**Supported Environment Variables:**
- `ANTHROPIC_API_KEY`: Your Anthropic API key (required)

**Features:**
- Native tool use support for Claude 3+ models
- Streaming support for real-time responses
- Automatic system message handling
- Stop sequences for controlled output
- Token usage tracking
- Multi-turn tool use conversations

**Important Notes:**
- `max_tokens` is a **required** parameter for all Anthropic models
- Claude uses `stop_sequences` instead of `stop`
- System messages are handled separately from conversation messages
- First message must be from the user (automatically handled)
- Messages must alternate between user and assistant

**Supported Models:**

| Model | Context Window | Best For |
|------------------------------|----------------|-----------------------------------------------|
| claude-3-7-sonnet | 200,000 tokens | Advanced reasoning and agentic tasks |
| claude-3-5-sonnet-20241022 | 200,000 tokens | Latest Sonnet with best performance |
| claude-3-5-haiku | 200,000 tokens | Fast, compact model for quick responses |
| claude-3-opus | 200,000 tokens | Most capable for complex tasks |
| claude-3-sonnet | 200,000 tokens | Balanced intelligence and speed |
| claude-3-haiku | 200,000 tokens | Fastest for simple tasks |
| claude-2.1 | 200,000 tokens | Extended context, reduced hallucinations |
| claude-2 | 100,000 tokens | Versatile model for various tasks |
| claude-instant | 100,000 tokens | Fast, cost-effective for everyday tasks |

**Note:** To use Anthropic, install the required dependencies:
```bash
uv add "crewai[anthropic]"
```
</Accordion>

<Accordion title="Google (Gemini API)">
Set your API key in your `.env` file. If you need a key, or need to find an existing key, check [AI Studio](https://aistudio.google.com/apikey).
CrewAI provides native integration with Google Gemini through the Google Gen AI Python SDK.

Set your API key in your `.env` file. If you need a key, check [AI Studio](https://aistudio.google.com/apikey).

```toml .env
# https://ai.google.dev/gemini-api/docs/api-key
# Required (one of the following)
GOOGLE_API_KEY=<your-api-key>
GEMINI_API_KEY=<your-api-key>

# Optional - for Vertex AI
GOOGLE_CLOUD_PROJECT=<your-project-id>
GOOGLE_CLOUD_LOCATION=<location>  # Defaults to us-central1
GOOGLE_GENAI_USE_VERTEXAI=true  # Set to use Vertex AI
```

Example usage in your CrewAI project:
**Basic Usage:**
```python Code
from crewai import LLM

llm = LLM(
    model="gemini/gemini-2.0-flash",
    temperature=0.7,
    api_key="your-api-key",  # Or set GOOGLE_API_KEY/GEMINI_API_KEY
    temperature=0.7
)
```

### Gemini models
**Advanced Configuration:**
```python Code
from crewai import LLM

llm = LLM(
    model="gemini/gemini-2.5-flash",
    api_key="your-api-key",
    temperature=0.7,
    top_p=0.9,
    top_k=40,  # Top-k sampling parameter
    max_output_tokens=8192,
    stop_sequences=["END", "STOP"],
    stream=True,  # Enable streaming
    safety_settings={
        "HARM_CATEGORY_HARASSMENT": "BLOCK_NONE",
        "HARM_CATEGORY_HATE_SPEECH": "BLOCK_NONE"
    }
)
```

**Vertex AI Configuration:**
```python Code
from crewai import LLM

llm = LLM(
    model="gemini/gemini-1.5-pro",
    project="your-gcp-project-id",
    location="us-central1"  # GCP region
)
```

**Supported Environment Variables:**
- `GOOGLE_API_KEY` or `GEMINI_API_KEY`: Your Google API key (required for the Gemini API)
- `GOOGLE_CLOUD_PROJECT`: Google Cloud project ID (for Vertex AI)
- `GOOGLE_CLOUD_LOCATION`: GCP location (defaults to `us-central1`)
- `GOOGLE_GENAI_USE_VERTEXAI`: Set to `true` to use Vertex AI

**Features:**
- Native function calling support for Gemini 1.5+ and 2.x models
- Streaming support for real-time responses
- Multimodal capabilities (text, images, video)
- Safety settings configuration
- Support for both the Gemini API and Vertex AI
- Automatic system instruction handling
- Token usage tracking

**Gemini Models:**

Google offers a range of powerful models optimized for different use cases.

| Model | Context Window | Best For |
|--------------------------------|----------------|-------------------------------------------------------------------|
| gemini-2.5-flash-preview-04-17 | 1M tokens | Adaptive thinking, cost efficiency |
| gemini-2.5-pro-preview-05-06 | 1M tokens | Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more |
| gemini-2.0-flash | 1M tokens | Next generation features, speed, thinking, and realtime streaming |
| gemini-2.5-flash | 1M tokens | Adaptive thinking, cost efficiency |
| gemini-2.5-pro | 1M tokens | Enhanced thinking and reasoning, multimodal understanding |
| gemini-2.0-flash | 1M tokens | Next generation features, speed, thinking |
| gemini-2.0-flash-thinking | 32,768 tokens | Advanced reasoning with thinking process |
| gemini-2.0-flash-lite | 1M tokens | Cost efficiency and low latency |
| gemini-1.5-pro | 2M tokens | Best performing, logical reasoning, coding |
| gemini-1.5-flash | 1M tokens | Balanced multimodal model, good for most tasks |
| gemini-1.5-flash-8B | 1M tokens | Fastest, most cost-efficient, good for high-frequency tasks |
| gemini-1.5-pro | 2M tokens | Best performing, wide variety of reasoning tasks including logical reasoning, coding, and creative collaboration |
| gemini-1.5-flash-8b | 1M tokens | Fastest, most cost-efficient |
| gemini-1.0-pro | 32,768 tokens | Earlier generation model |

**Gemma Models:**

The Gemini API also supports [Gemma models](https://ai.google.dev/gemma/docs) hosted on Google infrastructure.

| Model | Context Window | Best For |
|----------------|----------------|------------------------------------|
| gemma-3-1b | 32,000 tokens | Ultra-lightweight tasks |
| gemma-3-4b | 128,000 tokens | Efficient general-purpose tasks |
| gemma-3-12b | 128,000 tokens | Balanced performance and efficiency|
| gemma-3-27b | 128,000 tokens | High-performance tasks |

**Note:** To use Google Gemini, install the required dependencies:
```bash
uv add "crewai[google-genai]"
```

The full list of models is available in the [Gemini model docs](https://ai.google.dev/gemini-api/docs/models).

### Gemma

The Gemini API also allows you to use your API key to access [Gemma models](https://ai.google.dev/gemma/docs) hosted on Google infrastructure.

| Model | Context Window |
|----------------|----------------|
| gemma-3-1b-it | 32k tokens |
| gemma-3-4b-it | 32k tokens |
| gemma-3-12b-it | 32k tokens |
| gemma-3-27b-it | 128k tokens |

</Accordion>
<Accordion title="Google (Vertex AI)">
Get credentials from your Google Cloud Console, save them to a JSON file, then load them with the following code:
@@ -291,43 +468,146 @@ In this section, you'll find detailed examples that help you select, configure,
</Accordion>

<Accordion title="Azure">
CrewAI provides native integration with Azure AI Inference and Azure OpenAI through the Azure AI Inference Python SDK.

```toml Code
# Required
AZURE_API_KEY=<your-api-key>
AZURE_API_BASE=<your-resource-url>
AZURE_API_VERSION=<api-version>
AZURE_ENDPOINT=<your-endpoint-url>

# Optional
AZURE_AD_TOKEN=<your-azure-ad-token>
AZURE_API_TYPE=<your-azure-api-type>
AZURE_API_VERSION=<api-version>  # Defaults to 2024-06-01
```

Example usage in your CrewAI project:
**Endpoint URL Formats:**

For Azure OpenAI deployments:
```
https://<resource-name>.openai.azure.com/openai/deployments/<deployment-name>
```

For Azure AI Inference endpoints:
```
https://<resource-name>.inference.azure.com
```

**Basic Usage:**
```python Code
llm = LLM(
    model="azure/gpt-4",
    api_version="2023-05-15"
    api_key="<your-api-key>",  # Or set AZURE_API_KEY
    endpoint="<your-endpoint-url>",
    api_version="2024-06-01"
)
```

**Advanced Configuration:**
```python Code
llm = LLM(
    model="azure/gpt-4o",
    temperature=0.7,
    max_tokens=4000,
    top_p=0.9,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    stop=["END"],
    stream=True,
    timeout=60.0,
    max_retries=3
)
```

**Supported Environment Variables:**
- `AZURE_API_KEY`: Your Azure API key (required)
- `AZURE_ENDPOINT`: Your Azure endpoint URL (required, also checks `AZURE_OPENAI_ENDPOINT` and `AZURE_API_BASE`)
- `AZURE_API_VERSION`: API version (optional, defaults to `2024-06-01`)

**Features:**
- Native function calling support for Azure OpenAI models (gpt-4, gpt-4o, gpt-3.5-turbo, etc.)
- Streaming support for real-time responses
- Automatic endpoint URL validation and correction
- Comprehensive error handling with retry logic
- Token usage tracking

**Note:** To use Azure AI Inference, install the required dependencies:
```bash
uv add "crewai[azure-ai-inference]"
```
</Accordion>

<Accordion title="AWS Bedrock">
CrewAI provides native integration with AWS Bedrock through the boto3 SDK using the Converse API.

```toml Code
# Required
AWS_ACCESS_KEY_ID=<your-access-key>
AWS_SECRET_ACCESS_KEY=<your-secret-key>
AWS_DEFAULT_REGION=<your-region>

# Optional
AWS_SESSION_TOKEN=<your-session-token>  # For temporary credentials
AWS_DEFAULT_REGION=<your-region>  # Defaults to us-east-1
```

Example usage in your CrewAI project:
**Basic Usage:**
```python Code
from crewai import LLM

llm = LLM(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0"
    model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0",
    region_name="us-east-1"
)
```

Before using Amazon Bedrock, make sure you have boto3 installed in your environment.
[Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html) is a managed service that provides access to multiple foundation models from top AI companies through a unified API, enabling secure and responsible AI application development.

**Advanced Configuration:**
```python Code
from crewai import LLM

llm = LLM(
    model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0",
    aws_access_key_id="your-access-key",  # Or set AWS_ACCESS_KEY_ID
    aws_secret_access_key="your-secret-key",  # Or set AWS_SECRET_ACCESS_KEY
    aws_session_token="your-session-token",  # For temporary credentials
    region_name="us-east-1",
    temperature=0.7,
    max_tokens=4096,
    top_p=0.9,
    top_k=250,  # For Claude models
    stop_sequences=["END", "STOP"],
    stream=True,  # Enable streaming
    guardrail_config={  # Optional content filtering
        "guardrailIdentifier": "your-guardrail-id",
        "guardrailVersion": "1"
    },
    additional_model_request_fields={  # Model-specific parameters
        "top_k": 250
    }
)
```

**Supported Environment Variables:**
- `AWS_ACCESS_KEY_ID`: AWS access key (required)
- `AWS_SECRET_ACCESS_KEY`: AWS secret key (required)
- `AWS_SESSION_TOKEN`: AWS session token for temporary credentials (optional)
- `AWS_DEFAULT_REGION`: AWS region (defaults to `us-east-1`)

**Features:**
- Native tool calling support via the Converse API
- Streaming and non-streaming responses
- Comprehensive error handling with retry logic
- Guardrail configuration for content filtering
- Model-specific parameters via `additional_model_request_fields`
- Token usage tracking and stop reason logging
- Support for all Bedrock foundation models
- Automatic conversation format handling

**Important Notes:**
- Uses the modern Converse API for unified model access
- Automatic handling of model-specific conversation requirements
- System messages are handled separately from the conversation
- The first message must be from the user (handled automatically)
- Some models (like Cohere) require the conversation to end with a user message

[Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html) is a managed service that provides access to multiple foundation models from top AI companies through a unified API.

| Model | Context Window | Best For |
|-------------------------|----------------------|-------------------------------------------------------------------|
@@ -357,7 +637,12 @@ In this section, you'll find detailed examples that help you select, configure,
| Jamba-Instruct | Up to 256k tokens | Model with extended context window optimized for cost-effective text generation, summarization, and Q&A. |
| Mistral 7B Instruct | Up to 32k tokens | This LLM follows instructions, completes requests, and generates creative text. |
| Mistral 8x7B Instruct | Up to 32k tokens | An MoE LLM that follows instructions, completes requests, and generates creative text. |
| DeepSeek R1 | 32,768 tokens | Advanced reasoning model |

**Note:** To use AWS Bedrock, install the required dependencies:
```bash
uv add "crewai[bedrock]"
```
</Accordion>

<Accordion title="Amazon SageMaker">
@@ -899,7 +1184,7 @@ Learn how to get the most out of your LLM configuration:
</Accordion>

<Accordion title="Drop Additional Parameters">
CrewAI internally uses LiteLLM for LLM calls, which allows you to drop additional parameters that are not needed for your specific use case. This can help simplify your code and reduce the complexity of your LLM configuration.
CrewAI internally uses native SDKs for LLM calls, which allows you to drop additional parameters that are not needed for your specific use case. This can help simplify your code and reduce the complexity of your LLM configuration.
For example, if you don't need to send the <code>stop</code> parameter, you can simply omit it from your LLM call:
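A minimal sketch of such a call, reusing the `LLM` class from the examples above (the model name is illustrative):

```python
from crewai import LLM

# The stop parameter is simply omitted rather than passed as None
llm = LLM(
    model="openai/gpt-4o",
    temperature=0.7,
)
```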
docs/en/mcp/dsl-integration.mdx (new file, 344 lines)
@@ -0,0 +1,344 @@
---
title: MCP DSL Integration
description: Learn how to use CrewAI's simple DSL syntax to integrate MCP servers directly with your agents using the mcps field.
icon: code
mode: "wide"
---

## Overview

CrewAI's MCP DSL (Domain Specific Language) integration provides the **simplest way** to connect your agents to MCP (Model Context Protocol) servers. Just add an `mcps` field to your agent and CrewAI handles all the complexity automatically.

<Info>
This is the **recommended approach** for most MCP use cases. For advanced scenarios requiring manual connection management, see [MCPServerAdapter](/en/mcp/overview#advanced-mcpserveradapter).
</Info>

## Basic Usage

Add MCP servers to your agent using the `mcps` field:

```python
from crewai import Agent

agent = Agent(
    role="Research Assistant",
    goal="Help with research and analysis tasks",
    backstory="Expert assistant with access to advanced research tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=your_key&profile=research"
    ]
)

# MCP tools are now automatically available!
# No need for manual connection management or tool configuration
```

## Supported Reference Formats

### External MCP Remote Servers

```python
# Basic HTTPS server
"https://api.example.com/mcp"

# Server with authentication
"https://mcp.exa.ai/mcp?api_key=your_key&profile=your_profile"

# Server with custom path
"https://services.company.com/api/v1/mcp"
```

### Specific Tool Selection

Use the `#` syntax to select specific tools from a server:

```python
# Get only the forecast tool from the weather server
"https://weather.api.com/mcp#get_forecast"

# Get only the search tool from Exa
"https://mcp.exa.ai/mcp?api_key=your_key#web_search_exa"
```

### CrewAI AMP Marketplace

Access tools from the CrewAI AMP marketplace:

```python
# Full service with all tools
"crewai-amp:financial-data"

# Specific tool from an AMP service
"crewai-amp:research-tools#pubmed_search"

# Multiple AMP services
mcps=[
    "crewai-amp:weather-insights",
    "crewai-amp:market-analysis",
    "crewai-amp:social-media-monitoring"
]
```

## Complete Example

Here's a complete example using multiple MCP servers:

```python
from crewai import Agent, Task, Crew, Process

# Create agent with multiple MCP sources
multi_source_agent = Agent(
    role="Multi-Source Research Analyst",
    goal="Conduct comprehensive research using multiple data sources",
    backstory="""Expert researcher with access to web search, weather data,
    financial information, and academic research tools""",
    mcps=[
        # External MCP servers
        "https://mcp.exa.ai/mcp?api_key=your_exa_key&profile=research",
        "https://weather.api.com/mcp#get_current_conditions",

        # CrewAI AMP marketplace
        "crewai-amp:financial-insights",
        "crewai-amp:academic-research#pubmed_search",
        "crewai-amp:market-intelligence#competitor_analysis"
    ]
)

# Create comprehensive research task
research_task = Task(
    description="""Research the impact of AI agents on business productivity.
    Include current weather impacts on remote work, financial market trends,
    and recent academic publications on AI agent frameworks.""",
    expected_output="""Comprehensive report covering:
    1. AI agent business impact analysis
    2. Weather considerations for remote work
    3. Financial market trends related to AI
    4. Academic research citations and insights
    5. Competitive landscape analysis""",
    agent=multi_source_agent
)

# Create and execute crew
research_crew = Crew(
    agents=[multi_source_agent],
    tasks=[research_task],
    process=Process.sequential,
    verbose=True
)

result = research_crew.kickoff()
print(f"Research completed with {len(multi_source_agent.mcps)} MCP data sources")
```

## Tool Naming and Organization

CrewAI automatically handles tool naming to prevent conflicts:

```python
# Original MCP server has tools: "search", "analyze"
# CrewAI creates tools: "mcp_exa_ai_search", "mcp_exa_ai_analyze"

agent = Agent(
    role="Tool Organization Demo",
    goal="Show how tool naming works",
    backstory="Demonstrates automatic tool organization",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=key",  # Tools: mcp_exa_ai_*
        "https://weather.service.com/mcp",  # Tools: weather_service_com_*
        "crewai-amp:financial-data"  # Tools: financial_data_*
    ]
)

# Each server's tools get unique prefixes based on the server name
# This prevents naming conflicts between different MCP servers
```

## Error Handling and Resilience

The MCP DSL is designed to be robust and user-friendly:

### Graceful Server Failures

```python
agent = Agent(
    role="Resilient Researcher",
    goal="Research despite server issues",
    backstory="Experienced researcher who adapts to available tools",
    mcps=[
        "https://primary-server.com/mcp",  # Primary data source
        "https://backup-server.com/mcp",  # Backup if primary fails
        "https://unreachable-server.com/mcp",  # Will be skipped with warning
        "crewai-amp:reliable-service"  # Reliable AMP service
    ]
)

# Agent will:
# 1. Successfully connect to working servers
# 2. Log warnings for failing servers
# 3. Continue with available tools
# 4. Not crash or hang on server failures
```

### Timeout Protection

All MCP operations have built-in timeouts:

- **Connection timeout**: 10 seconds
- **Tool execution timeout**: 30 seconds
- **Discovery timeout**: 15 seconds

```python
# These servers will time out gracefully if unresponsive
mcps=[
    "https://slow-server.com/mcp",  # Will time out after 10s if unresponsive
    "https://overloaded-api.com/mcp"  # Will time out if discovery takes > 15s
]
```

## Performance Features

### Automatic Caching

Tool schemas are cached for 5 minutes to improve performance:

```python
# First agent creation - discovers tools from the server
agent1 = Agent(role="First", goal="Test", backstory="Test",
               mcps=["https://api.example.com/mcp"])

# Second agent creation (within 5 minutes) - uses cached tool schemas
agent2 = Agent(role="Second", goal="Test", backstory="Test",
               mcps=["https://api.example.com/mcp"])  # Much faster!
```

### On-Demand Connections

Tool connections are established only when tools are actually used:

```python
# Agent creation is fast - no MCP connections are made yet
agent = Agent(
    role="On-Demand Agent",
    goal="Use tools efficiently",
    backstory="Efficient agent that connects only when needed",
    mcps=["https://api.example.com/mcp"]
)

# The MCP connection is made only when a tool is actually executed
# This minimizes connection overhead and improves startup performance
```

## Integration with Existing Features

MCP tools work seamlessly with other CrewAI features:

```python
from crewai.tools import BaseTool

class CustomTool(BaseTool):
    name: str = "custom_analysis"
    description: str = "Custom analysis tool"

    def _run(self, **kwargs):
        return "Custom analysis result"

agent = Agent(
    role="Full-Featured Agent",
    goal="Use all available tool types",
    backstory="Agent with comprehensive tool access",

    # All tool types work together
    tools=[CustomTool()],  # Custom tools
    apps=["gmail", "slack"],  # Platform integrations
    mcps=[  # MCP servers
        "https://mcp.exa.ai/mcp?api_key=key",
        "crewai-amp:research-tools"
    ],

    verbose=True,
    max_iter=15
)
```

## Best Practices

### 1. Use Specific Tools When Possible

```python
# Good - only get the tools you need
mcps=["https://weather.api.com/mcp#get_forecast"]

# Less efficient - gets all tools from the server
mcps=["https://weather.api.com/mcp"]
```

### 2. Handle Authentication Securely

```python
import os

# Store API keys in environment variables
exa_key = os.getenv("EXA_API_KEY")
exa_profile = os.getenv("EXA_PROFILE")

agent = Agent(
    role="Secure Agent",
    goal="Use MCP tools securely",
    backstory="Security-conscious agent",
    mcps=[f"https://mcp.exa.ai/mcp?api_key={exa_key}&profile={exa_profile}"]
)
```

### 3. Plan for Server Failures

```python
# Always include backup options
mcps=[
    "https://primary-api.com/mcp",  # Primary choice
    "https://backup-api.com/mcp",  # Backup option
    "crewai-amp:reliable-service"  # AMP fallback
]
```

### 4. Use Descriptive Agent Roles

```python
agent = Agent(
    role="Weather-Enhanced Market Analyst",
    goal="Analyze markets considering weather impacts",
    backstory="Financial analyst with access to weather data for agricultural market insights",
    mcps=[
        "https://weather.service.com/mcp#get_forecast",
        "crewai-amp:financial-data#stock_analysis"
    ]
)
```

## Troubleshooting

### Common Issues

**No tools discovered:**
```python
# Check your MCP server URL and authentication
# Verify the server is running and accessible
mcps=["https://mcp.example.com/mcp?api_key=valid_key"]
```

**Connection timeouts:**
```python
# Server may be slow or overloaded
# CrewAI will log warnings and continue with other servers
# Check server status or try backup servers
```

**Authentication failures:**
```python
# Verify API keys and credentials
# Check server documentation for required parameters
# Ensure query parameters are properly URL encoded
```

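One way to ensure query parameters are properly URL encoded is to build the reference with the standard library's `urllib.parse` (a sketch; the host and parameter values are placeholders):

```python
from urllib.parse import urlencode

# Placeholder credentials; values with spaces or slashes get encoded safely
params = urlencode({"api_key": "your key/with specials", "profile": "research"})
mcp_ref = f"https://mcp.example.com/mcp?{params}"

agent_mcps = [mcp_ref]  # ready to pass as the mcps field
```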
## Advanced: MCPServerAdapter

For complex scenarios requiring manual connection management, use the `MCPServerAdapter` class from `crewai-tools`. Using a Python context manager (`with` statement) is the recommended approach, as it automatically handles starting and stopping the connection to the MCP server.
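As a rough sketch of that pattern for a local Stdio server (the command and script path are placeholders; the adapter API is the one provided by `crewai-tools`):

```python
from crewai import Agent
from crewai_tools import MCPServerAdapter
from mcp import StdioServerParameters

# Placeholder command and script path for a local MCP server
server_params = StdioServerParameters(
    command="python3",
    args=["servers/your_mcp_server.py"],
)

with MCPServerAdapter(server_params) as mcp_tools:
    # The adapter starts the server process on entry and stops it on exit
    agent = Agent(
        role="Local Tools Agent",
        goal="Use tools from a local MCP server",
        backstory="Agent wired to a Stdio MCP server",
        tools=mcp_tools,
    )
```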
@@ -8,14 +8,39 @@ mode: "wide"
## Overview

The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP) provides a standardized way for AI agents to provide context to LLMs by communicating with external services, known as MCP Servers.
The `crewai-tools` library extends CrewAI's capabilities by allowing you to seamlessly integrate tools from these MCP servers into your agents.
This gives your crews access to a vast ecosystem of functionalities.

CrewAI offers **two approaches** for MCP integration:

### **Simple DSL Integration** (Recommended)

Use the `mcps` field directly on agents for seamless MCP tool integration:

```python
from crewai import Agent

agent = Agent(
    role="Research Analyst",
    goal="Research and analyze information",
    backstory="Expert researcher with access to external tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=your_key",  # External MCP server
        "https://api.weather.com/mcp#get_forecast",  # Specific tool from server
        "crewai-amp:financial-data",  # CrewAI AMP marketplace
        "crewai-amp:research-tools#pubmed_search"  # Specific AMP tool
    ]
)
# MCP tools are now automatically available to your agent!
```

### 🔧 **Advanced: MCPServerAdapter** (For Complex Scenarios)

For advanced use cases requiring manual connection management, the `crewai-tools` library provides the `MCPServerAdapter` class.

We currently support the following transport mechanisms:

- **Stdio**: for local servers (communication via standard input/output between processes on the same machine)
- **Server-Sent Events (SSE)**: for remote servers (unidirectional, real-time data streaming from server to client over HTTP)
- **Streamable HTTP**: for remote servers (flexible, potentially bi-directional communication over HTTP, often utilizing SSE for server-to-client streams)
- **Streamable HTTPS**: for remote servers (flexible, potentially bi-directional communication over HTTPS, often utilizing SSE for server-to-client streams)

## Video Tutorial
Watch this video tutorial for a comprehensive guide on MCP integration with CrewAI:
@@ -31,17 +56,125 @@ Watch this video tutorial for a comprehensive guide on MCP integration with CrewAI:

## Installation

Before you start using MCP with `crewai-tools`, you need to install the `mcp` extra of the `crewai-tools` dependency with the following command:
CrewAI MCP integration requires the `mcp` library:

```shell
# For Simple DSL Integration (Recommended)
uv add mcp

# For Advanced MCPServerAdapter usage
uv pip install 'crewai-tools[mcp]'
```

## Key Concepts & Getting Started
## Quick Start: Simple DSL Integration

The `MCPServerAdapter` class from `crewai-tools` is the primary way to connect to an MCP server and make its tools available to your CrewAI agents. It supports different transport mechanisms and simplifies connection management.
The easiest way to integrate MCP servers is using the `mcps` field on your agents:

Using a Python context manager (`with` statement) is the **recommended approach** for `MCPServerAdapter`. It automatically handles starting and stopping the connection to the MCP server.
```python
from crewai import Agent, Task, Crew

# Create agent with MCP tools
research_agent = Agent(
    role="Research Analyst",
    goal="Find and analyze information using advanced search tools",
    backstory="Expert researcher with access to multiple data sources",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=your_key&profile=your_profile",
        "crewai-amp:weather-service#current_conditions"
    ]
)

# Create task
research_task = Task(
    description="Research the latest developments in AI agent frameworks",
    expected_output="Comprehensive research report with citations",
    agent=research_agent
)

# Create and run crew
crew = Crew(agents=[research_agent], tasks=[research_task])
result = crew.kickoff()
```

That's it! The MCP tools are automatically discovered and available to your agent.

## MCP Reference Formats

The `mcps` field supports various reference formats for maximum flexibility:

### External MCP Servers

```python
mcps=[
    # Full server - get all available tools
    "https://mcp.example.com/api",

    # Specific tool from server using # syntax
    "https://api.weather.com/mcp#get_current_weather",

    # Server with authentication parameters
    "https://mcp.exa.ai/mcp?api_key=your_key&profile=your_profile"
]
```

### CrewAI AMP Marketplace

```python
mcps=[
    # Full AMP MCP service - get all available tools
    "crewai-amp:financial-data",

    # Specific tool from AMP service using # syntax
    "crewai-amp:research-tools#pubmed_search",

    # Multiple AMP services
    "crewai-amp:weather-service",
    "crewai-amp:market-analysis"
]
```

### Mixed References

```python
mcps=[
    "https://external-api.com/mcp",  # External server
    "https://weather.service.com/mcp#forecast",  # Specific external tool
    "crewai-amp:financial-insights",  # AMP service
    "crewai-amp:data-analysis#sentiment_tool"  # Specific AMP tool
]
```

## Key Features

- 🔄 **Automatic Tool Discovery**: Tools are automatically discovered and integrated
- 🏷️ **Name Collision Prevention**: Server names are prefixed to tool names
- ⚡ **Performance Optimized**: On-demand connections with schema caching
- 🛡️ **Error Resilience**: Graceful handling of unavailable servers
- ⏱️ **Timeout Protection**: Built-in timeouts prevent hanging connections
- 📊 **Transparent Integration**: Works seamlessly with existing CrewAI features

## Error Handling

The MCP DSL integration is designed to be resilient:

```python
agent = Agent(
    role="Resilient Agent",
    goal="Continue working despite server issues",
    backstory="Agent that handles failures gracefully",
    mcps=[
        "https://reliable-server.com/mcp",  # Will work
        "https://unreachable-server.com/mcp",  # Will be skipped gracefully
        "https://slow-server.com/mcp",  # Will time out gracefully
        "crewai-amp:working-service"  # Will work
    ]
)
# Agent will use tools from working servers and log warnings for failing ones
```

## Advanced: MCPServerAdapter

For complex scenarios requiring manual connection management, use the `MCPServerAdapter` class from `crewai-tools`. Using a Python context manager (`with` statement) is the recommended approach, as it automatically handles starting and stopping the connection to the MCP server.
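A short sketch with a remote Streamable HTTP server (the URL is a placeholder; the dict-based config with a `transport` key follows the `crewai-tools` convention for the transports listed above):

```python
from crewai import Agent
from crewai_tools import MCPServerAdapter

# Placeholder remote endpoint; "transport" selects Streamable HTTP
server_params = {
    "url": "https://mcp.example.com/mcp",
    "transport": "streamable-http",
}

with MCPServerAdapter(server_params) as mcp_tools:
    # The connection opens on entry and is cleanly closed on exit
    agent = Agent(
        role="Remote Tools Agent",
        goal="Use tools from a remote MCP server",
        backstory="Agent wired to a Streamable HTTP MCP server",
        tools=mcp_tools,
    )
```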
## Connection Configuration

@@ -241,11 +374,19 @@ class CrewWithCustomTimeout:
## Explore MCP Integrations

<CardGroup cols={2}>
  <Card
    title="Simple DSL Integration"
    icon="code"
    href="/en/mcp/dsl-integration"
    color="#3B82F6"
  >
    **Recommended**: Use the simple `mcps=[]` field syntax for effortless MCP integration.
  </Card>
  <Card
    title="Stdio Transport"
    icon="server"
    href="/en/mcp/stdio"
    color="#3B82F6"
    color="#10B981"
  >
    Connect to local MCP servers via standard input/output. Ideal for scripts and local executables.
  </Card>
@@ -253,7 +394,7 @@ class CrewWithCustomTimeout:
    title="SSE Transport"
    icon="wifi"
    href="/en/mcp/sse"
    color="#10B981"
    color="#F59E0B"
  >
    Integrate with remote MCP servers using Server-Sent Events for real-time data streaming.
  </Card>
@@ -261,7 +402,7 @@ class CrewWithCustomTimeout:
    title="Streamable HTTP Transport"
    icon="globe"
    href="/en/mcp/streamable-http"
    color="#F59E0B"
    color="#8B5CF6"
  >
    Utilize flexible Streamable HTTP for robust communication with remote MCP servers.
  </Card>
@@ -269,7 +410,7 @@ class CrewWithCustomTimeout:
    title="Connecting to Multiple Servers"
    icon="layer-group"
    href="/en/mcp/multiple-servers"
    color="#8B5CF6"
    color="#EF4444"
  >
    Aggregate tools from several MCP servers simultaneously using a single adapter.
  </Card>
@@ -277,7 +418,7 @@ class CrewWithCustomTimeout:
    title="Security Considerations"
    icon="lock"
    href="/en/mcp/security"
    color="#EF4444"
    color="#DC2626"
  >
    Review important security best practices for MCP integration to keep your agents safe.
  </Card>

@@ -11,7 +11,7 @@ mode: "wide"
  <Card
    title="Bedrock Invoke Agent Tool"
    icon="cloud"
    href="/en/tools/tool-integrations/bedrockinvokeagenttool"
    href="/en/tools/integration/bedrockinvokeagenttool"
    color="#0891B2"
  >
    Invoke Amazon Bedrock Agents from CrewAI to orchestrate actions across AWS services.
@@ -20,7 +20,7 @@ mode: "wide"
  <Card
    title="CrewAI Automation Tool"
    icon="bolt"
    href="/en/tools/tool-integrations/crewaiautomationtool"
    href="/en/tools/integration/crewaiautomationtool"
    color="#7C3AED"
  >
    Automate deployment and operations by integrating CrewAI with external platforms and workflows.

docs/ko/mcp/dsl-integration.mdx (new file, 232 lines)
@@ -0,0 +1,232 @@
---
title: MCP DSL Integration
description: Learn how to use CrewAI's simple DSL syntax to integrate MCP servers directly with your agents via the mcps field.
icon: code
mode: "wide"
---

## Overview

CrewAI's MCP DSL (Domain Specific Language) integration provides the **simplest way** to connect your agents to MCP (Model Context Protocol) servers. Just add an `mcps` field to your agent and CrewAI handles all the complexity automatically.

<Info>
This is the **recommended approach** for most MCP use cases. For advanced scenarios requiring manual connection management, see [MCPServerAdapter](/ko/mcp/overview#advanced-mcpserveradapter).
</Info>

## Basic Usage

Add MCP servers to your agent using the `mcps` field:

```python
from crewai import Agent

agent = Agent(
    role="Research Assistant",
    goal="Support research and analysis tasks",
    backstory="Expert assistant with access to advanced research tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=your_key&profile=research"
    ]
)

# MCP tools are now automatically available!
# No manual connection management or tool configuration is needed
```

## Supported Reference Formats

### External Remote MCP Servers

```python
# Basic HTTPS server
"https://api.example.com/mcp"

# Server with authentication
"https://mcp.exa.ai/mcp?api_key=your_key&profile=your_profile"

# Server with custom path
"https://services.company.com/api/v1/mcp"
```

### Specific Tool Selection

Use the `#` syntax to select specific tools from a server:

```python
# Get only the forecast tool from the weather server
"https://weather.api.com/mcp#get_forecast"

# Get only the search tool from Exa
"https://mcp.exa.ai/mcp?api_key=your_key#web_search_exa"
```

### CrewAI AMP Marketplace

Access tools from the CrewAI AMP marketplace:

```python
# Full service with all tools
"crewai-amp:financial-data"

# Specific tool from an AMP service
"crewai-amp:research-tools#pubmed_search"

# Multiple AMP services
mcps=[
    "crewai-amp:weather-insights",
    "crewai-amp:market-analysis",
    "crewai-amp:social-media-monitoring"
]
```

## Complete Example

Here is a complete example using multiple MCP servers:

```python
from crewai import Agent, Task, Crew, Process

# Create an agent with multiple MCP sources
multi_source_agent = Agent(
    role="Multi-Source Research Analyst",
    goal="Conduct comprehensive research using multiple data sources",
    backstory="""Expert researcher with access to web search, weather data,
    financial information, and academic research tools""",
    mcps=[
        # External MCP servers
        "https://mcp.exa.ai/mcp?api_key=your_exa_key&profile=research",
        "https://weather.api.com/mcp#get_current_conditions",

        # CrewAI AMP marketplace
        "crewai-amp:financial-insights",
        "crewai-amp:academic-research#pubmed_search",
        "crewai-amp:market-intelligence#competitor_analysis"
    ]
)

# Create a comprehensive research task
research_task = Task(
    description="""Research the impact of AI agents on business productivity.
    Include current weather impacts on remote work, financial market trends,
    and recent academic publications on AI agent frameworks.""",
    expected_output="""Comprehensive report covering:
    1. AI agent business impact analysis
    2. Weather considerations for remote work
    3. Financial market trends related to AI
    4. Academic research citations and insights
    5. Competitive landscape analysis""",
    agent=multi_source_agent
)

# Create and execute the crew
research_crew = Crew(
    agents=[multi_source_agent],
    tasks=[research_task],
    process=Process.sequential,
    verbose=True
)

result = research_crew.kickoff()
print(f"Research completed with {len(multi_source_agent.mcps)} MCP data sources")
```

## Key Features

- 🔄 **Automatic Tool Discovery**: Tools are automatically discovered and integrated
- 🏷️ **Name Collision Prevention**: Server names are prefixed to tool names
- ⚡ **Performance Optimized**: On-demand connections with schema caching
- 🛡️ **Error Resilience**: Graceful handling of unavailable servers
- ⏱️ **Timeout Protection**: Built-in timeouts prevent hanging connections
- 📊 **Transparent Integration**: Works seamlessly with existing CrewAI features

## Error Handling

The MCP DSL integration is designed to be resilient:

```python
agent = Agent(
    role="Resilient Agent",
    goal="Continue working despite server issues",
    backstory="Agent that handles failures gracefully",
    mcps=[
        "https://reliable-server.com/mcp",  # Will work
        "https://unreachable-server.com/mcp",  # Will be skipped gracefully
        "https://slow-server.com/mcp",  # Will time out gracefully
        "crewai-amp:working-service"  # Will work
    ]
)
# The agent will use tools from working servers and log warnings for failing ones
```

## Performance Features

### Automatic Caching

Tool schemas are cached for 5 minutes to improve performance:

```python
# First agent creation - discovers tools from the server
agent1 = Agent(role="First", goal="Test", backstory="Test",
               mcps=["https://api.example.com/mcp"])

# Second agent creation (within 5 minutes) - uses cached tool schemas
agent2 = Agent(role="Second", goal="Test", backstory="Test",
               mcps=["https://api.example.com/mcp"])  # Much faster!
```

### On-Demand Connections

Tool connections are established only when tools are actually used:

```python
# Agent creation is fast - no MCP connections are made yet
agent = Agent(
    role="On-Demand Agent",
    goal="Use tools efficiently",
    backstory="Efficient agent that connects only when needed",
    mcps=["https://api.example.com/mcp"]
)

# The MCP connection is made only when a tool is actually executed
# This minimizes connection overhead and improves startup performance
```

## Best Practices

### 1. Use Specific Tools When Possible

```python
# Good - only get the tools you need
mcps=["https://weather.api.com/mcp#get_forecast"]

# Less efficient - gets all tools from the server
mcps=["https://weather.api.com/mcp"]
```

### 2. Handle Authentication Securely

```python
import os

# Store API keys in environment variables
exa_key = os.getenv("EXA_API_KEY")
exa_profile = os.getenv("EXA_PROFILE")

agent = Agent(
    role="Secure Agent",
    goal="Use MCP tools securely",
    backstory="Security-conscious agent",
    mcps=[f"https://mcp.exa.ai/mcp?api_key={exa_key}&profile={exa_profile}"]
)
```

### 3. Plan for Server Failures

```python
# Always include backup options
mcps=[
    "https://primary-api.com/mcp",  # Primary choice
    "https://backup-api.com/mcp",  # Backup option
    "crewai-amp:reliable-service"  # AMP fallback
]
```
@@ -8,12 +8,37 @@ mode: "wide"
## Overview

The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP) provides a standardized way for AI agents to provide context to LLMs by communicating with external services, known as MCP Servers.
The `crewai-tools` library extends CrewAI's capabilities, letting you seamlessly integrate tools from these MCP servers into your agents.
This gives your crews access to a vast ecosystem of functionality.

CrewAI offers **two approaches** for MCP integration:

### 🚀 **New: Simple DSL Integration** (Recommended)

Use the `mcps` field directly on your agents for seamless MCP tool integration:

```python
from crewai import Agent

agent = Agent(
    role="Research Analyst",
    goal="Research and analyze information",
    backstory="Expert researcher with access to external tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=your_key",  # External MCP server
        "https://api.weather.com/mcp#get_forecast",  # Specific tool from server
        "crewai-amp:financial-data",  # CrewAI AMP marketplace
        "crewai-amp:research-tools#pubmed_search"  # Specific AMP tool
    ]
)
# MCP tools are now automatically available to your agent!
```

### 🔧 **Advanced: MCPServerAdapter** (For Complex Scenarios)

For advanced use cases requiring manual connection management, the `crewai-tools` library provides the `MCPServerAdapter` class.

The following transport mechanisms are currently supported:

- **Stdio**: for local servers (communication via standard input/output between processes on the same machine)
- **HTTPS**: for remote servers (secure communication over HTTPS)
- **Server-Sent Events (SSE)**: for remote servers (unidirectional, real-time data streaming from server to client over HTTP)
- **Streamable HTTP**: for remote servers (flexible, potentially bi-directional communication over HTTP, often using SSE for server-to-client streams)

docs/pt-BR/mcp/dsl-integration.mdx (new file, 232 lines)
@@ -0,0 +1,232 @@
---
title: MCP DSL Integration
description: Learn how to use CrewAI's simple DSL syntax to integrate MCP servers directly with your agents using the mcps field.
icon: code
mode: "wide"
---

## Overview

CrewAI's MCP DSL (Domain Specific Language) integration offers the **simplest way** to connect your agents to MCP (Model Context Protocol) servers. Just add an `mcps` field to your agent and CrewAI handles all the complexity automatically.

<Info>
This is the **recommended approach** for most MCP use cases. For advanced scenarios that require manual connection management, see [MCPServerAdapter](/pt-BR/mcp/overview#advanced-mcpserveradapter).
</Info>

## Basic Usage

Add MCP servers to your agent using the `mcps` field:

```python
from crewai import Agent

agent = Agent(
    role="Research Assistant",
    goal="Help with research and analysis tasks",
    backstory="Expert assistant with access to advanced research tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=sua_chave&profile=pesquisa"
    ]
)

# MCP tools are now automatically available!
# No manual connection management or tool configuration is needed
```

## Supported Reference Formats

### External Remote MCP Servers

```python
# Basic HTTPS server
"https://api.example.com/mcp"

# Server with authentication
"https://mcp.exa.ai/mcp?api_key=sua_chave&profile=seu_perfil"

# Server with custom path
"https://services.company.com/api/v1/mcp"
```

### Specific Tool Selection

Use the `#` syntax to select specific tools from a server:

```python
# Get only the forecast tool from the weather server
"https://weather.api.com/mcp#get_forecast"

# Get only the search tool from Exa
"https://mcp.exa.ai/mcp?api_key=sua_chave#web_search_exa"
```

### CrewAI AMP Marketplace

Access tools from the CrewAI AMP marketplace:

```python
# Full service with all tools
"crewai-amp:financial-data"

# Specific tool from an AMP service
"crewai-amp:research-tools#pubmed_search"

# Multiple AMP services
mcps=[
    "crewai-amp:weather-insights",
    "crewai-amp:market-analysis",
    "crewai-amp:social-media-monitoring"
]
```

## Complete Example

Here is a complete example using multiple MCP servers:

```python
from crewai import Agent, Task, Crew, Process

# Create an agent with multiple MCP sources
agente_multi_fonte = Agent(
    role="Multi-Source Research Analyst",
    goal="Conduct comprehensive research using multiple data sources",
    backstory="""Expert researcher with access to web search, weather data,
    financial information, and academic research tools""",
    mcps=[
        # External MCP servers
        "https://mcp.exa.ai/mcp?api_key=sua_chave_exa&profile=pesquisa",
        "https://weather.api.com/mcp#get_current_conditions",

        # CrewAI AMP marketplace
        "crewai-amp:financial-insights",
        "crewai-amp:academic-research#pubmed_search",
        "crewai-amp:market-intelligence#competitor_analysis"
    ]
)

# Create a comprehensive research task
tarefa_pesquisa = Task(
    description="""Research the impact of AI agents on business productivity.
    Include current weather impacts on remote work, financial market trends,
    and recent academic publications on AI agent frameworks.""",
    expected_output="""Comprehensive report covering:
    1. AI agent business impact analysis
    2. Weather considerations for remote work
    3. Financial market trends related to AI
    4. Academic research citations and insights
    5. Competitive landscape analysis""",
    agent=agente_multi_fonte
)

# Create and execute the crew
crew_pesquisa = Crew(
    agents=[agente_multi_fonte],
    tasks=[tarefa_pesquisa],
    process=Process.sequential,
    verbose=True
)

resultado = crew_pesquisa.kickoff()
print(f"Research completed with {len(agente_multi_fonte.mcps)} MCP data sources")
|
||||
```
|
||||
|
||||
## Recursos Principais
|
||||
|
||||
- 🔄 **Descoberta Automática de Ferramentas**: Ferramentas são descobertas e integradas automaticamente
|
||||
- 🏷️ **Prevenção de Colisão de Nomes**: Nomes de servidor são prefixados aos nomes das ferramentas
|
||||
- ⚡ **Otimizado para Performance**: Conexões sob demanda com cache de esquemas
|
||||
- 🛡️ **Resiliência a Erros**: Tratamento gracioso de servidores indisponíveis
|
||||
- ⏱️ **Proteção por Timeout**: Timeouts integrados previnem conexões travadas
|
||||
- 📊 **Integração Transparente**: Funciona perfeitamente com recursos existentes do CrewAI
|
||||
|
||||
## Tratamento de Erros
|
||||
|
||||
A integração DSL MCP é projetada para ser resiliente:
|
||||
|
||||
```python
|
||||
agente = Agent(
|
||||
role="Agente Resiliente",
|
||||
goal="Continuar trabalhando apesar de problemas no servidor",
|
||||
backstory="Agente que lida graciosamente com falhas",
|
||||
mcps=[
|
||||
"https://servidor-confiavel.com/mcp", # Vai funcionar
|
||||
"https://servidor-inalcancavel.com/mcp", # Será ignorado graciosamente
|
||||
"https://servidor-lento.com/mcp", # Timeout gracioso
|
||||
"crewai-amp:servico-funcionando" # Vai funcionar
|
||||
]
|
||||
)
|
||||
# O agente usará ferramentas de servidores funcionais e registrará avisos para os que falharem
|
||||
```
|
||||
|
||||
## Recursos de Performance
|
||||
|
||||
### Cache Automático
|
||||
|
||||
Esquemas de ferramentas são cacheados por 5 minutos para melhorar a performance:
|
||||
|
||||
```python
|
||||
# Primeira criação de agente - descobre ferramentas do servidor
|
||||
agente1 = Agent(role="Primeiro", goal="Teste", backstory="Teste",
|
||||
mcps=["https://api.example.com/mcp"])
|
||||
|
||||
# Segunda criação de agente (dentro de 5 minutos) - usa esquemas cacheados
|
||||
agente2 = Agent(role="Segundo", goal="Teste", backstory="Teste",
|
||||
mcps=["https://api.example.com/mcp"]) # Muito mais rápido!
|
||||
```
|
||||
|
||||
### Conexões Sob Demanda
|
||||
|
||||
Conexões de ferramentas são estabelecidas apenas quando as ferramentas são realmente usadas:
|
||||
|
||||
```python
|
||||
# Criação do agente é rápida - nenhuma conexão MCP feita ainda
|
||||
agente = Agent(
|
||||
role="Agente Sob Demanda",
|
||||
goal="Usar ferramentas eficientemente",
|
||||
backstory="Agente eficiente que conecta apenas quando necessário",
|
||||
mcps=["https://api.example.com/mcp"]
|
||||
)
|
||||
|
||||
# Conexão MCP é feita apenas quando uma ferramenta é realmente executada
|
||||
# Isso minimiza o overhead de conexão e melhora a performance de inicialização
|
||||
```
|
||||
|
||||
## Melhores Práticas
|
||||
|
||||
### 1. Use Ferramentas Específicas Quando Possível
|
||||
|
||||
```python
|
||||
# Bom - obter apenas as ferramentas necessárias
|
||||
mcps=["https://weather.api.com/mcp#get_forecast"]
|
||||
|
||||
# Menos eficiente - obter todas as ferramentas do servidor
|
||||
mcps=["https://weather.api.com/mcp"]
|
||||
```
|
||||
|
||||
### 2. Lidar com Autenticação de Forma Segura
|
||||
|
||||
```python
|
||||
import os
|
||||
|
||||
# Armazenar chaves API em variáveis de ambiente
|
||||
exa_key = os.getenv("EXA_API_KEY")
|
||||
exa_profile = os.getenv("EXA_PROFILE")
|
||||
|
||||
agente = Agent(
|
||||
role="Agente Seguro",
|
||||
goal="Usar ferramentas MCP com segurança",
|
||||
backstory="Agente consciente da segurança",
|
||||
mcps=[f"https://mcp.exa.ai/mcp?api_key={exa_key}&profile={exa_profile}"]
|
||||
)
|
||||
```
|
||||
|
||||
### 3. Planejar para Falhas de Servidor
|
||||
|
||||
```python
|
||||
# Sempre incluir opções de backup
|
||||
mcps=[
|
||||
"https://api-principal.com/mcp", # Escolha principal
|
||||
"https://api-backup.com/mcp", # Opção de backup
|
||||
"crewai-amp:servico-confiavel" # Fallback AMP
|
||||
]
|
||||
```
|
||||
@@ -8,12 +8,37 @@ mode: "wide"

## Overview

The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP) provides a standardized way for AI agents to supply context to LLMs by communicating with external services, known as MCP servers.
The `crewai-tools` library extends CrewAI's capabilities by letting you easily integrate tools from these MCP servers into your agents.
This gives your crews access to a vast ecosystem of functionality.

CrewAI offers **two approaches** to MCP integration:

### 🚀 **New: Simple DSL Integration** (Recommended)

Use the `mcps` field directly on agents for seamless MCP tool integration:

```python
from crewai import Agent

agent = Agent(
    role="Research Analyst",
    goal="Research and analyze information",
    backstory="Expert researcher with access to external tools",
    mcps=[
        "https://mcp.exa.ai/mcp?api_key=sua_chave",  # External MCP server
        "https://api.weather.com/mcp#get_forecast",  # Specific tool from a server
        "crewai-amp:financial-data",                 # CrewAI AMP marketplace
        "crewai-amp:research-tools#pubmed_search"    # Specific AMP tool
    ]
)
# MCP tools are now automatically available to your agent!
```

### 🔧 **Advanced: MCPServerAdapter** (For Complex Scenarios)

For advanced use cases that require manual connection management, the `crewai-tools` library provides the `MCPServerAdapter` class.

The following transport mechanisms are currently supported (a remote-transport sketch follows the list):

- **Stdio**: for local servers (communication via standard input/output between processes on the same machine)
- **HTTPS**: for remote servers (secure communication over HTTPS)
- **Server-Sent Events (SSE)**: for remote servers (one-way, real-time data streaming from server to client over HTTP)
- **Streamable HTTP**: for remote servers (flexible, potentially bidirectional communication over HTTP, generally using SSE for server-to-client streams)
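As a counterpart to the local Stdio sketch shown earlier, here is a minimal sketch for a remote server using the Streamable HTTP transport. The endpoint URL is an illustrative placeholder, and the dict-based `server_params` shape reflects the adapter's documented remote configuration:

```python
from crewai import Agent
from crewai_tools import MCPServerAdapter

# Illustrative endpoint; the "transport" key selects the mechanism
server_params = {
    "url": "https://example.com/mcp",
    "transport": "streamable-http",
}

with MCPServerAdapter(server_params) as mcp_tools:
    agent = Agent(
        role="Remote Tool User",
        goal="Use tools from a remote MCP server",
        backstory="Agent connected to a remote MCP server over streamable HTTP",
        tools=mcp_tools,
    )
```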
@@ -12,10 +12,9 @@ dependencies = [
     "pytube>=15.0.0",
     "requests>=2.32.5",
     "docker>=7.1.0",
-    "crewai==1.0.0b1",
+    "crewai==1.1.0",
     "lancedb>=0.5.4",
     "tiktoken>=0.8.0",
     "stagehand>=0.4.1",
     "beautifulsoup4>=4.13.4",
     "pypdf>=5.9.0",
     "python-docx>=1.2.0",
@@ -43,9 +43,6 @@ from crewai_tools.tools.contextualai_rerank_tool.contextual_rerank_tool import (
 from crewai_tools.tools.couchbase_tool.couchbase_tool import (
     CouchbaseFTSVectorSearchTool,
 )
-from crewai_tools.tools.crewai_enterprise_tools.crewai_enterprise_tools import (
-    CrewaiEnterpriseTools,
-)
 from crewai_tools.tools.crewai_platform_tools.crewai_platform_tools import (
     CrewaiPlatformTools,
 )
@@ -214,7 +211,6 @@ __all__ = [
     "ContextualAIQueryTool",
     "ContextualAIRerankTool",
     "CouchbaseFTSVectorSearchTool",
-    "CrewaiEnterpriseTools",
     "CrewaiPlatformTools",
     "DOCXSearchTool",
     "DallETool",
@@ -291,4 +287,4 @@ __all__ = [
     "ZapierActionTools",
 ]

-__version__ = "1.0.0b1"
+__version__ = "1.1.0"
@@ -3,13 +3,16 @@
 import hashlib
 from pathlib import Path
 from typing import Any, TypeAlias, TypedDict
+import uuid

 from crewai.rag.config.types import RagConfigType
 from crewai.rag.config.utils import get_rag_client
 from crewai.rag.core.base_client import BaseClient
 from crewai.rag.factory import create_client
+from crewai.rag.qdrant.config import QdrantConfig
 from crewai.rag.types import BaseRecord, SearchResult
 from pydantic import PrivateAttr
+from qdrant_client.models import VectorParams
 from typing_extensions import Unpack

 from crewai_tools.rag.data_types import DataType
@@ -52,7 +55,11 @@ class CrewAIRagAdapter(Adapter):
             self._client = create_client(self.config)
         else:
             self._client = get_rag_client()
-        self._client.get_or_create_collection(collection_name=self.collection_name)
+        collection_params: dict[str, Any] = {"collection_name": self.collection_name}
+        if isinstance(self.config, QdrantConfig) and self.config.vectors_config:
+            if isinstance(self.config.vectors_config, VectorParams):
+                collection_params["vectors_config"] = self.config.vectors_config
+        self._client.get_or_create_collection(**collection_params)

     def query(
         self,
@@ -76,6 +83,8 @@ class CrewAIRagAdapter(Adapter):
             if similarity_threshold is not None
             else self.similarity_threshold
         )
+        if self._client is None:
+            raise ValueError("Client is not initialized")

        results: list[SearchResult] = self._client.search(
            collection_name=self.collection_name,
@@ -201,9 +210,10 @@ class CrewAIRagAdapter(Adapter):
                 if isinstance(arg, dict):
                     file_metadata.update(arg.get("metadata", {}))

-                chunk_id = hashlib.sha256(
+                chunk_hash = hashlib.sha256(
                     f"{file_result.doc_id}_{chunk_idx}_{file_chunk}".encode()
                 ).hexdigest()
+                chunk_id = str(uuid.UUID(chunk_hash[:32]))

                 documents.append(
                     {
@@ -251,9 +261,10 @@ class CrewAIRagAdapter(Adapter):
             if isinstance(arg, dict):
                 chunk_metadata.update(arg.get("metadata", {}))

-            chunk_id = hashlib.sha256(
+            chunk_hash = hashlib.sha256(
                 f"{loader_result.doc_id}_{i}_{chunk}".encode()
             ).hexdigest()
+            chunk_id = str(uuid.UUID(chunk_hash[:32]))

             documents.append(
                 {
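The two hunks above swap a raw SHA-256 hex digest for a UUID derived from its first 32 hex characters. A plausible motivation, given the new Qdrant imports, is that some vector stores (Qdrant among them) accept only UUIDs or integers as point IDs. A standalone sketch of the scheme:

```python
import hashlib
import uuid

# Deterministic: the same doc/chunk pair always yields the same ID,
# so re-ingesting a document overwrites rather than duplicates it
chunk_hash = hashlib.sha256(b"doc-42_0_some chunk text").hexdigest()
chunk_id = str(uuid.UUID(chunk_hash[:32]))  # 32 hex chars = 128 bits, a valid UUID
print(chunk_id)
```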
@@ -264,6 +275,8 @@ class CrewAIRagAdapter(Adapter):
             )

         if documents:
+            if self._client is None:
+                raise ValueError("Client is not initialized")
             self._client.add_documents(
                 collection_name=self.collection_name, documents=documents
             )
@@ -1,7 +1,7 @@
 import json
 import os
 import re
-from typing import Any, Literal, Optional, Union, cast, get_origin
+from typing import Any, Literal, Optional, Union, _SpecialForm, cast, get_origin
 import warnings

 from crewai.tools import BaseTool
@@ -41,7 +41,7 @@ class EnterpriseActionTool(BaseTool):
         action_schema: dict[str, Any],
         enterprise_api_base_url: str | None = None,
     ):
-        self._model_registry = {}
+        self._model_registry = {}  # type: ignore[var-annotated]
         self._base_name = self._sanitize_name(name)

         schema_props, required = self._extract_schema_info(action_schema)
@@ -67,7 +67,7 @@ class EnterpriseActionTool(BaseTool):
         # Create the model
         if field_definitions:
             try:
-                args_schema = create_model(
+                args_schema = create_model(  # type: ignore[call-overload]
                     f"{self._base_name}Schema", **field_definitions
                 )
             except Exception:
@@ -110,7 +110,9 @@ class EnterpriseActionTool(BaseTool):
             )
         return schema_props, required

-    def _process_schema_type(self, schema: dict[str, Any], type_name: str) -> type[Any]:
+    def _process_schema_type(
+        self, schema: dict[str, Any], type_name: str
+    ) -> type[Any] | _SpecialForm:
         """Process a JSON schema and return appropriate Python type."""
         if "anyOf" in schema:
             any_of_types = schema["anyOf"]
@@ -139,7 +141,7 @@ class EnterpriseActionTool(BaseTool):
         if json_type == "array":
             items_schema = schema.get("items", {"type": "string"})
             item_type = self._process_schema_type(items_schema, f"{type_name}Item")
-            return list[item_type]
+            return list[item_type]  # type: ignore[valid-type]

         if json_type == "object":
             return self._create_nested_model(schema, type_name)
@@ -174,18 +176,20 @@ class EnterpriseActionTool(BaseTool):
                 prop_type = str

             field_definitions[prop_name] = self._create_field_definition(
-                prop_type, is_required, prop_desc
+                prop_type,
+                is_required,
+                prop_desc,  # type: ignore[arg-type]
             )

         try:
-            nested_model = create_model(full_model_name, **field_definitions)
+            nested_model = create_model(full_model_name, **field_definitions)  # type: ignore[call-overload]
             self._model_registry[full_model_name] = nested_model
             return nested_model
         except Exception:
             return dict

     def _create_field_definition(
-        self, field_type: type[Any], is_required: bool, description: str
+        self, field_type: type[Any] | _SpecialForm, is_required: bool, description: str
     ) -> tuple:
         """Create Pydantic field definition based on type and requirement."""
         if is_required:
@@ -276,7 +280,7 @@ class EnterpriseActionKitToolAdapter:
     ):
         """Initialize the adapter with an enterprise action token."""
         self._set_enterprise_action_token(enterprise_action_token)
-        self._actions_schema = {}
+        self._actions_schema = {}  # type: ignore[var-annotated]
         self._tools = None
         self.enterprise_api_base_url = (
             enterprise_api_base_url or get_enterprise_api_base_url()
@@ -2,8 +2,11 @@ from collections.abc import Callable
 from pathlib import Path
 from typing import Any

-from lancedb import DBConnection as LanceDBConnection, connect as lancedb_connect
-from lancedb.table import Table as LanceDBTable
+from lancedb import (  # type: ignore[import-untyped]
+    DBConnection as LanceDBConnection,
+    connect as lancedb_connect,
+)
+from lancedb.table import Table as LanceDBTable  # type: ignore[import-untyped]
 from openai import Client as OpenAIClient
 from pydantic import Field, PrivateAttr

@@ -37,7 +40,7 @@ class LanceDBAdapter(Adapter):

         super().model_post_init(__context)

-    def query(self, question: str) -> str:
+    def query(self, question: str) -> str:  # type: ignore[override]
         query = self.embedding_function([question])[0]
         results = (
             self._table.search(query, vector_column_name=self.vector_column_name)
@@ -27,7 +27,7 @@ class RAGAdapter(Adapter):
             embedding_config=embedding_config,
         )

-    def query(self, question: str) -> str:
+    def query(self, question: str) -> str:  # type: ignore[override]
         return self._adapter.query(question)

     def add(
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from collections.abc import Callable
 from typing import Generic, TypeVar

@@ -29,7 +31,7 @@ class ToolCollection(list, Generic[T]):
     def _build_name_cache(self) -> None:
         self._name_cache = {tool.name.lower(): tool for tool in self}

-    def __getitem__(self, key: int | str) -> T:
+    def __getitem__(self, key: int | str) -> T:  # type: ignore[override]
         if isinstance(key, str):
             return self._name_cache[key.lower()]
         return super().__getitem__(key)
@@ -38,11 +40,11 @@ class ToolCollection(list, Generic[T]):
         super().append(tool)
         self._name_cache[tool.name.lower()] = tool

-    def extend(self, tools: list[T]) -> None:
+    def extend(self, tools: list[T]) -> None:  # type: ignore[override]
         super().extend(tools)
         self._build_name_cache()

-    def insert(self, index: int, tool: T) -> None:
+    def insert(self, index: int, tool: T) -> None:  # type: ignore[override]
         super().insert(index, tool)
         self._name_cache[tool.name.lower()] = tool

@@ -51,13 +53,13 @@ class ToolCollection(list, Generic[T]):
         if tool.name.lower() in self._name_cache:
             del self._name_cache[tool.name.lower()]

-    def pop(self, index: int = -1) -> T:
+    def pop(self, index: int = -1) -> T:  # type: ignore[override]
         tool = super().pop(index)
         if tool.name.lower() in self._name_cache:
             del self._name_cache[tool.name.lower()]
         return tool

-    def filter_by_names(self, names: list[str] | None = None) -> "ToolCollection[T]":
+    def filter_by_names(self, names: list[str] | None = None) -> ToolCollection[T]:
         if names is None:
             return self

@@ -69,7 +71,7 @@ class ToolCollection(list, Generic[T]):
         ]
     )

-    def filter_where(self, func: Callable[[T], bool]) -> "ToolCollection[T]":
+    def filter_where(self, func: Callable[[T], bool]) -> ToolCollection[T]:
         return ToolCollection([tool for tool in self if func(tool)])

     def clear(self) -> None:
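Taken together, these methods make `ToolCollection` a `list` subclass with a case-insensitive name index that is kept in sync on every mutation. A small sketch of the resulting behavior; the stub tool type and instances are hypothetical, and `ToolCollection` is assumed to be imported from its module:

```python
from dataclasses import dataclass


@dataclass
class StubTool:
    """Hypothetical stand-in for any object with a .name attribute (BaseTool in practice)."""

    name: str


tools = ToolCollection([StubTool("Web Search"), StubTool("Calculator")])

assert tools["web search"].name == "Web Search"  # case-insensitive name lookup
assert tools[0].name == "Web Search"             # plain list indexing still works
calc_only = tools.filter_where(lambda t: "calc" in t.name.lower())
```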
@@ -1,12 +1,15 @@
 import logging
 import os
+from typing import Final, Literal

 from crewai.tools import BaseTool
 from pydantic import Field, create_model
 import requests


-ACTIONS_URL = "https://actions.zapier.com/api/v2/ai-actions"
+ACTIONS_URL: Final[Literal["https://actions.zapier.com/api/v2/ai-actions"]] = (
+    "https://actions.zapier.com/api/v2/ai-actions"
+)

 logger = logging.getLogger(__name__)

@@ -55,8 +58,6 @@ class ZapierActionTool(BaseTool):
 class ZapierActionsAdapter:
     """Adapter for Zapier Actions."""

-    api_key: str
-
     def __init__(self, api_key: str | None = None):
         self.api_key = api_key or os.getenv("ZAPIER_API_KEY")
         if not self.api_key:
@@ -77,7 +78,7 @@ class ZapierActionsAdapter:

         return response.json()

-    def tools(self) -> list[BaseTool]:
+    def tools(self) -> list[ZapierActionTool]:
         """Convert Zapier actions to BaseTool instances."""
         actions_response = self.get_zapier_actions()
         tools = []
@@ -91,12 +92,12 @@ class ZapierActionsAdapter:
             )

             params = action.get("params", {})
-            args_fields = {}
-
-            args_fields["instructions"] = (
-                str,
-                Field(description="Instructions for how to execute this action"),
-            )
+            args_fields = {
+                "instructions": (
+                    str,
+                    Field(description="Instructions for how to execute this action"),
+                )
+            }

             for param_name, param_info in params.items():
                 field_type = (
@@ -112,7 +113,7 @@ class ZapierActionsAdapter:
                     Field(description=field_description),
                 )

-            args_schema = create_model(f"{tool_name.title()}Schema", **args_fields)
+            args_schema = create_model(f"{tool_name.title()}Schema", **args_fields)  # type: ignore[call-overload]

             tool = ZapierActionTool(
                 name=tool_name,
@@ -1,10 +1,10 @@
-from .bedrock import (
+from crewai_tools.aws.bedrock import (
     BedrockInvokeAgentTool,
     BedrockKBRetrieverTool,
     create_browser_toolkit,
     create_code_interpreter_toolkit,
 )
-from .s3 import S3ReaderTool, S3WriterTool
+from crewai_tools.aws.s3 import S3ReaderTool, S3WriterTool


 __all__ = [
@@ -1,7 +1,9 @@
-from .agents.invoke_agent_tool import BedrockInvokeAgentTool
-from .browser import create_browser_toolkit
-from .code_interpreter import create_code_interpreter_toolkit
-from .knowledge_base.retriever_tool import BedrockKBRetrieverTool
+from crewai_tools.aws.bedrock.agents.invoke_agent_tool import BedrockInvokeAgentTool
+from crewai_tools.aws.bedrock.browser import create_browser_toolkit
+from crewai_tools.aws.bedrock.code_interpreter import create_code_interpreter_toolkit
+from crewai_tools.aws.bedrock.knowledge_base.retriever_tool import (
+    BedrockKBRetrieverTool,
+)


 __all__ = [
@@ -1,4 +1,4 @@
-from .invoke_agent_tool import BedrockInvokeAgentTool
+from crewai_tools.aws.bedrock.agents.invoke_agent_tool import BedrockInvokeAgentTool


 __all__ = ["BedrockInvokeAgentTool"]
@@ -7,7 +7,10 @@ from crewai.tools import BaseTool
 from dotenv import load_dotenv
 from pydantic import BaseModel, Field

-from ..exceptions import BedrockAgentError, BedrockValidationError
+from crewai_tools.aws.bedrock.exceptions import (
+    BedrockAgentError,
+    BedrockValidationError,
+)


 # Load environment variables from .env file
@@ -24,9 +27,9 @@ class BedrockInvokeAgentTool(BaseTool):
     name: str = "Bedrock Agent Invoke Tool"
     description: str = "An agent responsible for policy analysis."
     args_schema: type[BaseModel] = BedrockInvokeAgentToolInput
-    agent_id: str = None
-    agent_alias_id: str = None
-    session_id: str = None
+    agent_id: str | None = None
+    agent_alias_id: str | None = None
+    session_id: str | None = None
     enable_trace: bool = False
     end_session: bool = False
     package_dependencies: list[str] = Field(default_factory=lambda: ["boto3"])
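With the three identifiers now typed as optional, constructing the tool without a session ID is valid. A minimal instantiation sketch; the IDs are placeholders, and AWS credentials are assumed to be resolved from the usual boto3 environment:

```python
# Hypothetical IDs; real values come from the Bedrock console
invoke_tool = BedrockInvokeAgentTool(
    agent_id="ABCDEFGHIJ",
    agent_alias_id="TSTALIASID",
    enable_trace=False,
)
```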
@@ -1,4 +1,7 @@
-from .browser_toolkit import BrowserToolkit, create_browser_toolkit
+from crewai_tools.aws.bedrock.browser.browser_toolkit import (
+    BrowserToolkit,
+    create_browser_toolkit,
+)


 __all__ = ["BrowserToolkit", "create_browser_toolkit"]
@@ -9,8 +9,10 @@ from urllib.parse import urlparse
 from crewai.tools import BaseTool
 from pydantic import BaseModel, Field

-from .browser_session_manager import BrowserSessionManager
-from .utils import aget_current_page, get_current_page
+from crewai_tools.aws.bedrock.browser.browser_session_manager import (
+    BrowserSessionManager,
+)
+from crewai_tools.aws.bedrock.browser.utils import aget_current_page, get_current_page


 logger = logging.getLogger(__name__)
@@ -80,9 +82,9 @@ class CurrentWebPageToolInput(BaseModel):
 class BrowserBaseTool(BaseTool):
     """Base class for browser tools."""

-    def __init__(self, session_manager: BrowserSessionManager):
+    def __init__(self, session_manager: BrowserSessionManager):  # type: ignore[call-arg]
         """Initialize with a session manager."""
-        super().__init__()
+        super().__init__()  # type: ignore[call-arg]
         self._session_manager = session_manager

         if self._is_in_asyncio_loop() and hasattr(self, "_arun"):
@@ -91,7 +93,7 @@ class BrowserBaseTool(BaseTool):
         # Override _run to use _arun when in an asyncio loop
         def patched_run(*args, **kwargs):
             try:
-                import nest_asyncio
+                import nest_asyncio  # type: ignore[import-untyped]

                 loop = asyncio.get_event_loop()
                 nest_asyncio.apply(loop)
@@ -101,7 +103,7 @@ class BrowserBaseTool(BaseTool):
             except Exception as e:
                 return f"Error in patched _run: {e!s}"

-        self._run = patched_run
+        self._run = patched_run  # type: ignore[method-assign]

     async def get_async_page(self, thread_id: str) -> Any:
         """Get or create a page for the specified thread."""
@@ -356,7 +358,7 @@ class ExtractHyperlinksTool(BrowserBaseTool):
         for link in soup.find_all("a", href=True):
             text = link.get_text().strip()
             href = link["href"]
-            if href.startswith(("http", "https")):
+            if href.startswith(("http", "https")):  # type: ignore[union-attr]
                 links.append({"text": text, "url": href})

         if not links:
@@ -388,7 +390,7 @@ class ExtractHyperlinksTool(BrowserBaseTool):
         for link in soup.find_all("a", href=True):
             text = link.get_text().strip()
             href = link["href"]
-            if href.startswith(("http", "https")):
+            if href.startswith(("http", "https")):  # type: ignore[union-attr]
                 links.append({"text": text, "url": href})

         if not links:
@@ -1,4 +1,4 @@
-from .code_interpreter_toolkit import (
+from crewai_tools.aws.bedrock.code_interpreter.code_interpreter_toolkit import (
     CodeInterpreterToolkit,
     create_code_interpreter_toolkit,
 )
@@ -1,4 +1,6 @@
-from .retriever_tool import BedrockKBRetrieverTool
+from crewai_tools.aws.bedrock.knowledge_base.retriever_tool import (
+    BedrockKBRetrieverTool,
+)


 __all__ = ["BedrockKBRetrieverTool"]
@@ -6,7 +6,10 @@ from crewai.tools import BaseTool
 from dotenv import load_dotenv
 from pydantic import BaseModel, Field

-from ..exceptions import BedrockKnowledgeBaseError, BedrockValidationError
+from crewai_tools.aws.bedrock.exceptions import (
+    BedrockKnowledgeBaseError,
+    BedrockValidationError,
+)


 # Load environment variables from .env file
@@ -27,7 +30,7 @@ class BedrockKBRetrieverTool(BaseTool):
         "Retrieves information from an Amazon Bedrock Knowledge Base given a query"
     )
     args_schema: type[BaseModel] = BedrockKBRetrieverToolInput
-    knowledge_base_id: str = None
+    knowledge_base_id: str = None  # type: ignore[assignment]
     number_of_results: int | None = 5
     retrieval_configuration: dict[str, Any] | None = None
     guardrail_configuration: dict[str, Any] | None = None
@@ -55,7 +58,7 @@ class BedrockKBRetrieverTool(BaseTool):
         super().__init__(**kwargs)

         # Get knowledge_base_id from environment variable if not provided
-        self.knowledge_base_id = knowledge_base_id or os.getenv("BEDROCK_KB_ID")
+        self.knowledge_base_id = knowledge_base_id or os.getenv("BEDROCK_KB_ID")  # type: ignore[assignment]
         self.number_of_results = number_of_results
         self.guardrail_configuration = guardrail_configuration
         self.next_token = next_token
@@ -239,7 +242,7 @@ class BedrockKBRetrieverTool(BaseTool):
         if results:
             response_object["results"] = results
         else:
-            response_object["message"] = "No results found for the given query."
+            response_object["message"] = "No results found for the given query."  # type: ignore[assignment]

         if "nextToken" in response:
             response_object["nextToken"] = response["nextToken"]
@@ -1,2 +1,2 @@
-from .reader_tool import S3ReaderTool as S3ReaderTool
-from .writer_tool import S3WriterTool as S3WriterTool
+from crewai_tools.aws.s3.reader_tool import S3ReaderTool as S3ReaderTool
+from crewai_tools.aws.s3.writer_tool import S3WriterTool as S3WriterTool
@@ -17,14 +17,15 @@ class LoaderResult(BaseModel):


 class BaseLoader(ABC):
-    def __init__(self, config: dict[str, Any] | None = None):
+    def __init__(self, config: dict[str, Any] | None = None) -> None:
         self.config = config or {}

     @abstractmethod
     def load(self, content: SourceContent, **kwargs) -> LoaderResult: ...

+    @staticmethod
     def generate_doc_id(
-        self, source_ref: str | None = None, content: str | None = None
+        source_ref: str | None = None, content: str | None = None
     ) -> str:
         """Generate a unique document id based on the source reference and content.
         If the source reference is not provided, the content is used as the source reference.
@@ -10,7 +10,7 @@ class RecursiveCharacterTextSplitter:
         chunk_overlap: int = 200,
         separators: list[str] | None = None,
         keep_separator: bool = True,
-    ):
+    ) -> None:
         """Initialize the RecursiveCharacterTextSplitter.

         Args:
@@ -36,6 +36,14 @@ class RecursiveCharacterTextSplitter:
         ]

     def split_text(self, text: str) -> list[str]:
+        """Split the input text into chunks.
+
+        Args:
+            text: The text to split.
+
+        Returns:
+            A list of text chunks.
+        """
         return self._split_text(text, self._separators)

     def _split_text(self, text: str, separators: list[str]) -> list[str]:
@@ -99,8 +107,8 @@ class RecursiveCharacterTextSplitter:

     def _merge_splits(self, splits: list[str], separator: str) -> list[str]:
         """Merge splits into chunks with proper overlap."""
-        docs = []
-        current_doc = []
+        docs: list[str] = []
+        current_doc: list[str] = []
         total = 0

         for split in splits:
@@ -152,7 +160,7 @@ class BaseChunker:
         chunk_overlap: int = 200,
         separators: list[str] | None = None,
         keep_separator: bool = True,
-    ):
+    ) -> None:
         """Initialize the Chunker.

         Args:
@@ -169,6 +177,14 @@ class BaseChunker:
         )

     def chunk(self, text: str) -> list[str]:
+        """Chunk the input text into smaller pieces.
+
+        Args:
+            text: The text to chunk.
+
+        Returns:
+            A list of text chunks.
+        """
         if not text or not text.strip():
             return []
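As a quick orientation, the splitter and chunker as typed above can be exercised like this; only the constructor arguments visible in these hunks are used:

```python
splitter = RecursiveCharacterTextSplitter(chunk_overlap=200)
chunks = splitter.split_text("First paragraph.\n\nSecond paragraph, split on separators.")

chunker = BaseChunker(chunk_overlap=200)
assert chunker.chunk("   ") == []  # whitespace-only input short-circuits to []
```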
@@ -4,12 +4,12 @@ from typing import Any
 from uuid import uuid4

 import chromadb
-import litellm
 from pydantic import BaseModel, Field, PrivateAttr

 from crewai_tools.rag.base_loader import BaseLoader
 from crewai_tools.rag.chunkers.base_chunker import BaseChunker
 from crewai_tools.rag.data_types import DataType
+from crewai_tools.rag.embedding_service import EmbeddingService
 from crewai_tools.rag.misc import compute_sha256
 from crewai_tools.rag.source_content import SourceContent
 from crewai_tools.tools.rag.rag_tool import Adapter
@@ -18,31 +18,6 @@ from crewai_tools.tools.rag.rag_tool import Adapter
 logger = logging.getLogger(__name__)


-class EmbeddingService:
-    def __init__(self, model: str = "text-embedding-3-small", **kwargs):
-        self.model = model
-        self.kwargs = kwargs
-
-    def embed_text(self, text: str) -> list[float]:
-        try:
-            response = litellm.embedding(model=self.model, input=[text], **self.kwargs)
-            return response.data[0]["embedding"]
-        except Exception as e:
-            logger.error(f"Error generating embedding: {e}")
-            raise
-
-    def embed_batch(self, texts: list[str]) -> list[list[float]]:
-        if not texts:
-            return []
-
-        try:
-            response = litellm.embedding(model=self.model, input=texts, **self.kwargs)
-            return [data["embedding"] for data in response.data]
-        except Exception as e:
-            logger.error(f"Error generating batch embeddings: {e}")
-            raise
-
-
 class Document(BaseModel):
     id: str = Field(default_factory=lambda: str(uuid4()))
     content: str
@@ -54,6 +29,7 @@ class Document(BaseModel):
 class RAG(Adapter):
     collection_name: str = "crewai_knowledge_base"
     persist_directory: str | None = None
+    embedding_provider: str = "openai"
     embedding_model: str = "text-embedding-3-large"
     summarize: bool = False
     top_k: int = 5
@@ -79,7 +55,9 @@ class RAG(Adapter):
             )

             self._embedding_service = EmbeddingService(
-                model=self.embedding_model, **self.embedding_config
+                provider=self.embedding_provider,
+                model=self.embedding_model,
+                **self.embedding_config,
             )
         except Exception as e:
             logger.error(f"Failed to initialize ChromaDB: {e}")
@@ -181,7 +159,7 @@ class RAG(Adapter):
         except Exception as e:
             logger.error(f"Failed to add documents to ChromaDB: {e}")

-    def query(self, question: str, where: dict[str, Any] | None = None) -> str:
+    def query(self, question: str, where: dict[str, Any] | None = None) -> str:  # type: ignore
         try:
             question_embedding = self._embedding_service.embed_text(question)

@@ -240,8 +218,9 @@ class RAG(Adapter):
             logger.error(f"Failed to get collection info: {e}")
             return {"error": str(e)}

+    @staticmethod
     def _get_data_type(
-        self, content: SourceContent, data_type: str | DataType | None = None
+        content: SourceContent, data_type: str | DataType | None = None
     ) -> DataType:
         try:
             if isinstance(data_type, str):
@@ -116,7 +116,7 @@ class DataTypes:
         if isinstance(content, str):
             try:
                 url = urlparse(content)
-                is_url = (url.scheme and url.netloc) or url.scheme == "file"
+                is_url = bool(url.scheme and url.netloc) or url.scheme == "file"
             except Exception:  # noqa: S110
                 pass
511 lib/crewai-tools/src/crewai_tools/rag/embedding_service.py Normal file
@@ -0,0 +1,511 @@
"""
Enhanced embedding service that leverages CrewAI's existing embedding providers.
This replaces the litellm-based EmbeddingService with a more flexible architecture.
"""

from __future__ import annotations

import logging
import os
from typing import Any

from pydantic import BaseModel, Field


logger = logging.getLogger(__name__)


class EmbeddingConfig(BaseModel):
    """Configuration for embedding providers."""

    provider: str = Field(description="Embedding provider name")
    model: str = Field(description="Model name to use")
    api_key: str | None = Field(default=None, description="API key for the provider")
    timeout: float | None = Field(
        default=30.0, description="Request timeout in seconds"
    )
    max_retries: int = Field(default=3, description="Maximum number of retries")
    batch_size: int = Field(
        default=100, description="Batch size for processing multiple texts"
    )
    extra_config: dict[str, Any] = Field(
        default_factory=dict, description="Additional provider-specific configuration"
    )


class EmbeddingService:
    """
    Enhanced embedding service that uses CrewAI's existing embedding providers.

    Supports multiple providers:
    - openai: OpenAI embeddings (text-embedding-3-small, text-embedding-3-large, etc.)
    - voyageai: Voyage AI embeddings (voyage-2, voyage-large-2, etc.)
    - cohere: Cohere embeddings (embed-english-v3.0, embed-multilingual-v3.0, etc.)
    - google-generativeai: Google Gemini embeddings (models/embedding-001, etc.)
    - google-vertex: Google Vertex embeddings (models/embedding-001, etc.)
    - huggingface: Hugging Face embeddings (sentence-transformers/all-MiniLM-L6-v2, etc.)
    - jina: Jina embeddings (jina-embeddings-v2-base-en, etc.)
    - ollama: Ollama embeddings (nomic-embed-text, etc.)
    - roboflow: Roboflow embeddings (roboflow-embeddings-v2-base-en, etc.)
    - watsonx: Watson X embeddings (ibm/slate-125m-english-rtrvr, etc.)
    - custom: Custom embeddings (embedding_callable, etc.)
    - sentence-transformer: Sentence Transformers embeddings (all-MiniLM-L6-v2, etc.)
    - text2vec: Text2Vec embeddings (text2vec-base-en, etc.)
    - openclip: OpenClip embeddings (openclip-large-v2, etc.)
    - instructor: Instructor embeddings (hkunlp/instructor-large, etc.)
    - onnx: ONNX embeddings (onnx-large-v2, etc.)
    """

    def __init__(
        self,
        provider: str = "openai",
        model: str = "text-embedding-3-small",
        api_key: str | None = None,
        **kwargs: Any,
    ):
        """
        Initialize the embedding service.

        Args:
            provider: The embedding provider to use
            model: The model name
            api_key: API key (if not provided, will look for environment variables)
            **kwargs: Additional configuration options
        """
        self.config = EmbeddingConfig(
            provider=provider,
            model=model,
            api_key=api_key or self._get_default_api_key(provider),
            **kwargs,
        )

        self._embedding_function = None
        self._initialize_embedding_function()

    @staticmethod
    def _get_default_api_key(provider: str) -> str | None:
        """Get default API key from environment variables."""
        env_key_map = {
            "azure": "AZURE_OPENAI_API_KEY",
            "amazon-bedrock": "AWS_ACCESS_KEY_ID",  # or AWS_PROFILE
            "cohere": "COHERE_API_KEY",
            "google-generativeai": "GOOGLE_API_KEY",
            "google-vertex": "GOOGLE_APPLICATION_CREDENTIALS",
            "huggingface": "HUGGINGFACE_API_KEY",
            "jina": "JINA_API_KEY",
            "ollama": None,  # Ollama typically runs locally without API key
            "openai": "OPENAI_API_KEY",
            "roboflow": "ROBOFLOW_API_KEY",
            "voyageai": "VOYAGE_API_KEY",
            "watsonx": "WATSONX_API_KEY",
        }

        env_key = env_key_map.get(provider)
        if env_key:
            return os.getenv(env_key)
        return None

    def _initialize_embedding_function(self):
        """Initialize the embedding function using CrewAI's factory."""
        try:
            from crewai.rag.embeddings.factory import build_embedder

            # Build the configuration for CrewAI's factory
            config = self._build_provider_config()

            # Create the embedding function
            self._embedding_function = build_embedder(config)

            logger.info(
                f"Initialized {self.config.provider} embedding service with model "
                f"{self.config.model}"
            )

        except ImportError as e:
            raise ImportError(
                f"CrewAI embedding providers not available. "
                f"Make sure crewai is installed: {e}"
            ) from e
        except Exception as e:
            logger.error(f"Failed to initialize embedding function: {e}")
            raise RuntimeError(
                f"Failed to initialize {self.config.provider} embedding service: {e}"
            ) from e

    def _build_provider_config(self) -> dict[str, Any]:
        """Build configuration dictionary for CrewAI's embedding factory."""
        base_config = {"provider": self.config.provider, "config": {}}

        # Provider-specific configuration mapping
        if self.config.provider == "openai":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "azure":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "voyageai":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model": self.config.model,
                "max_retries": self.config.max_retries,
                "timeout": self.config.timeout,
                **self.config.extra_config,
            }
        elif self.config.provider == "cohere":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider in ["google-generativeai", "google-vertex"]:
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "amazon-bedrock":
            base_config["config"] = {
                "aws_access_key_id": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "huggingface":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "jina":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "ollama":
            base_config["config"] = {
                "model": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "sentence-transformer":
            base_config["config"] = {
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "instructor":
            base_config["config"] = {
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "onnx":
            base_config["config"] = {
                **self.config.extra_config,
            }
        elif self.config.provider == "roboflow":
            base_config["config"] = {
                "api_key": self.config.api_key,
                **self.config.extra_config,
            }
        elif self.config.provider == "openclip":
            base_config["config"] = {
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "text2vec":
            base_config["config"] = {
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "watsonx":
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model_name": self.config.model,
                **self.config.extra_config,
            }
        elif self.config.provider == "custom":
            # Custom provider requires embedding_callable in extra_config
            base_config["config"] = {
                **self.config.extra_config,
            }
        else:
            # Generic configuration for any unlisted providers
            base_config["config"] = {
                "api_key": self.config.api_key,
                "model": self.config.model,
                **self.config.extra_config,
            }

        return base_config

    def embed_text(self, text: str) -> list[float]:
        """
        Generate embedding for a single text.

        Args:
            text: Text to embed

        Returns:
            List of floats representing the embedding

        Raises:
            RuntimeError: If embedding generation fails
        """
        if not text or not text.strip():
            logger.warning("Empty text provided for embedding")
            return []

        try:
            # Use ChromaDB's embedding function interface
            embeddings = self._embedding_function([text])  # type: ignore
            return embeddings[0] if embeddings else []

        except Exception as e:
            logger.error(f"Error generating embedding for text: {e}")
            raise RuntimeError(f"Failed to generate embedding: {e}") from e

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings for multiple texts.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors

        Raises:
            RuntimeError: If embedding generation fails
        """
        if not texts:
            return []

        # Filter out empty texts
        valid_texts = [text for text in texts if text and text.strip()]
        if not valid_texts:
            logger.warning("No valid texts provided for batch embedding")
            return []

        try:
            # Process in batches to avoid API limits
            all_embeddings = []

            for i in range(0, len(valid_texts), self.config.batch_size):
                batch = valid_texts[i : i + self.config.batch_size]
                batch_embeddings = self._embedding_function(batch)  # type: ignore
                all_embeddings.extend(batch_embeddings)

            return all_embeddings

        except Exception as e:
            logger.error(f"Error generating batch embeddings: {e}")
            raise RuntimeError(f"Failed to generate batch embeddings: {e}") from e

    def get_embedding_dimension(self) -> int | None:
        """
        Get the dimension of embeddings produced by this service.

        Returns:
            Embedding dimension or None if unknown
        """
        # Try to get dimension by generating a test embedding
        try:
            test_embedding = self.embed_text("test")
            return len(test_embedding) if test_embedding else None
        except Exception:
            logger.warning("Could not determine embedding dimension")
            return None

    def validate_connection(self) -> bool:
        """
        Validate that the embedding service is working correctly.

        Returns:
            True if the service is working, False otherwise
        """
        try:
            test_embedding = self.embed_text("test connection")
            return len(test_embedding) > 0
        except Exception as e:
            logger.error(f"Connection validation failed: {e}")
            return False

    def get_service_info(self) -> dict[str, Any]:
        """
        Get information about the current embedding service.

        Returns:
            Dictionary with service information
        """
        return {
            "provider": self.config.provider,
            "model": self.config.model,
            "embedding_dimension": self.get_embedding_dimension(),
            "batch_size": self.config.batch_size,
            "is_connected": self.validate_connection(),
        }

    @classmethod
    def list_supported_providers(cls) -> list[str]:
        """
        List all supported embedding providers.

        Returns:
            List of supported provider names
        """
        return [
            "azure",
            "amazon-bedrock",
            "cohere",
            "custom",
            "google-generativeai",
            "google-vertex",
            "huggingface",
            "instructor",
            "jina",
            "ollama",
            "onnx",
            "openai",
            "openclip",
            "roboflow",
            "sentence-transformer",
            "text2vec",
            "voyageai",
            "watsonx",
        ]

    @classmethod
    def create_openai_service(
        cls,
        model: str = "text-embedding-3-small",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create an OpenAI embedding service."""
        return cls(provider="openai", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_voyage_service(
        cls, model: str = "voyage-2", api_key: str | None = None, **kwargs: Any
    ) -> EmbeddingService:
        """Create a Voyage AI embedding service."""
        return cls(provider="voyageai", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_cohere_service(
        cls,
        model: str = "embed-english-v3.0",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Cohere embedding service."""
        return cls(provider="cohere", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_gemini_service(
        cls,
        model: str = "models/embedding-001",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Google Gemini embedding service."""
        return cls(
            provider="google-generativeai", model=model, api_key=api_key, **kwargs
        )

    @classmethod
    def create_azure_service(
        cls,
        model: str = "text-embedding-ada-002",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create an Azure OpenAI embedding service."""
        return cls(provider="azure", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_bedrock_service(
        cls,
        model: str = "amazon.titan-embed-text-v1",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create an Amazon Bedrock embedding service."""
        return cls(provider="amazon-bedrock", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_huggingface_service(
        cls,
        model: str = "sentence-transformers/all-MiniLM-L6-v2",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Hugging Face embedding service."""
        return cls(provider="huggingface", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_sentence_transformer_service(
        cls,
        model: str = "all-MiniLM-L6-v2",
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Sentence Transformers embedding service (local)."""
        return cls(provider="sentence-transformer", model=model, **kwargs)

    @classmethod
    def create_ollama_service(
        cls,
        model: str = "nomic-embed-text",
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create an Ollama embedding service (local)."""
        return cls(provider="ollama", model=model, **kwargs)

    @classmethod
    def create_jina_service(
        cls,
        model: str = "jina-embeddings-v2-base-en",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Jina AI embedding service."""
        return cls(provider="jina", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_instructor_service(
        cls,
        model: str = "hkunlp/instructor-large",
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create an Instructor embedding service."""
        return cls(provider="instructor", model=model, **kwargs)

    @classmethod
    def create_watsonx_service(
        cls,
        model: str = "ibm/slate-125m-english-rtrvr",
        api_key: str | None = None,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a Watson X embedding service."""
        return cls(provider="watsonx", model=model, api_key=api_key, **kwargs)

    @classmethod
    def create_custom_service(
        cls,
        embedding_callable: Any,
        **kwargs: Any,
    ) -> EmbeddingService:
        """Create a custom embedding service with your own embedding function."""
        return cls(
            provider="custom",
            model="custom",
            extra_config={"embedding_callable": embedding_callable},
            **kwargs,
        )
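A usage sketch of the service defined above; the provider and model values mirror its defaults, and `OPENAI_API_KEY` is assumed to be set in the environment:

```python
service = EmbeddingService(provider="openai", model="text-embedding-3-small")

vector = service.embed_text("hello world")          # single embedding
vectors = service.embed_batch(["first", "second"])  # batched, respects batch_size
print(service.get_service_info())                   # provider, model, dimension, ...
```

The factory classmethods (`create_openai_service`, `create_ollama_service`, and so on) are thin wrappers over the same constructor, so either style is equivalent.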
@@ -2,41 +2,30 @@ import csv
 from io import StringIO

 from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
+from crewai_tools.rag.loaders.utils import load_from_url
 from crewai_tools.rag.source_content import SourceContent


 class CSVLoader(BaseLoader):
-    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
+    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
         source_ref = source_content.source_ref

         content_str = source_content.source
         if source_content.is_url():
-            content_str = self._load_from_url(content_str, kwargs)
+            content_str = load_from_url(
+                content_str,
+                kwargs,
+                accept_header="text/csv, application/csv, text/plain",
+                loader_name="CSVLoader",
+            )
         elif source_content.path_exists():
             content_str = self._load_from_file(content_str)

         return self._parse_csv(content_str, source_ref)

-    def _load_from_url(self, url: str, kwargs: dict) -> str:
-        import requests
-
-        headers = kwargs.get(
-            "headers",
-            {
-                "Accept": "text/csv, application/csv, text/plain",
-                "User-Agent": "Mozilla/5.0 (compatible; crewai-tools CSVLoader)",
-            },
-        )
-
-        try:
-            response = requests.get(url, headers=headers, timeout=30)
-            response.raise_for_status()
-            return response.text
-        except Exception as e:
-            raise ValueError(f"Error fetching CSV from URL {url}: {e!s}") from e
-
-    def _load_from_file(self, path: str) -> str:
-        with open(path, "r", encoding="utf-8") as file:
+    @staticmethod
+    def _load_from_file(path: str) -> str:
+        with open(path, encoding="utf-8") as file:
             return file.read()

     def _parse_csv(self, content: str, source_ref: str) -> LoaderResult:
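This refactor replaces each loader's private `_load_from_url` with a shared `crewai_tools.rag.loaders.utils.load_from_url` helper. Its body is not shown in this diff; a hypothetical reconstruction, consistent with the call site above and the removed method, might look like:

```python
import requests


def load_from_url(url: str, kwargs: dict, accept_header: str, loader_name: str) -> str:
    """Hypothetical sketch: fetch text content for a loader, mirroring the removed method."""
    headers = kwargs.get(
        "headers",
        {
            "Accept": accept_header,
            "User-Agent": f"Mozilla/5.0 (compatible; crewai-tools {loader_name})",
        },
    )
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.text
    except Exception as e:
        raise ValueError(f"Error fetching content from URL {url}: {e!s}") from e
```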

@@ -6,11 +6,11 @@ from crewai_tools.rag.source_content import SourceContent


class DirectoryLoader(BaseLoader):
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        """Load and process all files from a directory recursively.

        Args:
            source: Directory path or URL to a directory listing
            source_content: Directory path or URL to a directory listing
            **kwargs: Additional options:
                - recursive: bool (default True) - Whether to search recursively
                - include_extensions: list - Only include files with these extensions

@@ -33,16 +33,16 @@ class DirectoryLoader(BaseLoader):
        return self._process_directory(source_ref, kwargs)

    def _process_directory(self, dir_path: str, kwargs: dict) -> LoaderResult:
        recursive = kwargs.get("recursive", True)
        include_extensions = kwargs.get("include_extensions", None)
        exclude_extensions = kwargs.get("exclude_extensions", None)
        max_files = kwargs.get("max_files", None)
        recursive: bool = kwargs.get("recursive", True)
        include_extensions: list[str] | None = kwargs.get("include_extensions", None)
        exclude_extensions: list[str] | None = kwargs.get("exclude_extensions", None)
        max_files: int | None = kwargs.get("max_files", None)

        files = self._find_files(
            dir_path, recursive, include_extensions, exclude_extensions
        )

        if max_files and len(files) > max_files:
        if max_files is not None and len(files) > max_files:
            files = files[:max_files]

        all_contents = []

@@ -115,8 +115,8 @@ class DirectoryLoader(BaseLoader):

        return sorted(files)

    @staticmethod
    def _should_include_file(
        self,
        filename: str,
        include_ext: list[str] | None = None,
        exclude_ext: list[str] | None = None,

@@ -141,7 +141,8 @@ class DirectoryLoader(BaseLoader):

        return True

    def _process_single_file(self, file_path: str) -> LoaderResult:
    @staticmethod
    def _process_single_file(file_path: str) -> LoaderResult:
        from crewai_tools.rag.data_types import DataTypes

        data_type = DataTypes.from_content(Path(file_path))
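
Not part of the diff: a minimal usage sketch of the options DirectoryLoader.load() reads, per the docstring above. The "./docs" path is hypothetical, and it assumes SourceContent can be built directly from a directory path.

    loader = DirectoryLoader()
    result = loader.load(
        SourceContent("./docs"),     # assumption: SourceContent(path) constructor
        recursive=True,              # search subdirectories (default True)
        include_extensions=[".md"],  # only pick up Markdown files
        max_files=10,                # truncate to the first 10 files found
    )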

@@ -12,7 +12,7 @@ from crewai_tools.rag.source_content import SourceContent
class DocsSiteLoader(BaseLoader):
    """Loader for documentation websites."""

    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        """Load content from a documentation site.

        Args:

@@ -40,7 +40,6 @@ class DocsSiteLoader(BaseLoader):
        title = soup.find("title")
        title_text = title.get_text(strip=True) if title else "Documentation"

        main_content = None
        for selector in [
            "main",
            "article",

@@ -82,8 +81,10 @@ class DocsSiteLoader(BaseLoader):
        if nav:
            links = nav.find_all("a", href=True)
            for link in links[:20]:
                href = link["href"]
                if not href.startswith(("http://", "https://", "mailto:", "#")):
                href = link.get("href", "")
                if isinstance(href, str) and not href.startswith(
                    ("http://", "https://", "mailto:", "#")
                ):
                    full_url = urljoin(docs_url, href)
                    nav_links.append(f"- {link.get_text(strip=True)}: {full_url}")

@@ -1,12 +1,15 @@
import os
import tempfile
from typing import Any

import requests

from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
from crewai_tools.rag.source_content import SourceContent


class DOCXLoader(BaseLoader):
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        try:
            from docx import Document as DocxDocument
        except ImportError as e:

@@ -29,9 +32,8 @@ class DOCXLoader(BaseLoader):
                f"Source must be a valid file path or URL, got: {source_content.source}"
            )

    def _download_from_url(self, url: str, kwargs: dict) -> str:
        import requests

    @staticmethod
    def _download_from_url(url: str, kwargs: dict) -> str:
        headers = kwargs.get(
            "headers",
            {

@@ -49,13 +51,13 @@ class DOCXLoader(BaseLoader):
                temp_file.write(response.content)
                return temp_file.name
        except Exception as e:
            raise ValueError(f"Error fetching DOCX from URL {url}: {e!s}") from e
            raise ValueError(f"Error fetching content from URL {url}: {e!s}") from e

    def _load_from_file(
        self,
        file_path: str,
        source_ref: str,
        DocxDocument,  # noqa: N803
        DocxDocument: Any,  # noqa: N803
    ) -> LoaderResult:
        try:
            doc = DocxDocument(file_path)

@@ -9,7 +9,7 @@ from crewai_tools.rag.source_content import SourceContent
class GithubLoader(BaseLoader):
    """Loader for GitHub repository content."""

    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        """Load content from a GitHub repository.

        Args:

@@ -54,9 +54,7 @@ class GithubLoader(BaseLoader):
        try:
            readme = repo.get_readme()
            all_content.append("README:")
            all_content.append(
                readme.decoded_content.decode("utf-8", errors="ignore")
            )
            all_content.append(readme.decoded_content.decode(errors="ignore"))
            all_content.append("")
        except GithubException:
            pass

@@ -1,52 +1,30 @@
import json

from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
from crewai_tools.rag.loaders.utils import load_from_url
from crewai_tools.rag.source_content import SourceContent


class JSONLoader(BaseLoader):
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        source_ref = source_content.source_ref
        content = source_content.source

        if source_content.is_url():
            content = self._load_from_url(source_ref, kwargs)
            content = load_from_url(
                source_ref,
                kwargs,
                accept_header="application/json",
                loader_name="JSONLoader",
            )
        elif source_content.path_exists():
            content = self._load_from_file(source_ref)

        return self._parse_json(content, source_ref)

    def _load_from_url(self, url: str, kwargs: dict) -> str:
        import requests

        headers = kwargs.get(
            "headers",
            {
                "Accept": "application/json",
                "User-Agent": "Mozilla/5.0 (compatible; crewai-tools JSONLoader)",
            },
        )

        try:
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            return (
                response.text
                if not self._is_json_response(response)
                else json.dumps(response.json(), indent=2)
            )
        except Exception as e:
            raise ValueError(f"Error fetching JSON from URL {url}: {e!s}") from e

    def _is_json_response(self, response) -> bool:
        try:
            response.json()
            return True
        except ValueError:
            return False

    def _load_from_file(self, path: str) -> str:
        with open(path, "r", encoding="utf-8") as file:
    @staticmethod
    def _load_from_file(path: str) -> str:
        with open(path, encoding="utf-8") as file:
            return file.read()

    def _parse_json(self, content: str, source_ref: str) -> LoaderResult:

@@ -1,61 +1,55 @@
import re
from typing import Final

from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
from crewai_tools.rag.loaders.utils import load_from_url
from crewai_tools.rag.source_content import SourceContent


_IMPORT_PATTERN: Final[re.Pattern[str]] = re.compile(r"^import\s+.*?\n", re.MULTILINE)
_EXPORT_PATTERN: Final[re.Pattern[str]] = re.compile(
    r"^export\s+.*?(?:\n|$)", re.MULTILINE
)
_JSX_TAG_PATTERN: Final[re.Pattern[str]] = re.compile(r"<[^>]+>")
_EXTRA_NEWLINES_PATTERN: Final[re.Pattern[str]] = re.compile(r"\n\s*\n\s*\n")


class MDXLoader(BaseLoader):
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        source_ref = source_content.source_ref
        content = source_content.source

        if source_content.is_url():
            content = self._load_from_url(source_ref, kwargs)
            content = load_from_url(
                source_ref,
                kwargs,
                accept_header="text/markdown, text/x-markdown, text/plain",
                loader_name="MDXLoader",
            )
        elif source_content.path_exists():
            content = self._load_from_file(source_ref)

        return self._parse_mdx(content, source_ref)

    def _load_from_url(self, url: str, kwargs: dict) -> str:
        import requests

        headers = kwargs.get(
            "headers",
            {
                "Accept": "text/markdown, text/x-markdown, text/plain",
                "User-Agent": "Mozilla/5.0 (compatible; crewai-tools MDXLoader)",
            },
        )

        try:
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            return response.text
        except Exception as e:
            raise ValueError(f"Error fetching MDX from URL {url}: {e!s}") from e

    def _load_from_file(self, path: str) -> str:
        with open(path, "r", encoding="utf-8") as file:
    @staticmethod
    def _load_from_file(path: str) -> str:
        with open(path, encoding="utf-8") as file:
            return file.read()

    def _parse_mdx(self, content: str, source_ref: str) -> LoaderResult:
        cleaned_content = content

        # Remove import statements
        cleaned_content = re.sub(
            r"^import\s+.*?\n", "", cleaned_content, flags=re.MULTILINE
        )
        cleaned_content = _IMPORT_PATTERN.sub("", cleaned_content)

        # Remove export statements
        cleaned_content = re.sub(
            r"^export\s+.*?(?:\n|$)", "", cleaned_content, flags=re.MULTILINE
        )
        cleaned_content = _EXPORT_PATTERN.sub("", cleaned_content)

        # Remove JSX tags (simple approach)
        cleaned_content = re.sub(r"<[^>]+>", "", cleaned_content)
        cleaned_content = _JSX_TAG_PATTERN.sub("", cleaned_content)

        # Clean up extra whitespace
        cleaned_content = re.sub(r"\n\s*\n\s*\n", "\n\n", cleaned_content)
        cleaned_content = _EXTRA_NEWLINES_PATTERN.sub("\n\n", cleaned_content)
        cleaned_content = cleaned_content.strip()

        metadata = {"format": "mdx"}
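
Not part of the diff: a short note on the pattern this change adopts. Compiling a regex once at module import, as _IMPORT_PATTERN and friends do above, avoids recompiling on every load() call; pattern.sub() is otherwise equivalent to re.sub() with the flags baked in. A minimal sketch:

    import re
    from typing import Final

    _IMPORT_PATTERN: Final[re.Pattern[str]] = re.compile(r"^import\s+.*?\n", re.MULTILINE)

    text = "import x\n# body\n"
    # Strips the whole "import x" line, leaving only the body.
    assert _IMPORT_PATTERN.sub("", text) == "# body\n"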

@@ -1,8 +1,10 @@
"""MySQL database loader."""

from typing import Any
from urllib.parse import urlparse

import pymysql
from pymysql import Error, connect
from pymysql.cursors import DictCursor

from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
from crewai_tools.rag.source_content import SourceContent

@@ -11,7 +13,7 @@ from crewai_tools.rag.source_content import SourceContent
class MySQLLoader(BaseLoader):
    """Loader for MySQL database content."""

    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source: SourceContent, **kwargs: Any) -> LoaderResult:  # type: ignore[override]
        """Load content from a MySQL database table.

        Args:

@@ -40,14 +42,14 @@ class MySQLLoader(BaseLoader):
            "password": parsed.password,
            "database": parsed.path.lstrip("/") if parsed.path else None,
            "charset": "utf8mb4",
            "cursorclass": pymysql.cursors.DictCursor,
            "cursorclass": DictCursor,
        }

        if not connection_params["database"]:
            raise ValueError("Database name is required in the URI")

        try:
            connection = pymysql.connect(**connection_params)
            connection = connect(**connection_params)
            try:
                with connection.cursor() as cursor:
                    cursor.execute(query)

@@ -94,7 +96,7 @@ class MySQLLoader(BaseLoader):
                )
            finally:
                connection.close()
        except pymysql.Error as e:
        except Error as e:
            raise ValueError(f"MySQL database error: {e}") from e
        except Exception as e:
            raise ValueError(f"Failed to load data from MySQL: {e}") from e

@@ -11,7 +11,7 @@ from crewai_tools.rag.source_content import SourceContent
class PDFLoader(BaseLoader):
    """Loader for PDF files."""

    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        """Load and extract text from a PDF file.

        Args:

@@ -28,7 +28,7 @@ class PDFLoader(BaseLoader):
            import pypdf
        except ImportError:
            try:
                import PyPDF2 as pypdf  # noqa: N813
                import PyPDF2 as pypdf  # type: ignore[import-not-found,no-redef]  # noqa: N813
            except ImportError as e:
                raise ImportError(
                    "PDF support requires pypdf or PyPDF2. Install with: uv add pypdf"

@@ -2,7 +2,7 @@

from urllib.parse import urlparse

import psycopg2
from psycopg2 import Error, connect
from psycopg2.extras import RealDictCursor

from crewai_tools.rag.base_loader import BaseLoader, LoaderResult

@@ -12,7 +12,7 @@ from crewai_tools.rag.source_content import SourceContent
class PostgresLoader(BaseLoader):
    """Loader for PostgreSQL database content."""

    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        """Load content from a PostgreSQL database table.

        Args:

@@ -47,7 +47,7 @@ class PostgresLoader(BaseLoader):
            raise ValueError("Database name is required in the URI")

        try:
            connection = psycopg2.connect(**connection_params)
            connection = connect(**connection_params)
            try:
                with connection.cursor() as cursor:
                    cursor.execute(query)

@@ -94,7 +94,7 @@ class PostgresLoader(BaseLoader):
                )
            finally:
                connection.close()
        except psycopg2.Error as e:
        except Error as e:
            raise ValueError(f"PostgreSQL database error: {e}") from e
        except Exception as e:
            raise ValueError(f"Failed to load data from PostgreSQL: {e}") from e

@@ -3,14 +3,14 @@ from crewai_tools.rag.source_content import SourceContent


class TextFileLoader(BaseLoader):
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        source_ref = source_content.source_ref
        if not source_content.path_exists():
            raise FileNotFoundError(
                f"The following file does not exist: {source_content.source}"
            )

        with open(source_content.source, "r", encoding="utf-8") as file:
        with open(source_content.source, encoding="utf-8") as file:
            content = file.read()

        return LoaderResult(

@@ -21,7 +21,7 @@ class TextFileLoader(BaseLoader):


class TextLoader(BaseLoader):
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        return LoaderResult(
            content=source_content.source,
            source=source_content.source_ref,

36 lib/crewai-tools/src/crewai_tools/rag/loaders/utils.py Normal file
@@ -0,0 +1,36 @@
"""Utility functions for RAG loaders."""


def load_from_url(
    url: str, kwargs: dict, accept_header: str = "*/*", loader_name: str = "Loader"
) -> str:
    """Load content from a URL.

    Args:
        url: The URL to fetch content from
        kwargs: Additional keyword arguments (can include 'headers' override)
        accept_header: The Accept header value for the request
        loader_name: The name of the loader for the User-Agent header

    Returns:
        The text content from the URL

    Raises:
        ValueError: If there's an error fetching the URL
    """
    import requests

    headers = kwargs.get(
        "headers",
        {
            "Accept": accept_header,
            "User-Agent": f"Mozilla/5.0 (compatible; crewai-tools {loader_name})",
        },
    )

    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.text
    except Exception as e:
        raise ValueError(f"Error fetching content from URL {url}: {e!s}") from e
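
Not part of the diff: a minimal usage sketch of the new shared helper, based only on the signature above; the example.com URL is hypothetical.

    from crewai_tools.rag.loaders.utils import load_from_url

    # Default headers are built from accept_header and loader_name.
    text = load_from_url(
        "https://example.com/data.csv",
        {},
        accept_header="text/csv, application/csv, text/plain",
        loader_name="CSVLoader",
    )

    # Callers can still override the headers entirely via kwargs.
    text = load_from_url("https://example.com/data.csv", {"headers": {"Accept": "*/*"}})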

@@ -1,4 +1,5 @@
import re
from typing import Final

from bs4 import BeautifulSoup
import requests

@@ -7,8 +8,12 @@ from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
from crewai_tools.rag.source_content import SourceContent


_SPACES_PATTERN: Final[re.Pattern[str]] = re.compile(r"[ \t]+")
_NEWLINE_PATTERN: Final[re.Pattern[str]] = re.compile(r"\s+\n\s+")


class WebPageLoader(BaseLoader):
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        url = source_content.source
        headers = kwargs.get(
            "headers",

@@ -29,8 +34,8 @@ class WebPageLoader(BaseLoader):
            script.decompose()

        text = soup.get_text(" ")
        text = re.sub("[ \t]+", " ", text)
        text = re.sub("\\s+\n\\s+", "\n", text)
        text = _SPACES_PATTERN.sub(" ", text)
        text = _NEWLINE_PATTERN.sub("\n", text)
        text = text.strip()

        title = (

@@ -1,49 +1,48 @@
import xml.etree.ElementTree as ET
from typing import Any
from xml.etree.ElementTree import ParseError, fromstring, parse

from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
from crewai_tools.rag.loaders.utils import load_from_url
from crewai_tools.rag.source_content import SourceContent


class XMLLoader(BaseLoader):
    def load(self, source_content: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source_content: SourceContent, **kwargs: Any) -> LoaderResult:  # type: ignore[override]
        """Load and parse XML content from various sources.

        Args:
            source_content: SourceContent: The source content to load.
            **kwargs: Additional keyword arguments for loading from URL.

        Returns:
            LoaderResult: The result of loading and parsing the XML content.
        """
        source_ref = source_content.source_ref
        content = source_content.source

        if source_content.is_url():
            content = self._load_from_url(source_ref, kwargs)
            content = load_from_url(
                source_ref,
                kwargs,
                accept_header="application/xml, text/xml, text/plain",
                loader_name="XMLLoader",
            )
        elif source_content.path_exists():
            content = self._load_from_file(source_ref)

        return self._parse_xml(content, source_ref)

    def _load_from_url(self, url: str, kwargs: dict) -> str:
        import requests

        headers = kwargs.get(
            "headers",
            {
                "Accept": "application/xml, text/xml, text/plain",
                "User-Agent": "Mozilla/5.0 (compatible; crewai-tools XMLLoader)",
            },
        )

        try:
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            return response.text
        except Exception as e:
            raise ValueError(f"Error fetching XML from URL {url}: {e!s}") from e

    def _load_from_file(self, path: str) -> str:
        with open(path, "r", encoding="utf-8") as file:
    @staticmethod
    def _load_from_file(path: str) -> str:
        with open(path, encoding="utf-8") as file:
            return file.read()

    def _parse_xml(self, content: str, source_ref: str) -> LoaderResult:
        try:
            if content.strip().startswith("<"):
                root = ET.fromstring(content)  # noqa: S314
                root = fromstring(content)  # noqa: S314
            else:
                root = ET.parse(source_ref).getroot()  # noqa: S314
                root = parse(source_ref).getroot()  # noqa: S314

            text_parts = []
            for text_content in root.itertext():

@@ -52,7 +51,7 @@ class XMLLoader(BaseLoader):

            text = "\n".join(text_parts)
            metadata = {"format": "xml", "root_tag": root.tag}
        except ET.ParseError as e:
        except ParseError as e:
            text = content
            metadata = {"format": "xml", "parse_error": str(e)}

@@ -10,7 +10,7 @@ from crewai_tools.rag.source_content import SourceContent
class YoutubeChannelLoader(BaseLoader):
    """Loader for YouTube channels."""

    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        """Load and extract content from a YouTube channel.

        Args:

@@ -24,7 +24,7 @@ class YoutubeChannelLoader(BaseLoader):
            ValueError: If the URL is not a valid YouTube channel URL
        """
        try:
            from pytube import Channel
            from pytube import Channel  # type: ignore[import-untyped]
        except ImportError as e:
            raise ImportError(
                "YouTube channel support requires pytube. Install with: uv add pytube"

@@ -89,7 +89,6 @@ class YoutubeChannelLoader(BaseLoader):
        try:
            api = YouTubeTranscriptApi()
            transcript_list = api.list(video_id)
            transcript = None

            try:
                transcript = transcript_list.find_transcript(["en"])

@@ -101,7 +100,7 @@ class YoutubeChannelLoader(BaseLoader):
                    )
                )
            except Exception:
                transcript = next(iter(transcript_list), None)
                transcript = next(iter(transcript_list))

            if transcript:
                transcript_data = transcript.fetch()

@@ -148,7 +147,8 @@ class YoutubeChannelLoader(BaseLoader):
            doc_id=self.generate_doc_id(source_ref=channel_url, content=content),
        )

    def _extract_video_id(self, url: str) -> str | None:
    @staticmethod
    def _extract_video_id(url: str) -> str | None:
        """Extract video ID from YouTube URL."""
        patterns = [
            r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([^&\n?#]+)",

@@ -11,7 +11,7 @@ from crewai_tools.rag.source_content import SourceContent
class YoutubeVideoLoader(BaseLoader):
    """Loader for YouTube videos."""

    def load(self, source: SourceContent, **kwargs) -> LoaderResult:
    def load(self, source: SourceContent, **kwargs) -> LoaderResult:  # type: ignore[override]
        """Load and extract transcript from a YouTube video.

        Args:

@@ -48,7 +48,6 @@ class YoutubeVideoLoader(BaseLoader):
        api = YouTubeTranscriptApi()
        transcript_list = api.list(video_id)

        transcript = None
        try:
            transcript = transcript_list.find_transcript(["en"])
        except Exception:

@@ -72,7 +71,7 @@ class YoutubeVideoLoader(BaseLoader):
        content = " ".join(text_content)

        try:
            from pytube import YouTube
            from pytube import YouTube  # type: ignore[import-untyped]

            yt = YouTube(video_url)
            metadata["title"] = yt.title

@@ -103,7 +102,8 @@ class YoutubeVideoLoader(BaseLoader):
            doc_id=self.generate_doc_id(source_ref=video_url, content=content),
        )

    def _extract_video_id(self, url: str) -> str | None:
    @staticmethod
    def _extract_video_id(url: str) -> str | None:
        """Extract video ID from various YouTube URL formats."""
        patterns = [
            r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([^&\n?#]+)",

@@ -3,7 +3,15 @@ from typing import Any


def compute_sha256(content: str) -> str:
    return hashlib.sha256(content.encode("utf-8")).hexdigest()
    """Compute the SHA-256 hash of the given content.

    Args:
        content: The content to hash.

    Returns:
        The SHA-256 hash of the content as a hexadecimal string.
    """
    return hashlib.sha256(content.encode()).hexdigest()


def sanitize_metadata_for_chromadb(metadata: dict[str, Any]) -> dict[str, Any]:
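
Not part of the diff: a one-line usage sketch of compute_sha256 above. str.encode() defaults to UTF-8, so dropping the explicit "utf-8" argument does not change the digest.

    assert compute_sha256("hello") == hashlib.sha256(b"hello").hexdigest()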

@@ -1,3 +1,5 @@
from __future__ import annotations

from functools import cached_property
import os
from pathlib import Path

@@ -28,7 +30,7 @@ class SourceContent:
        return os.path.exists(self.source)

    @cached_property
    def data_type(self) -> "DataType":
    def data_type(self) -> DataType:
        from crewai_tools.rag.data_types import DataTypes

        return DataTypes.from_content(self.source)

@@ -32,9 +32,6 @@ from crewai_tools.tools.contextualai_rerank_tool.contextual_rerank_tool import (
from crewai_tools.tools.couchbase_tool.couchbase_tool import (
    CouchbaseFTSVectorSearchTool,
)
from crewai_tools.tools.crewai_enterprise_tools.crewai_enterprise_tools import (
    CrewaiEnterpriseTools,
)
from crewai_tools.tools.crewai_platform_tools.crewai_platform_tools import (
    CrewaiPlatformTools,
)

@@ -199,7 +196,6 @@ __all__ = [
    "ContextualAIQueryTool",
    "ContextualAIRerankTool",
    "CouchbaseFTSVectorSearchTool",
    "CrewaiEnterpriseTools",
    "CrewaiPlatformTools",
    "DOCXSearchTool",
    "DallETool",

@@ -4,6 +4,7 @@ from typing import Any

from crewai.tools import BaseTool, EnvVar
from openai import OpenAI
from openai.types.chat import ChatCompletion
from pydantic import BaseModel, Field


@@ -30,7 +31,7 @@ class AIMindTool(BaseTool):
    )
    args_schema: type[BaseModel] = AIMindToolInputSchema
    api_key: str | None = None
    datasources: list[dict[str, Any]] | None = None
    datasources: list[dict[str, Any]] = Field(default_factory=list)
    mind_name: str | None = None
    package_dependencies: list[str] = Field(default_factory=lambda: ["minds-sdk"])
    env_vars: list[EnvVar] = Field(

@@ -87,10 +88,15 @@ class AIMindTool(BaseTool):
            base_url=AIMindToolConstants.MINDS_API_BASE_URL, api_key=self.api_key
        )

        if self.mind_name is None:
            raise ValueError("Mind name is not set.")

        completion = openai_client.chat.completions.create(
            model=self.mind_name,
            messages=[{"role": "user", "content": query}],
            stream=False,
        )
        if not isinstance(completion, ChatCompletion):
            raise ValueError("Invalid response from AI-Mind")

        return completion.choices[0].message.content

@@ -1,3 +1,5 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING, Any

@@ -49,7 +51,7 @@ class ApifyActorsTool(BaseTool):
            print(f"URL: {result['metadata']['url']}")
            print(f"Content: {result.get('markdown', 'N/A')[:100]}...")
    """
    actor_tool: "_ApifyActorsTool" = Field(description="Apify Actor Tool")
    actor_tool: _ApifyActorsTool = Field(description="Apify Actor Tool")
    package_dependencies: list[str] = Field(default_factory=lambda: ["langchain-apify"])

    def __init__(self, actor_name: str, *args: Any, **kwargs: Any) -> None:

@@ -36,14 +36,9 @@ class ArxivPaperTool(BaseTool):
    model_config = ConfigDict(extra="allow")
    package_dependencies: list[str] = Field(default_factory=lambda: ["pydantic"])
    env_vars: list[EnvVar] = Field(default_factory=list)

    def __init__(
        self, download_pdfs=False, save_dir="./arxiv_pdfs", use_title_as_filename=False
    ):
        super().__init__()
        self.download_pdfs = download_pdfs
        self.save_dir = save_dir
        self.use_title_as_filename = use_title_as_filename
    download_pdfs: bool = False
    save_dir: str = "./arxiv_pdfs"
    use_title_as_filename: bool = False

    def _run(self, search_query: str, max_results: int = 5) -> str:
        try:

@@ -70,7 +65,7 @@ class ArxivPaperTool(BaseTool):
                filename = f"{filename_base[:500]}.pdf"
                save_path = Path(save_dir) / filename

                self.download_pdf(paper["pdf_url"], save_path)
                self.download_pdf(paper["pdf_url"], save_path)  # type: ignore[arg-type]
                time.sleep(self.SLEEP_DURATION)

            results = [self._format_paper_result(p) for p in papers]

@@ -1,4 +1,4 @@
import datetime
from datetime import datetime
import os
import time
from typing import Any, ClassVar

@@ -1,6 +1,8 @@
from .brightdata_dataset import BrightDataDatasetTool
from .brightdata_serp import BrightDataSearchTool
from .brightdata_unlocker import BrightDataWebUnlockerTool
from crewai_tools.tools.brightdata_tool.brightdata_dataset import BrightDataDatasetTool
from crewai_tools.tools.brightdata_tool.brightdata_serp import BrightDataSearchTool
from crewai_tools.tools.brightdata_tool.brightdata_unlocker import (
    BrightDataWebUnlockerTool,
)


__all__ = ["BrightDataDatasetTool", "BrightDataSearchTool", "BrightDataWebUnlockerTool"]

@@ -72,6 +72,6 @@ class BrowserbaseLoadTool(BaseTool):
        self.proxy = proxy

    def _run(self, url: str):
        return self.browserbase.load_url(
        return self.browserbase.load_url(  # type: ignore[union-attr]
            url, self.text_content, self.session_id, self.proxy
        )

@@ -1,8 +1,7 @@
from pydantic import BaseModel, Field

from crewai_tools.rag.data_types import DataType

from ..rag.rag_tool import RagTool
from crewai_tools.tools.rag.rag_tool import RagTool


class FixedCodeDocsSearchToolSchema(BaseModel):

@@ -38,7 +37,7 @@ class CodeDocsSearchTool(RagTool):
    def add(self, docs_url: str) -> None:
        super().add(docs_url, data_type=DataType.DOCS_SITE)

    def _run(
    def _run(  # type: ignore[override]
        self,
        search_query: str,
        docs_url: str | None = None,

@@ -7,18 +7,30 @@ potentially unsafe operations and importing restricted modules.

import importlib.util
import os
import subprocess
from types import ModuleType
from typing import Any, ClassVar
from typing import Any, ClassVar, TypedDict

from crewai.tools import BaseTool
from docker import DockerClient, from_env as docker_from_env
from docker.errors import ImageNotFound, NotFound
from docker.models.containers import Container
from docker import (  # type: ignore[import-untyped]
    DockerClient,
    from_env as docker_from_env,
)
from docker.errors import ImageNotFound, NotFound  # type: ignore[import-untyped]
from docker.models.containers import Container  # type: ignore[import-untyped]
from pydantic import BaseModel, Field
from typing_extensions import Unpack

from crewai_tools.printer import Printer


class RunKwargs(TypedDict, total=False):
    """Keyword arguments for the _run method."""

    code: str
    libraries_used: list[str]


class CodeInterpreterSchema(BaseModel):
    """Schema for defining inputs to the CodeInterpreterTool.
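
Not part of the diff: a minimal sketch of the PEP 692 pattern the diff adopts with RunKwargs above; the standalone run() helper is hypothetical.

    from typing_extensions import Unpack

    def run(**kwargs: Unpack[RunKwargs]) -> str:
        # A checker now knows kwargs may only contain "code" (str) and
        # "libraries_used" (list[str]); both are optional (total=False).
        return kwargs.get("code", "")

    run(code="print('hi')", libraries_used=[])  # accepted
    run(codez="typo")  # rejected by a type checker such as mypy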

@@ -115,14 +127,14 @@ class SandboxPython:
        return safe_builtins

    @staticmethod
    def exec(code: str, locals: dict[str, Any]) -> None:
    def exec(code: str, locals_: dict[str, Any]) -> None:
        """Executes Python code in a restricted environment.

        Args:
            code: The Python code to execute as a string.
            locals: A dictionary that will be used for local variable storage.
            locals_: A dictionary that will be used for local variable storage.
        """
        exec(code, {"__builtins__": SandboxPython.safe_builtins()}, locals)  # noqa: S102
        exec(code, {"__builtins__": SandboxPython.safe_builtins()}, locals_)  # noqa: S102

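Not part of the diff: a minimal sketch of the renamed parameter, mirroring run_code_in_restricted_sandbox below; the code string is hypothetical.

    exec_locals: dict[str, Any] = {}
    SandboxPython.exec(code="result = 1 + 1", locals_=exec_locals)
    print(exec_locals.get("result", "No result variable found."))  # prints 2
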
class CodeInterpreterTool(BaseTool):

@@ -148,8 +160,13 @@ class CodeInterpreterTool(BaseTool):

        Returns:
            The directory path where the package is installed.

        Raises:
            RuntimeError: If the package cannot be found.
        """
        spec = importlib.util.find_spec("crewai_tools")
        if spec is None or spec.origin is None:
            raise RuntimeError("Cannot find crewai_tools package installation path")
        return os.path.dirname(spec.origin)

    def _verify_docker_image(self) -> None:

@@ -189,7 +206,7 @@ class CodeInterpreterTool(BaseTool):
            rm=True,
        )

    def _run(self, **kwargs) -> str:
    def _run(self, **kwargs: Unpack[RunKwargs]) -> str:
        """Runs the code interpreter tool with the provided arguments.

        Args:

@@ -198,14 +215,18 @@ class CodeInterpreterTool(BaseTool):
        Returns:
            The output of the executed code as a string.
        """
        code = kwargs.get("code", self.code)
        libraries_used = kwargs.get("libraries_used", [])
        code: str | None = kwargs.get("code", self.code)
        libraries_used: list[str] = kwargs.get("libraries_used", [])

        if not code:
            return "No code provided to execute."

        if self.unsafe_mode:
            return self.run_code_unsafe(code, libraries_used)
        return self.run_code_safety(code, libraries_used)

    def _install_libraries(self, container: Container, libraries: list[str]) -> None:
    @staticmethod
    def _install_libraries(container: Container, libraries: list[str]) -> None:
        """Installs required Python libraries in the Docker container.

        Args:

@@ -245,7 +266,8 @@ class CodeInterpreterTool(BaseTool):
            volumes={current_path: {"bind": "/workspace", "mode": "rw"}},  # type: ignore
        )

    def _check_docker_available(self) -> bool:
    @staticmethod
    def _check_docker_available() -> bool:
        """Checks if Docker is available and running on the system.

        Attempts to run the 'docker info' command to verify Docker availability.

@@ -254,7 +276,6 @@ class CodeInterpreterTool(BaseTool):
        Returns:
            True if Docker is available and running, False otherwise.
        """
        import subprocess

        try:
            subprocess.run(

@@ -319,7 +340,8 @@ class CodeInterpreterTool(BaseTool):
            return f"Something went wrong while running the code: \n{exec_result.output.decode('utf-8')}"
        return exec_result.output.decode("utf-8")

    def run_code_in_restricted_sandbox(self, code: str) -> str:
    @staticmethod
    def run_code_in_restricted_sandbox(code: str) -> str:
        """Runs Python code in a restricted sandbox environment.

        Executes the code with restricted access to potentially dangerous modules and

@@ -333,14 +355,15 @@ class CodeInterpreterTool(BaseTool):
            or an error message if execution failed.
        """
        Printer.print("Running code in restricted sandbox", color="yellow")
        exec_locals = {}
        exec_locals: dict[str, Any] = {}
        try:
            SandboxPython.exec(code=code, locals=exec_locals)
            SandboxPython.exec(code=code, locals_=exec_locals)
            return exec_locals.get("result", "No result variable found.")
        except Exception as e:
            return f"An error occurred: {e!s}"

    def run_code_unsafe(self, code: str, libraries_used: list[str]) -> str:
    @staticmethod
    def run_code_unsafe(code: str, libraries_used: list[str]) -> str:
        """Runs code directly on the host machine without any safety restrictions.

        WARNING: This mode is unsafe and should only be used in trusted environments

@@ -361,7 +384,7 @@ class CodeInterpreterTool(BaseTool):

        # Execute the code
        try:
            exec_locals = {}
            exec_locals: dict[str, Any] = {}
            exec(code, {}, exec_locals)  # noqa: S102
            return exec_locals.get("result", "No result variable found.")
        except Exception as e:

@@ -124,5 +124,5 @@ class ComposioTool(BaseTool):

        return [
            cls.from_action(action=action, **kwargs)
            for action in toolset.find_actions_by_tags(*apps, tags=tags)
            for action in toolset.find_actions_by_tags(*apps, tags=tags)  # type: ignore[arg-type]
        ]

@@ -82,7 +82,7 @@ class ContextualAIQueryTool(BaseTool):
        if loop and loop.is_running():
            # Already inside an event loop
            try:
                import nest_asyncio
                import nest_asyncio  # type: ignore[import-untyped]

                nest_asyncio.apply(loop)
                loop.run_until_complete(

@@ -4,10 +4,13 @@ from typing import Any


try:
    from couchbase.cluster import Cluster
    from couchbase.options import SearchOptions
    import couchbase.search as search
    from couchbase.vector_search import VectorQuery, VectorSearch
    from couchbase.cluster import Cluster  # type: ignore[import-untyped]
    from couchbase.options import SearchOptions  # type: ignore[import-untyped]
    import couchbase.search as search  # type: ignore[import-untyped]
    from couchbase.vector_search import (  # type: ignore[import-untyped]
        VectorQuery,
        VectorSearch,
    )

    COUCHBASE_AVAILABLE = True
except ImportError:

@@ -38,24 +41,31 @@ class CouchbaseFTSVectorSearchTool(BaseTool):
    name: str = "CouchbaseFTSVectorSearchTool"
    description: str = "A tool to search the Couchbase database for relevant information on internal documents."
    args_schema: type[BaseModel] = CouchbaseToolSchema
    cluster: SkipValidation[Cluster | None] = None
    collection_name: str | None = (None,)
    scope_name: str | None = (None,)
    bucket_name: str | None = (None,)
    index_name: str | None = (None,)
    cluster: SkipValidation[Cluster] = Field(
        description="An instance of the Couchbase Cluster connected to the desired Couchbase server.",
    )
    collection_name: str = Field(
        description="The name of the Couchbase collection to search",
    )
    scope_name: str = Field(
        description="The name of the Couchbase scope containing the collection to search.",
    )
    bucket_name: str = Field(
        description="The name of the Couchbase bucket to search",
    )
    index_name: str = Field(
        description="The name of the Couchbase index to search",
    )
    embedding_key: str | None = Field(
        default="embedding",
        description="Name of the field in the search index that stores the vector",
    )
    scoped_index: bool | None = (
        Field(
            default=True,
            description="Specify whether the index is scoped. Is True by default.",
        ),
    scoped_index: bool = Field(
        default=True,
        description="Specify whether the index is scoped. Is True by default.",
    )
    limit: int | None = Field(default=3)
    embedding_function: SkipValidation[Callable[[str], list[float]]] = Field(
        default=None,
        description="A function that takes a string and returns a list of floats. This is used to embed the query before searching the database.",
    )

@@ -112,6 +122,9 @@ class CouchbaseFTSVectorSearchTool(BaseTool):
                    " Please create the index before searching."
                )
        else:
            if not self.cluster:
                raise ValueError("Cluster instance must be provided")

            all_indexes = [
                index.name for index in self.cluster.search_indexes().get_all_indexes()
            ]

@@ -140,24 +153,6 @@ class CouchbaseFTSVectorSearchTool(BaseTool):
        super().__init__(**kwargs)
        if COUCHBASE_AVAILABLE:
            try:
                if not self.cluster:
                    raise ValueError("Cluster instance must be provided")

                if not self.bucket_name:
                    raise ValueError("Bucket name must be provided")

                if not self.scope_name:
                    raise ValueError("Scope name must be provided")

                if not self.collection_name:
                    raise ValueError("Collection name must be provided")

                if not self.index_name:
                    raise ValueError("Index name must be provided")

                if not self.embedding_function:
                    raise ValueError("Embedding function must be provided")

                self._bucket = self.cluster.bucket(self.bucket_name)
                self._scope = self._bucket.scope(self.scope_name)
                self._collection = self._scope.collection(self.collection_name)

@@ -1,88 +0,0 @@
"""Crewai Enterprise Tools."""

import json
import logging
import os

from crewai.tools import BaseTool

from crewai_tools.adapters.enterprise_adapter import EnterpriseActionKitToolAdapter
from crewai_tools.adapters.tool_collection import ToolCollection


logger = logging.getLogger(__name__)


def CrewaiEnterpriseTools(  # noqa: N802
    enterprise_token: str | None = None,
    actions_list: list[str] | None = None,
    enterprise_action_kit_project_id: str | None = None,
    enterprise_action_kit_project_url: str | None = None,
) -> ToolCollection[BaseTool]:
    """Factory function that returns crewai enterprise tools.

    Args:
        enterprise_token: The token for accessing enterprise actions.
            If not provided, will try to use CREWAI_ENTERPRISE_TOOLS_TOKEN env var.
        actions_list: Optional list of specific tool names to include.
            If provided, only tools with these names will be returned.
        enterprise_action_kit_project_id: Optional ID of the Enterprise Action Kit project.
        enterprise_action_kit_project_url: Optional URL of the Enterprise Action Kit project.

    Returns:
        A ToolCollection of BaseTool instances for enterprise actions
    """
    import warnings

    warnings.warn(
        "CrewaiEnterpriseTools will be removed in v1.0.0. Considering use `Agent(apps=[...])` instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if enterprise_token is None or enterprise_token == "":
        enterprise_token = os.environ.get("CREWAI_ENTERPRISE_TOOLS_TOKEN")
        if not enterprise_token:
            logger.warning("No enterprise token provided")

    adapter_kwargs = {"enterprise_action_token": enterprise_token}

    if enterprise_action_kit_project_id is not None:
        adapter_kwargs["enterprise_action_kit_project_id"] = (
            enterprise_action_kit_project_id
        )
    if enterprise_action_kit_project_url is not None:
        adapter_kwargs["enterprise_action_kit_project_url"] = (
            enterprise_action_kit_project_url
        )

    adapter = EnterpriseActionKitToolAdapter(**adapter_kwargs)
    all_tools = adapter.tools()
    parsed_actions_list = _parse_actions_list(actions_list)

    # Filter tools based on the provided list
    return ToolCollection(all_tools).filter_by_names(parsed_actions_list)


# ENTERPRISE INJECTION ONLY
def _parse_actions_list(actions_list: list[str] | None) -> list[str] | None:
    """Parse a string representation of a list of tool names to a list of tool names.

    Args:
        actions_list: A string representation of a list of tool names.

    Returns:
        A list of tool names.
    """
    if actions_list is not None:
        return actions_list

    actions_list_from_env = os.environ.get("CREWAI_ENTERPRISE_TOOLS_ACTIONS_LIST")
    if actions_list_from_env is None:
        return None

    try:
        return json.loads(actions_list_from_env)
    except json.JSONDecodeError:
        logger.warning(f"Failed to parse actions_list as JSON: {actions_list_from_env}")
        return None

@@ -18,7 +18,7 @@ class CrewaiPlatformToolBuilder:
        apps: list[str],
    ):
        self._apps = apps
        self._actions_schema = {}
        self._actions_schema = {}  # type: ignore[var-annotated]
        self._tools = None

    def tools(self) -> list[BaseTool]:

@@ -24,4 +24,4 @@ def CrewaiPlatformTools(  # noqa: N802
    """
    builder = CrewaiPlatformToolBuilder(apps=apps)

    return builder.tools()
    return builder.tools()  # type: ignore

@@ -1,8 +1,7 @@
from pydantic import BaseModel, Field

from crewai_tools.rag.data_types import DataType

from ..rag.rag_tool import RagTool
from crewai_tools.tools.rag.rag_tool import RagTool


class FixedCSVSearchToolSchema(BaseModel):

@@ -38,7 +37,7 @@ class CSVSearchTool(RagTool):
    def add(self, csv: str) -> None:
        super().add(csv, data_type=DataType.CSV)

    def _run(
    def _run(  # type: ignore[override]
        self,
        search_query: str,
        csv: str | None = None,

@@ -1,7 +1,8 @@
import json
from typing import Literal

from crewai.tools import BaseTool, EnvVar
from openai import OpenAI
from openai import Omit, OpenAI
from pydantic import BaseModel, Field


@@ -19,8 +20,22 @@ class DallETool(BaseTool):
    args_schema: type[BaseModel] = ImagePromptSchema

    model: str = "dall-e-3"
    size: str = "1024x1024"
    quality: str = "standard"
    size: (
        Literal[
            "auto",
            "1024x1024",
            "1536x1024",
            "1024x1536",
            "256x256",
            "512x512",
            "1792x1024",
            "1024x1792",
        ]
        | None
    ) = "1024x1024"
    quality: (
        Literal["standard", "hd", "low", "medium", "high", "auto"] | None | Omit
    ) = "standard"
    n: int = 1

    env_vars: list[EnvVar] = Field(

@@ -49,6 +64,9 @@ class DallETool(BaseTool):
            n=self.n,
        )

        if not response or not response.data:
            return "Failed to generate image."

        return json.dumps(
            {
                "image_url": response.data[0].url,
@@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
import time
|
||||
from typing import TYPE_CHECKING, Any, TypeGuard, TypedDict
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
@@ -9,6 +12,28 @@ if TYPE_CHECKING:
|
||||
from databricks.sdk import WorkspaceClient
|
||||
|
||||
|
||||
class ExecutionContext(TypedDict, total=False):
|
||||
catalog: str
|
||||
schema: str
|
||||
|
||||
|
||||
def _has_data_array(result: Any) -> TypeGuard[Any]:
|
||||
"""Type guard to check if result has data_array attribute.
|
||||
|
||||
Args:
|
||||
result: The result object to check.
|
||||
|
||||
Returns:
|
||||
True if result.result.data_array exists and is not None.
|
||||
"""
|
||||
return (
|
||||
hasattr(result, "result")
|
||||
and result.result is not None
|
||||
and hasattr(result.result, "data_array")
|
||||
and result.result.data_array is not None
|
||||
)
|
||||
|
||||
|
||||
class DatabricksQueryToolSchema(BaseModel):
|
||||
"""Input schema for DatabricksQueryTool."""
|
||||
|
||||
@@ -32,7 +57,7 @@ class DatabricksQueryToolSchema(BaseModel):
|
||||
)
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_input(self) -> "DatabricksQueryToolSchema":
|
||||
def validate_input(self) -> DatabricksQueryToolSchema:
|
||||
"""Validate the input parameters."""
|
||||
# Ensure the query is not empty
|
||||
if not self.query or not self.query.strip():
|
||||
@@ -72,7 +97,7 @@ class DatabricksQueryTool(BaseTool):
|
||||
default_schema: str | None = None
|
||||
default_warehouse_id: str | None = None
|
||||
|
||||
_workspace_client: Optional["WorkspaceClient"] = None
|
||||
_workspace_client: WorkspaceClient | None = None
|
||||
package_dependencies: list[str] = Field(default_factory=lambda: ["databricks-sdk"])
|
||||
|
||||
def __init__(
|
||||
@@ -110,7 +135,7 @@ class DatabricksQueryTool(BaseTool):
|
||||
)
|
||||
|
||||
@property
|
||||
def workspace_client(self) -> "WorkspaceClient":
|
||||
def workspace_client(self) -> WorkspaceClient:
|
||||
"""Get or create a Databricks WorkspaceClient instance."""
|
||||
if self._workspace_client is None:
|
||||
try:
|
||||
@@ -209,8 +234,12 @@ class DatabricksQueryTool(BaseTool):
|
||||
db_schema = validated_input.db_schema
|
||||
warehouse_id = validated_input.warehouse_id
|
||||
|
||||
if warehouse_id is None:
|
||||
return "SQL warehouse ID must be provided either as a parameter or as a default."
|
||||
|
||||
# Setup SQL context with catalog/schema if provided
|
||||
context = {}
|
||||
|
||||
context: ExecutionContext = {}
|
||||
if catalog:
|
||||
context["catalog"] = catalog
|
||||
if db_schema:
|
||||
@@ -231,7 +260,6 @@ class DatabricksQueryTool(BaseTool):
|
||||
return f"Error starting query execution: {execute_error!s}"
|
||||
|
||||
# Poll for results with better error handling
|
||||
import time
|
||||
|
||||
result = None
|
||||
timeout = 300 # 5 minutes timeout
|
||||
@@ -239,6 +267,9 @@ class DatabricksQueryTool(BaseTool):
|
||||
poll_count = 0
|
||||
previous_state = None # Track previous state to detect changes
|
||||
|
||||
if statement_id is None:
|
||||
return "Failed to retrieve statement ID after execution."
|
||||
|
||||
while time.time() - start_time < timeout:
|
||||
poll_count += 1
|
||||
try:
|
||||
@@ -248,7 +279,7 @@ class DatabricksQueryTool(BaseTool):
|
||||
# Check if finished - be very explicit about state checking
|
||||
if hasattr(result, "status") and hasattr(result.status, "state"):
|
||||
state_value = str(
|
||||
result.status.state
|
||||
result.status.state # type: ignore[union-attr]
|
||||
) # Convert to string to handle both string and enum
|
||||
|
||||
# Track state changes for debugging
|
||||
@@ -265,16 +296,16 @@ class DatabricksQueryTool(BaseTool):
|
||||
# First try direct access to error.message
|
||||
if (
|
||||
hasattr(result.status, "error")
|
||||
and result.status.error
|
||||
and result.status.error # type: ignore[union-attr]
|
||||
):
|
||||
if hasattr(result.status.error, "message"):
|
||||
error_info = result.status.error.message
|
||||
if hasattr(result.status.error, "message"): # type: ignore[union-attr]
|
||||
error_info = result.status.error.message # type: ignore[union-attr,assignment]
|
||||
# Some APIs may have a different structure
|
||||
elif hasattr(result.status.error, "error_message"):
|
||||
error_info = result.status.error.error_message
|
||||
elif hasattr(result.status.error, "error_message"): # type: ignore[union-attr]
|
||||
error_info = result.status.error.error_message # type: ignore[union-attr]
|
||||
# Last resort, try to convert the whole error object to string
|
||||
else:
|
||||
error_info = str(result.status.error)
|
||||
error_info = str(result.status.error) # type: ignore[union-attr]
|
||||
except Exception as err_extract_error:
|
||||
# If all else fails, try to get any info we can
|
||||
error_info = (
|
||||
@@ -302,7 +333,7 @@ class DatabricksQueryTool(BaseTool):
|
||||
return "Query completed but returned an invalid result structure"
|
||||
|
||||
# Convert state to string for comparison
|
||||
state_value = str(result.status.state)
|
||||
state_value = str(result.status.state) # type: ignore[union-attr]
|
||||
if not any(
|
||||
state in state_value for state in ["SUCCEEDED", "FAILED", "CANCELED"]
|
||||
):
|
||||
@@ -323,7 +354,7 @@ class DatabricksQueryTool(BaseTool):
|
||||
if has_schema and has_result:
|
||||
try:
|
||||
# Get schema for column names
|
||||
columns = [col.name for col in result.manifest.schema.columns]
|
||||
columns = [col.name for col in result.manifest.schema.columns] # type: ignore[union-attr]
|
||||
|
||||
# Debug info for schema
|
||||
|
||||
@@ -331,7 +362,7 @@ class DatabricksQueryTool(BaseTool):
|
||||
all_columns = set(columns)
|
||||
|
||||
# Dump the raw structure of result data to help troubleshoot
|
||||
if hasattr(result.result, "data_array"):
|
||||
if _has_data_array(result):
|
||||
# Add defensive check for None data_array
|
||||
if result.result.data_array is None:
|
||||
# Return empty result handling rather than trying to process null data
|
||||
@@ -343,8 +374,7 @@ class DatabricksQueryTool(BaseTool):
             # Only try to analyze sample if data_array exists and has content
             if (
-                hasattr(result.result, "data_array")
-                and result.result.data_array
+                _has_data_array(result)
                 and len(result.result.data_array) > 0
                 and len(result.result.data_array[0]) > 0
             ):
@@ -385,17 +415,17 @@ class DatabricksQueryTool(BaseTool):
             rows_with_single_item = 0
             if (
                 hasattr(result.result, "data_array")
-                and result.result.data_array
-                and len(result.result.data_array) > 0
+                and result.result.data_array  # type: ignore[union-attr]
+                and len(result.result.data_array) > 0  # type: ignore[union-attr]
             ):
                 sample_size_for_rows = (
-                    min(sample_size, len(result.result.data_array[0]))
+                    min(sample_size, len(result.result.data_array[0]))  # type: ignore[union-attr]
                     if "sample_size" in locals()
-                    else min(20, len(result.result.data_array[0]))
+                    else min(20, len(result.result.data_array[0]))  # type: ignore[union-attr]
                 )
                 rows_with_single_item = sum(
-                    1
-                    for row in result.result.data_array[0][
+                    1  # type: ignore[misc]
+                    for row in result.result.data_array[0][  # type: ignore[union-attr]
                         :sample_size_for_rows
                     ]
                     if isinstance(row, list) and len(row) == 1
@@ -424,13 +454,13 @@ class DatabricksQueryTool(BaseTool):
             # We're dealing with data where the rows may be incorrectly structured

             # Collect all values into a flat list
-            all_values = []
+            all_values: list[Any] = []
             if (
                 hasattr(result.result, "data_array")
-                and result.result.data_array
+                and result.result.data_array  # type: ignore[union-attr]
             ):
                 # Flatten all values into a single list
-                for chunk in result.result.data_array:
+                for chunk in result.result.data_array:  # type: ignore[union-attr]
                     for item in chunk:
                         if isinstance(item, (list, tuple)):
                             all_values.extend(item)
@@ -629,7 +659,7 @@ class DatabricksQueryTool(BaseTool):
                 # Fix titles that might still have issues
                 if (
                     isinstance(row.get("Title"), str)
-                    and len(row.get("Title")) <= 1
+                    and len(row.get("Title")) <= 1  # type: ignore[arg-type]
                 ):
                     # This is likely still a fragmented title - mark as potentially incomplete
                     row["Title"] = f"[INCOMPLETE] {row.get('Title')}"
@@ -645,11 +675,11 @@ class DatabricksQueryTool(BaseTool):
             # Check different result structures
             if (
                 hasattr(result.result, "data_array")
-                and result.result.data_array
+                and result.result.data_array  # type: ignore[union-attr]
             ):
                 # Check if data appears to be malformed within chunks
                 for _chunk_idx, chunk in enumerate(
-                    result.result.data_array
+                    result.result.data_array  # type: ignore[union-attr]
                 ):
                     # Check if chunk might actually contain individual columns of a single row
                     # This is another way data might be malformed - check the first few values
@@ -756,10 +786,10 @@ class DatabricksQueryTool(BaseTool):

                         chunk_results.append(row_dict)

-            elif hasattr(result.result, "data") and result.result.data:
+            elif hasattr(result.result, "data") and result.result.data:  # type: ignore[union-attr]
                 # Alternative data structure

-                for _row_idx, row in enumerate(result.result.data):
+                for _row_idx, row in enumerate(result.result.data):  # type: ignore[union-attr]
                     # Debug info

                     # Safely create dictionary matching column names to values
@@ -803,12 +833,12 @@ class DatabricksQueryTool(BaseTool):

             # If we have no results but the query succeeded (e.g., for DDL statements)
             if not chunk_results and hasattr(result, "status"):
-                state_value = str(result.status.state)
+                state_value = str(result.status.state)  # type: ignore[union-attr]
                 if "SUCCEEDED" in state_value:
                     return "Query executed successfully (no results to display)"

             # Format and return results
-            return self._format_results(chunk_results)
+            return self._format_results(chunk_results)  # type: ignore[arg-type]

         except Exception as e:
             # Include more details in the error message to help with debugging
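The first Databricks hunk swaps the inline hasattr check for a `_has_data_array(result)` helper whose definition falls outside this excerpt. A minimal sketch of what such a guard might look like (the name comes from the diff; the body is an assumption mirroring the two conditions it replaces):

from typing import Any


def _has_data_array(result: Any) -> bool:
    # Sketch only: true when result.result exposes a data_array attribute.
    return result.result is not None and hasattr(result.result, "data_array")
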
@@ -35,7 +35,10 @@ class DirectoryReadTool(BaseTool):
         self,
         **kwargs: Any,
     ) -> Any:
-        directory = kwargs.get("directory", self.directory)
+        directory: str | None = kwargs.get("directory", self.directory)
+        if directory is None:
+            raise ValueError("Directory must be provided.")
+
         if directory[-1] == "/":
             directory = directory[:-1]
         files_list = [
@@ -1,8 +1,7 @@
 from pydantic import BaseModel, Field

 from crewai_tools.rag.data_types import DataType
-
-from ..rag.rag_tool import RagTool
+from crewai_tools.tools.rag.rag_tool import RagTool


 class FixedDirectorySearchToolSchema(BaseModel):
@@ -38,7 +37,7 @@ class DirectorySearchTool(RagTool):
     def add(self, directory: str) -> None:
         super().add(directory, data_type=DataType.DIRECTORY)

-    def _run(
+    def _run(  # type: ignore[override]
         self,
         search_query: str,
         directory: str | None = None,
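The `# type: ignore[override]` added to `_run` here, and in the matching RAG-tool hunks below, silences mypy's Liskov check: the subclass signature renames and adds parameters relative to the base class. Reduced to a sketch (the base signature shown is an assumption, not taken from the diff):

from typing import Any


class Base:
    def _run(self, query: str, **kwargs: Any) -> Any: ...


class Sub(Base):
    def _run(  # type: ignore[override]
        self, search_query: str, directory: str | None = None, **kwargs: Any
    ) -> Any: ...
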
@@ -3,8 +3,7 @@ from typing import Any
 from pydantic import BaseModel, Field

 from crewai_tools.rag.data_types import DataType
-
-from ..rag.rag_tool import RagTool
+from crewai_tools.tools.rag.rag_tool import RagTool


 class FixedDOCXSearchToolSchema(BaseModel):
@@ -46,7 +45,7 @@ class DOCXSearchTool(RagTool):
     def add(self, docx: str) -> None:
         super().add(docx, data_type=DataType.DOCX)

-    def _run(
+    def _run(  # type: ignore[override]
         self,
         search_query: str,
         docx: str | None = None,
@@ -1,17 +1,21 @@
+from __future__ import annotations
+
+from builtins import type as type_
 import os
-from typing import Any, Optional
+from typing import Any, TypedDict

 from crewai.tools import BaseTool, EnvVar
 from pydantic import BaseModel, ConfigDict, Field
+from typing_extensions import Required


-try:
-    from exa_py import Exa
+class SearchParams(TypedDict, total=False):
+    """Parameters for Exa search API."""

-    EXA_INSTALLED = True
-except ImportError:
-    Exa = Any
-    EXA_INSTALLED = False
+    type: Required[str | None]
+    start_published_date: str
+    end_published_date: str
+    include_domains: list[str]


 class EXABaseToolSchema(BaseModel):
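With `total=False`, every `SearchParams` key is optional except the one wrapped in `Required`, so partially populated dicts still type-check. An illustrative use (not part of the diff):

params: SearchParams = {"type": "auto"}  # OK: only the Required key is needed
params["include_domains"] = ["example.com"]  # OK: optional key added later
# params = {}  # mypy error: missing the Required "type" key
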
@@ -31,8 +35,8 @@ class EXASearchTool(BaseTool):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     name: str = "EXASearchTool"
     description: str = "Search the internet using Exa"
-    args_schema: type[BaseModel] = EXABaseToolSchema
-    client: Optional["Exa"] = None
+    args_schema: type_[BaseModel] = EXABaseToolSchema
+    client: Any | None = None
     content: bool | None = False
     summary: bool | None = False
     type: str | None = "auto"
@@ -72,7 +76,9 @@ class EXASearchTool(BaseTool):
         super().__init__(
             **kwargs,
         )
-        if not EXA_INSTALLED:
+        try:
+            from exa_py import Exa
+        except ImportError as e:
             import click

             if click.confirm(
@@ -82,11 +88,16 @@ class EXASearchTool(BaseTool):

                 subprocess.run(["uv", "add", "exa_py"], check=True)  # noqa: S607

+                # Re-import after installation
+                from exa_py import Exa
             else:
                 raise ImportError(
                     "You are missing the 'exa_py' package. Would you like to install it?"
-                )
-        client_kwargs = {"api_key": self.api_key}
+                ) from e
+
+        client_kwargs: dict[str, str] = {}
+        if self.api_key:
+            client_kwargs["api_key"] = self.api_key
+        if self.base_url:
+            client_kwargs["base_url"] = self.base_url
         self.client = Exa(**client_kwargs)
@@ -104,7 +115,7 @@ class EXASearchTool(BaseTool):
         if self.client is None:
             raise ValueError("Client not initialized")

-        search_params = {
+        search_params: SearchParams = {
             "type": self.type,
         }

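These `__init__` hunks replace the module-level `EXA_INSTALLED` flag with an import attempted at construction time, offering an interactive install on failure. The skeleton of that pattern, stripped of tool-specific detail (package name kept from the diff, the rest assumed):

import subprocess

try:
    from exa_py import Exa
except ImportError as e:
    import click

    if click.confirm("You are missing the 'exa_py' package. Install it?"):
        subprocess.run(["uv", "add", "exa_py"], check=True)
        from exa_py import Exa  # re-import after installation
    else:
        raise ImportError("'exa_py' is required for this tool") from e
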
@@ -72,9 +72,9 @@ class FileCompressorTool(BaseTool):
                 "tar.xz": self._compress_tar,
             }
             if format == "zip":
-                format_compression[format](input_path, output_path)
+                format_compression[format](input_path, output_path)  # type: ignore[operator]
             else:
-                format_compression[format](input_path, output_path, format)
+                format_compression[format](input_path, output_path, format)  # type: ignore[operator]

             return f"Successfully compressed '{input_path}' into '{output_path}'"
         except FileNotFoundError:
@@ -84,7 +84,8 @@ class FileCompressorTool(BaseTool):
         except Exception as e:
             return f"An unexpected error occurred during compression: {e!s}"

-    def _generate_output_path(self, input_path: str, format: str) -> str:
+    @staticmethod
+    def _generate_output_path(input_path: str, format: str) -> str:
         """Generates output path based on input path and format."""
         if os.path.isfile(input_path):
             base_name = os.path.splitext(os.path.basename(input_path))[
@@ -94,7 +95,8 @@ class FileCompressorTool(BaseTool):
             base_name = os.path.basename(os.path.normpath(input_path))  # Directory name
         return os.path.join(os.getcwd(), f"{base_name}.{format}")

-    def _prepare_output(self, output_path: str, overwrite: bool) -> bool:
+    @staticmethod
+    def _prepare_output(output_path: str, overwrite: bool) -> bool:
         """Ensures output path is ready for writing."""
         output_dir = os.path.dirname(output_path)
         if output_dir and not os.path.exists(output_dir):
@@ -103,7 +105,8 @@ class FileCompressorTool(BaseTool):
                 return False
         return True

-    def _compress_zip(self, input_path: str, output_path: str):
+    @staticmethod
+    def _compress_zip(input_path: str, output_path: str):
         """Compresses input into a zip archive."""
         with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zipf:
             if os.path.isfile(input_path):
@@ -115,7 +118,8 @@ class FileCompressorTool(BaseTool):
                     arcname = os.path.relpath(full_path, start=input_path)
                     zipf.write(full_path, arcname)

-    def _compress_tar(self, input_path: str, output_path: str, format: str):
+    @staticmethod
+    def _compress_tar(input_path: str, output_path: str, format: str):
         """Compresses input into a tar archive with the given format."""
         format_mode = {
             "tar": "w",
@@ -129,6 +133,6 @@ class FileCompressorTool(BaseTool):

         mode = format_mode[format]

-        with tarfile.open(output_path, mode) as tarf:
+        with tarfile.open(output_path, mode) as tarf:  # type: ignore[call-overload]
             arcname = os.path.basename(input_path)
             tarf.add(input_path, arcname=arcname)
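The `# type: ignore[call-overload]` on `tarfile.open` is needed because typeshed types the mode via `Literal` overloads, while a value pulled from a dict is plain `str`. One ignore-free alternative is a cast (a sketch, not the project's choice):

import tarfile
from typing import Literal, cast

TarMode = Literal["w", "w:gz", "w:bz2", "w:xz"]
format_mode = {"tar": "w", "tar.gz": "w:gz", "tar.bz2": "w:bz2", "tar.xz": "w:xz"}

mode = cast(TarMode, format_mode["tar.gz"])  # safe: the dict only holds these modes
with tarfile.open("archive.tar.gz", mode) as tarf:
    tarf.add("example.txt")
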
@@ -1,14 +1,16 @@
-from typing import TYPE_CHECKING, Any, Optional
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 from crewai.tools import BaseTool, EnvVar
 from pydantic import BaseModel, ConfigDict, Field, PrivateAttr


 if TYPE_CHECKING:
-    from firecrawl import FirecrawlApp
+    from firecrawl import FirecrawlApp  # type: ignore[import-untyped]

 try:
-    from firecrawl import FirecrawlApp
+    from firecrawl import FirecrawlApp  # type: ignore[import-untyped]

     FIRECRAWL_AVAILABLE = True
 except ImportError:
@@ -59,7 +61,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
             },
         }
     )
-    _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
+    _firecrawl: FirecrawlApp | None = PrivateAttr(None)
     package_dependencies: list[str] = Field(default_factory=lambda: ["firecrawl-py"])
     env_vars: list[EnvVar] = Field(
         default_factory=lambda: [
@@ -114,7 +116,7 @@ try:
     # Only rebuild if the class hasn't been initialized yet
     if not hasattr(FirecrawlCrawlWebsiteTool, "_model_rebuilt"):
         FirecrawlCrawlWebsiteTool.model_rebuild()
-        FirecrawlCrawlWebsiteTool._model_rebuilt = True
+        FirecrawlCrawlWebsiteTool._model_rebuilt = True  # type: ignore[attr-defined]
 except ImportError:
     """
     When this tool is not used, then exception can be ignored.
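The Firecrawl hunks pair `from __future__ import annotations` with a guarded `model_rebuild()`: under deferred evaluation, `FirecrawlApp | None` is stored as a string that pydantic can only resolve once the real class is importable. The `_model_rebuilt` sentinel is not a declared field, hence the `attr-defined` ignore. The pattern in miniature, with a stand-in for the lazily imported dependency:

from __future__ import annotations

from pydantic import BaseModel, PrivateAttr


class Dep:  # stand-in for the lazily imported FirecrawlApp
    pass


class Tool(BaseModel):
    _dep: Dep | None = PrivateAttr(None)


if not hasattr(Tool, "_model_rebuilt"):
    Tool.model_rebuild()  # resolves the deferred "Dep | None" annotation
    Tool._model_rebuilt = True  # type: ignore[attr-defined]
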
@@ -1,14 +1,16 @@
-from typing import TYPE_CHECKING, Any, Optional
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 from crewai.tools import BaseTool, EnvVar
 from pydantic import BaseModel, ConfigDict, Field, PrivateAttr


 if TYPE_CHECKING:
-    from firecrawl import FirecrawlApp
+    from firecrawl import FirecrawlApp  # type: ignore[import-untyped]

 try:
-    from firecrawl import FirecrawlApp
+    from firecrawl import FirecrawlApp  # type: ignore[import-untyped]

     FIRECRAWL_AVAILABLE = True
 except ImportError:
@@ -54,7 +56,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
         }
     )

-    _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
+    _firecrawl: FirecrawlApp | None = PrivateAttr(None)
     package_dependencies: list[str] = Field(default_factory=lambda: ["firecrawl-py"])
     env_vars: list[EnvVar] = Field(
         default_factory=lambda: [
@@ -102,7 +104,7 @@ try:
     # Must rebuild model after class is defined
     if not hasattr(FirecrawlScrapeWebsiteTool, "_model_rebuilt"):
         FirecrawlScrapeWebsiteTool.model_rebuild()
-        FirecrawlScrapeWebsiteTool._model_rebuilt = True
+        FirecrawlScrapeWebsiteTool._model_rebuilt = True  # type: ignore[attr-defined]
 except ImportError:
     """
     When this tool is not used, then exception can be ignored.
@@ -1,15 +1,17 @@
-from typing import TYPE_CHECKING, Any, Optional
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 from crewai.tools import BaseTool, EnvVar
 from pydantic import BaseModel, ConfigDict, Field, PrivateAttr


 if TYPE_CHECKING:
-    from firecrawl import FirecrawlApp
+    from firecrawl import FirecrawlApp  # type: ignore[import-untyped]


 try:
-    from firecrawl import FirecrawlApp
+    from firecrawl import FirecrawlApp  # type: ignore[import-untyped]

     FIRECRAWL_AVAILABLE = True
 except ImportError:
@@ -53,7 +55,7 @@ class FirecrawlSearchTool(BaseTool):
             "timeout": 60000,
         }
     )
-    _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
+    _firecrawl: FirecrawlApp | None = PrivateAttr(None)
     package_dependencies: list[str] = Field(default_factory=lambda: ["firecrawl-py"])
     env_vars: list[EnvVar] = Field(
         default_factory=lambda: [
@@ -114,7 +116,7 @@ try:
     # Only rebuild if the class hasn't been initialized yet
     if not hasattr(FirecrawlSearchTool, "_model_rebuilt"):
         FirecrawlSearchTool.model_rebuild()
-        FirecrawlSearchTool._model_rebuilt = True
+        FirecrawlSearchTool._model_rebuilt = True  # type: ignore[attr-defined]
 except ImportError:
     """
     When this tool is not used, then exception can be ignored.
@@ -1,8 +1,7 @@
 from pydantic import BaseModel, Field

 from crewai_tools.rag.data_types import DataType
-
-from ..rag.rag_tool import RagTool
+from crewai_tools.tools.rag.rag_tool import RagTool


 class FixedGithubSearchToolSchema(BaseModel):
@@ -61,7 +60,7 @@ class GithubSearchTool(RagTool):
             metadata={"content_types": content_types, "gh_token": self.gh_token},
         )

-    def _run(
+    def _run(  # type: ignore[override]
         self,
         search_query: str,
         github_repo: str | None = None,
@@ -51,7 +51,7 @@ class HyperbrowserLoadTool(BaseTool):
             )

         try:
-            from hyperbrowser import Hyperbrowser
+            from hyperbrowser import Hyperbrowser  # type: ignore[import-untyped]
         except ImportError as e:
             raise ImportError(
                 "`hyperbrowser` package not found, please run `pip install hyperbrowser`"
@@ -64,11 +64,16 @@ class HyperbrowserLoadTool(BaseTool):

         self.hyperbrowser = Hyperbrowser(api_key=self.api_key)

-    def _prepare_params(self, params: dict) -> dict:
+    @staticmethod
+    def _prepare_params(params: dict) -> dict:
         """Prepare session and scrape options parameters."""
         try:
-            from hyperbrowser.models.scrape import ScrapeOptions
-            from hyperbrowser.models.session import CreateSessionParams
+            from hyperbrowser.models.scrape import (  # type: ignore[import-untyped]
+                ScrapeOptions,
+            )
+            from hyperbrowser.models.session import (  # type: ignore[import-untyped]
+                CreateSessionParams,
+            )
         except ImportError as e:
             raise ImportError(
                 "`hyperbrowser` package not found, please run `pip install hyperbrowser`"
@@ -102,8 +107,12 @@ class HyperbrowserLoadTool(BaseTool):
         if params is None:
             params = {}
         try:
-            from hyperbrowser.models.crawl import StartCrawlJobParams
-            from hyperbrowser.models.scrape import StartScrapeJobParams
+            from hyperbrowser.models.crawl import (  # type: ignore[import-untyped]
+                StartCrawlJobParams,
+            )
+            from hyperbrowser.models.scrape import (  # type: ignore[import-untyped]
+                StartScrapeJobParams,
+            )
         except ImportError as e:
             raise ImportError(
                 "`hyperbrowser` package not found, please run `pip install hyperbrowser`"
@@ -113,10 +122,10 @@ class HyperbrowserLoadTool(BaseTool):

         if operation == "scrape":
             scrape_params = StartScrapeJobParams(url=url, **params)
-            scrape_resp = self.hyperbrowser.scrape.start_and_wait(scrape_params)
+            scrape_resp = self.hyperbrowser.scrape.start_and_wait(scrape_params)  # type: ignore[union-attr]
             return self._extract_content(scrape_resp.data)
         crawl_params = StartCrawlJobParams(url=url, **params)
-        crawl_resp = self.hyperbrowser.crawl.start_and_wait(crawl_params)
+        crawl_resp = self.hyperbrowser.crawl.start_and_wait(crawl_params)  # type: ignore[union-attr]
         content = ""
         if crawl_resp.data:
             for page in crawl_resp.data:
@@ -102,7 +102,7 @@ class InvokeCrewAIAutomationTool(BaseTool):
                 fields[field_name] = (str, field_def)

             # Create dynamic model
-            args_schema = create_model("DynamicInvokeCrewAIAutomationInput", **fields)
+            args_schema = create_model("DynamicInvokeCrewAIAutomationInput", **fields)  # type: ignore[call-overload]
         else:
             args_schema = InvokeCrewAIAutomationInput

@@ -162,12 +162,11 @@ class InvokeCrewAIAutomationTool(BaseTool):

         # Start the crew
         response = self._kickoff_crew(inputs=kwargs)
+        kickoff_id: str | None = response.get("kickoff_id")

-        if response.get("kickoff_id") is None:
+        if kickoff_id is None:
             return f"Error: Failed to kickoff crew. Response: {response}"

-        kickoff_id = response.get("kickoff_id")
-
         # Poll for completion
         for i in range(self.max_polling_time):
             try:
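Binding `response.get("kickoff_id")` once, as the hunk above does, removes a duplicate lookup and lets the `is None` early return narrow `str | None` to `str` for the rest of the method. The narrowing in miniature (illustrative values):

response: dict[str, str] = {"kickoff_id": "abc123"}
kickoff_id: str | None = response.get("kickoff_id")
if kickoff_id is None:
    raise RuntimeError("kickoff failed")
print(kickoff_id.upper())  # mypy now treats kickoff_id as str
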
@@ -1,6 +1,6 @@
 from pydantic import BaseModel, Field

-from ..rag.rag_tool import RagTool
+from crewai_tools.tools.rag.rag_tool import RagTool


 class FixedJSONSearchToolSchema(BaseModel):
@@ -35,7 +35,7 @@ class JSONSearchTool(RagTool):
         self.args_schema = FixedJSONSearchToolSchema
         self._generate_description()

-    def _run(
+    def _run(  # type: ignore[override]
         self,
         search_query: str,
         json_path: str | None = None,
@@ -1,5 +1,5 @@
 import os
-from typing import Any
+from typing import Any, Literal

 from crewai.tools import BaseTool, EnvVar

@@ -10,7 +10,7 @@ try:
     LINKUP_AVAILABLE = True
 except ImportError:
     LINKUP_AVAILABLE = False
-    LinkupClient = Any  # type placeholder when package is not available
+    LinkupClient = Any  # type: ignore[misc,assignment]  # type placeholder when package is not available

 from pydantic import Field, PrivateAttr

@@ -32,7 +32,7 @@ class LinkupSearchTool(BaseTool):

     def __init__(self, api_key: str | None = None) -> None:
         """Initialize the tool with an API key."""
-        super().__init__()
+        super().__init__()  # type: ignore[call-arg]
         try:
             from linkup import LinkupClient
         except ImportError:
@@ -54,7 +54,12 @@ class LinkupSearchTool(BaseTool):
         self._client = LinkupClient(api_key=api_key or os.getenv("LINKUP_API_KEY"))

     def _run(
-        self, query: str, depth: str = "standard", output_type: str = "searchResults"
+        self,
+        query: str,
+        depth: Literal["standard", "deep"] = "standard",
+        output_type: Literal[
+            "searchResults", "sourcedAnswer", "structured"
+        ] = "searchResults",
     ) -> dict:
         """Executes a search using the Linkup API.

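Typing `depth` and `output_type` as `Literal` moves validation of these enum-like strings from the Linkup API to the type checker. Hypothetical call sites:

tool = LinkupSearchTool(api_key="...")
tool._run("quantum computing", depth="deep")  # accepted
tool._run("quantum computing", depth="shallow")  # rejected by mypy: not a valid Literal
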
@@ -17,7 +17,9 @@ class LlamaIndexTool(BaseTool):
         **kwargs: Any,
     ) -> Any:
         """Run tool."""
-        from llama_index.core.tools import BaseTool as LlamaBaseTool
+        from llama_index.core.tools import (  # type: ignore[import-not-found]
+            BaseTool as LlamaBaseTool,
+        )

         tool = cast(LlamaBaseTool, self.llama_index_tool)

@@ -28,7 +30,9 @@ class LlamaIndexTool(BaseTool):

     @classmethod
     def from_tool(cls, tool: Any, **kwargs: Any) -> LlamaIndexTool:
-        from llama_index.core.tools import BaseTool as LlamaBaseTool
+        from llama_index.core.tools import (  # type: ignore[import-not-found]
+            BaseTool as LlamaBaseTool,
+        )

         if not isinstance(tool, LlamaBaseTool):
             raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}")
@@ -57,8 +61,12 @@ class LlamaIndexTool(BaseTool):
         return_direct: bool = False,
         **kwargs: Any,
     ) -> LlamaIndexTool:
-        from llama_index.core.query_engine import BaseQueryEngine
-        from llama_index.core.tools import QueryEngineTool
+        from llama_index.core.query_engine import (  # type: ignore[import-not-found]
+            BaseQueryEngine,
+        )
+        from llama_index.core.tools import (  # type: ignore[import-not-found]
+            QueryEngineTool,
+        )

         if not isinstance(query_engine, BaseQueryEngine):
             raise ValueError(f"Expected a BaseQueryEngine, got {type(query_engine)}")
@@ -1,8 +1,7 @@
 from pydantic import BaseModel, Field

 from crewai_tools.rag.data_types import DataType
-
-from ..rag.rag_tool import RagTool
+from crewai_tools.tools.rag.rag_tool import RagTool


 class FixedMDXSearchToolSchema(BaseModel):
@@ -38,7 +37,7 @@ class MDXSearchTool(RagTool):
     def add(self, mdx: str) -> None:
         super().add(mdx, data_type=DataType.MDX)

-    def _run(
+    def _run(  # type: ignore[override]
         self,
         search_query: str,
         mdx: str | None = None,
@@ -1,4 +1,4 @@
-from .vector_search import (
+from crewai_tools.tools.mongodb_vector_search_tool.vector_search import (
     MongoDBToolSchema,
     MongoDBVectorSearchConfig,
     MongoDBVectorSearchTool,
@@ -197,7 +197,6 @@ class MongoDBVectorSearchTool(BaseTool):

         _metadatas = metadatas or [{} for _ in texts]
         ids = [str(ObjectId()) for _ in range(len(list(texts)))]
-        metadatas_batch = _metadatas

         result_ids = []
         texts_batch = []
@@ -285,7 +284,7 @@ class MongoDBVectorSearchTool(BaseTool):
                 "index": self.vector_index_name,
                 "path": self.embedding_key,
                 "queryVector": query_vector,
-                "numCandidates": limit * oversampling_factor,
+                "numCandidates": limit * oversampling_factor,  # type: ignore[operator]
                 "limit": limit,
             }
             if pre_filter:
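In the `$vectorSearch` stage above, `numCandidates` oversamples the approximate-nearest-neighbor candidate pool relative to the final `limit`; the `ignore[operator]` suggests `oversampling_factor` is typed as optional. An explicit guard would avoid the ignore (a sketch; the fallback of 10 is an assumption):

limit = 5
oversampling_factor: int | None = None  # illustrative values
factor = oversampling_factor if oversampling_factor is not None else 10
num_candidates = limit * factor  # 50 candidates examined for 5 results
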
@@ -1,7 +1,7 @@
 import os

 from crewai import Agent, Crew, Task
-from multion_tool import MultiOnTool
+from multion_tool import MultiOnTool  # type: ignore[import-not-found]


 os.environ["OPENAI_API_KEY"] = "Your Key"
@@ -1,6 +1,7 @@
 """Multion tool spec."""

 import os
+import subprocess
 from typing import Any

 from crewai.tools import BaseTool, EnvVar
@@ -30,8 +31,6 @@ class MultiOnTool(BaseTool):
     def __init__(
         self,
         api_key: str | None = None,
-        local: bool = False,
-        max_steps: int = 3,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -43,8 +42,6 @@ class MultiOnTool(BaseTool):
             if click.confirm(
                 "You are missing the 'multion' package. Would you like to install it?"
             ):
-                import subprocess
-
                 subprocess.run(["uv", "add", "multion"], check=True)  # noqa: S607
                 from multion.client import MultiOn
             else:
@@ -52,9 +49,7 @@ class MultiOnTool(BaseTool):
                 "`multion` package not found, please run `uv add multion`"
             ) from None
         self.session_id = None
-        self.local = local
         self.multion = MultiOn(api_key=api_key or os.getenv("MULTION_API_KEY"))
-        self.max_steps = max_steps

     def _run(
         self,
@@ -70,6 +65,9 @@ class MultiOnTool(BaseTool):
             *args (Any): Additional arguments to pass to the Multion client
             **kwargs (Any): Additional keyword arguments to pass to the Multion client
         """
+        if self.multion is None:
+            raise ValueError("Multion client is not initialized.")
+
         browse = self.multion.browse(
             cmd=cmd,
             session_id=self.session_id,
@@ -3,8 +3,7 @@ from typing import Any
 from pydantic import BaseModel, Field

 from crewai_tools.rag.data_types import DataType
-
-from ..rag.rag_tool import RagTool
+from crewai_tools.tools.rag.rag_tool import RagTool


 class MySQLSearchToolSchema(BaseModel):
@@ -35,7 +34,7 @@ class MySQLSearchTool(RagTool):
     ) -> None:
         super().add(f"SELECT * FROM {table_name};", **kwargs)  # noqa: S608

-    def _run(
+    def _run(  # type: ignore[override]
         self,
         search_query: str,
         similarity_threshold: float | None = None,
@@ -82,7 +82,7 @@ class NL2SQLTool(BaseTool):
             result = session.execute(text(sql_query))
             session.commit()

-            if result.returns_rows:
+            if result.returns_rows:  # type: ignore[attr-defined]
                 columns = result.keys()
                 return [
                     dict(zip(columns, row, strict=False)) for row in result.fetchall()
@@ -5,9 +5,10 @@ This tool provides functionality for extracting text from images using supported

 import base64

-from crewai import LLM
+from crewai.llm import LLM
 from crewai.tools.base_tool import BaseTool
-from pydantic import BaseModel, PrivateAttr
+from crewai.utilities.types import LLMMessage
+from pydantic import BaseModel, Field


 class OCRToolSchema(BaseModel):
@@ -19,7 +20,7 @@ class OCRToolSchema(BaseModel):
     For remote images, provide the complete URL starting with 'http' or 'https'.
     """

-    image_path_url: str = "The image path or URL."
+    image_path_url: str = Field(description="The image path or URL.")


 class OCRTool(BaseTool):
@@ -39,29 +40,9 @@ class OCRTool(BaseTool):

     name: str = "Optical Character Recognition Tool"
     description: str = "This tool uses an LLM's API to extract text from an image file."
-    _llm: LLM | None = PrivateAttr(default=None)
-
+    llm: LLM = Field(default_factory=lambda: LLM(model="gpt-4o", temperature=0.7))
     args_schema: type[BaseModel] = OCRToolSchema

-    def __init__(self, llm: LLM = None, **kwargs):
-        """Initialize the OCR tool.
-
-        Args:
-            llm (LLM, optional): Language model instance to use for API calls.
-                If not provided, a default LLM with gpt-4o model will be used.
-            **kwargs: Additional arguments passed to the parent class.
-        """
-        super().__init__(**kwargs)
-
-        if llm is None:
-            # Use the default LLM
-            llm = LLM(
-                model="gpt-4o",
-                temperature=0.7,
-            )
-
-        self._llm = llm
-
     def _run(self, **kwargs) -> str:
         """Execute the OCR operation on the provided image.
@@ -88,7 +69,7 @@ class OCRTool(BaseTool):
         base64_image = self._encode_image(image_path_url)
         image_data = f"data:image/jpeg;base64,{base64_image}"

-        messages = [
+        messages: list[LLMMessage] = [
             {
                 "role": "system",
                 "content": "You are an expert OCR specialist. Extract complete text from the provided image. Provide the result as a raw text.",
@@ -104,9 +85,10 @@ class OCRTool(BaseTool):
             },
         ]

-        return self._llm.call(messages=messages)
+        return self.llm.call(messages=messages)

-    def _encode_image(self, image_path: str):
+    @staticmethod
+    def _encode_image(image_path: str):
         """Encode an image file to base64 format.

         Args:
@@ -116,4 +98,4 @@ class OCRTool(BaseTool):
             str: Base64-encoded image data as a UTF-8 string.
         """
         with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode("utf-8")
+            return base64.b64encode(image_file.read()).decode()
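Replacing OCRTool's `__init__` override with a pydantic field and `default_factory` preserves the old behavior in a fraction of the code: the factory only runs when no `llm` is supplied, and callers override it like any other field. Hypothetical usage (the alternative model name is illustrative):

tool = OCRTool()  # default factory builds LLM(model="gpt-4o", temperature=0.7)
tool = OCRTool(llm=LLM(model="gpt-4o-mini"))  # a caller-supplied LLM wins
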
@@ -9,8 +9,10 @@ from pydantic import BaseModel, ConfigDict, Field


 try:
-    from oxylabs import RealtimeClient
-    from oxylabs.sources.response import Response as OxylabsResponse
+    from oxylabs import RealtimeClient  # type: ignore[import-untyped]
+    from oxylabs.sources.response import (  # type: ignore[import-untyped]
+        Response as OxylabsResponse,
+    )

     OXYLABS_AVAILABLE = True
 except ImportError:
@@ -9,8 +9,10 @@ from pydantic import BaseModel, ConfigDict, Field


 try:
-    from oxylabs import RealtimeClient
-    from oxylabs.sources.response import Response as OxylabsResponse
+    from oxylabs import RealtimeClient  # type: ignore[import-untyped]
+    from oxylabs.sources.response import (  # type: ignore[import-untyped]
+        Response as OxylabsResponse,
+    )

     OXYLABS_AVAILABLE = True
 except ImportError:
@@ -9,8 +9,10 @@ from pydantic import BaseModel, ConfigDict, Field


 try:
-    from oxylabs import RealtimeClient
-    from oxylabs.sources.response import Response as OxylabsResponse
+    from oxylabs import RealtimeClient  # type: ignore[import-untyped]
+    from oxylabs.sources.response import (  # type: ignore[import-untyped]
+        Response as OxylabsResponse,
+    )

     OXYLABS_AVAILABLE = True
 except ImportError:
@@ -9,8 +9,10 @@ from pydantic import BaseModel, ConfigDict, Field


 try:
-    from oxylabs import RealtimeClient
-    from oxylabs.sources.response import Response as OxylabsResponse
+    from oxylabs import RealtimeClient  # type: ignore[import-untyped]
+    from oxylabs.sources.response import (  # type: ignore[import-untyped]
+        Response as OxylabsResponse,
+    )

     OXYLABS_AVAILABLE = True
 except ImportError:
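The same guarded import lands in four Oxylabs tool modules. Stripped of the mypy comments, the shared optional-dependency skeleton is roughly the following; the `except` body is cut off in these hunks, so the placeholder line is an assumption:

try:
    from oxylabs import RealtimeClient

    OXYLABS_AVAILABLE = True
except ImportError:
    OXYLABS_AVAILABLE = False
    RealtimeClient = None  # assumed placeholder so the name exists either way
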
@@ -1,4 +1,4 @@
-from .parallel_search_tool import ParallelSearchTool
+from crewai_tools.tools.parallel_tools.parallel_search_tool import ParallelSearchTool


 __all__ = [
@@ -1,7 +1,9 @@
-from .patronus_eval_tool import PatronusEvalTool as PatronusEvalTool
-from .patronus_local_evaluator_tool import (
+from crewai_tools.tools.patronus_eval_tool.patronus_eval_tool import (
+    PatronusEvalTool as PatronusEvalTool,
+)
+from crewai_tools.tools.patronus_eval_tool.patronus_local_evaluator_tool import (
     PatronusLocalEvaluatorTool as PatronusLocalEvaluatorTool,
 )
-from .patronus_predefined_criteria_eval_tool import (
+from crewai_tools.tools.patronus_eval_tool.patronus_predefined_criteria_eval_tool import (
     PatronusPredefinedCriteriaEvalTool as PatronusPredefinedCriteriaEvalTool,
 )
@@ -1,12 +1,14 @@
 import random

-from patronus import Client, EvaluationResult  # type: ignore[import-not-found]
-from patronus_local_evaluator_tool import (  # type: ignore[import-not-found]
+from crewai import Agent, Crew, Task
+from patronus import (  # type: ignore[import-not-found,import-untyped]
+    Client,
+    EvaluationResult,
+)
+from patronus_local_evaluator_tool import (  # type: ignore[import-not-found,import-untyped]
     PatronusLocalEvaluatorTool,
 )

-from crewai import Agent, Crew, Task
-

 # Test the PatronusLocalEvaluatorTool where agent uses the local evaluator
 client = Client()
Some files were not shown because too many files have changed in this diff.