mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-02 04:38:29 +00:00
Compare commits
36 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9fcf55198f | ||
|
|
f46a846ddc | ||
|
|
b546982690 | ||
|
|
d7bdac12a2 | ||
|
|
528d812263 | ||
|
|
ffd717c51a | ||
|
|
fbe4aa4bd1 | ||
|
|
c205d2e8de | ||
|
|
fcb5b19b2e | ||
|
|
01f0111d52 | ||
|
|
6b52587c67 | ||
|
|
629f7f34ce | ||
|
|
0f1c173d02 | ||
|
|
19c5b9a35e | ||
|
|
1ed307b58c | ||
|
|
d29867bbb6 | ||
|
|
b2c278ed22 | ||
|
|
f6aed9798b | ||
|
|
40a2d387a1 | ||
|
|
6f36d7003b | ||
|
|
9e5906c52f | ||
|
|
fc521839e4 | ||
|
|
e4cc9a664c | ||
|
|
7e6171d5bc | ||
|
|
61ad1fb112 | ||
|
|
54710a8711 | ||
|
|
5abf976373 | ||
|
|
329567153b | ||
|
|
60332e0b19 | ||
|
|
40932af3fa | ||
|
|
e134e5305b | ||
|
|
e229ef4e19 | ||
|
|
2e9eb8c32d | ||
|
|
4ebb5114ed | ||
|
|
70b083945f | ||
|
|
410db1ff39 |
11
.github/dependabot.yml
vendored
Normal file
11
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
# To get started with Dependabot version updates, you'll need to specify which
|
||||
# package ecosystems to update and where the package manifests are located.
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: uv # See documentation for possible values
|
||||
directory: "/" # Location of package manifests
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
35
.github/workflows/docs-broken-links.yml
vendored
Normal file
35
.github/workflows/docs-broken-links.yml
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
name: Check Documentation Broken Links
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- "docs/**"
|
||||
- "docs.json"
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "docs/**"
|
||||
- "docs.json"
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
check-links:
|
||||
name: Check broken links
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: "latest"
|
||||
|
||||
- name: Install Mintlify CLI
|
||||
run: npm i -g mintlify
|
||||
|
||||
- name: Run broken link checker
|
||||
run: |
|
||||
# Auto-answer the prompt with yes command
|
||||
yes "" | mintlify broken-links || test $? -eq 141
|
||||
working-directory: ./docs
|
||||
@@ -19,6 +19,7 @@ repos:
|
||||
language: system
|
||||
pass_filenames: true
|
||||
types: [python]
|
||||
exclude: ^(lib/crewai/src/crewai/cli/templates/|lib/crewai/tests/|lib/crewai-tools/tests/)
|
||||
- repo: https://github.com/astral-sh/uv-pre-commit
|
||||
rev: 0.9.3
|
||||
hooks:
|
||||
|
||||
@@ -313,7 +313,10 @@
|
||||
"en/learn/multimodal-agents",
|
||||
"en/learn/replay-tasks-from-latest-crew-kickoff",
|
||||
"en/learn/sequential-process",
|
||||
"en/learn/using-annotations"
|
||||
"en/learn/using-annotations",
|
||||
"en/learn/execution-hooks",
|
||||
"en/learn/llm-hooks",
|
||||
"en/learn/tool-hooks"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -737,7 +740,10 @@
|
||||
"pt-BR/learn/multimodal-agents",
|
||||
"pt-BR/learn/replay-tasks-from-latest-crew-kickoff",
|
||||
"pt-BR/learn/sequential-process",
|
||||
"pt-BR/learn/using-annotations"
|
||||
"pt-BR/learn/using-annotations",
|
||||
"pt-BR/learn/execution-hooks",
|
||||
"pt-BR/learn/llm-hooks",
|
||||
"pt-BR/learn/tool-hooks"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1170,7 +1176,10 @@
|
||||
"ko/learn/multimodal-agents",
|
||||
"ko/learn/replay-tasks-from-latest-crew-kickoff",
|
||||
"ko/learn/sequential-process",
|
||||
"ko/learn/using-annotations"
|
||||
"ko/learn/using-annotations",
|
||||
"ko/learn/execution-hooks",
|
||||
"ko/learn/llm-hooks",
|
||||
"ko/learn/tool-hooks"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -402,6 +402,77 @@ crewai config reset
|
||||
After resetting configuration, re-run `crewai login` to authenticate again.
|
||||
</Tip>
|
||||
|
||||
### 14. Trace Management
|
||||
|
||||
Manage trace collection preferences for your Crew and Flow executions.
|
||||
|
||||
```shell Terminal
|
||||
crewai traces [COMMAND]
|
||||
```
|
||||
|
||||
#### Commands:
|
||||
|
||||
- `enable`: Enable trace collection for crew/flow executions
|
||||
```shell Terminal
|
||||
crewai traces enable
|
||||
```
|
||||
|
||||
- `disable`: Disable trace collection for crew/flow executions
|
||||
```shell Terminal
|
||||
crewai traces disable
|
||||
```
|
||||
|
||||
- `status`: Show current trace collection status
|
||||
```shell Terminal
|
||||
crewai traces status
|
||||
```
|
||||
|
||||
#### How Tracing Works
|
||||
|
||||
Trace collection is controlled by checking three settings in priority order:
|
||||
|
||||
1. **Explicit flag in code** (highest priority - can enable OR disable):
|
||||
```python
|
||||
crew = Crew(agents=[...], tasks=[...], tracing=True) # Always enable
|
||||
crew = Crew(agents=[...], tasks=[...], tracing=False) # Always disable
|
||||
crew = Crew(agents=[...], tasks=[...]) # Check lower priorities (default)
|
||||
```
|
||||
- `tracing=True` will **always enable** tracing (overrides everything)
|
||||
- `tracing=False` will **always disable** tracing (overrides everything)
|
||||
- `tracing=None` or omitted will check lower priority settings
|
||||
|
||||
2. **Environment variable** (second priority):
|
||||
```env
|
||||
CREWAI_TRACING_ENABLED=true
|
||||
```
|
||||
- Checked only if `tracing` is not explicitly set to `True` or `False` in code
|
||||
- Set to `true` or `1` to enable tracing
|
||||
|
||||
3. **User preference** (lowest priority):
|
||||
```shell Terminal
|
||||
crewai traces enable
|
||||
```
|
||||
- Checked only if `tracing` is not set in code and `CREWAI_TRACING_ENABLED` is not set to `true`
|
||||
- Running `crewai traces enable` is sufficient to enable tracing by itself
|
||||
|
||||
<Note>
|
||||
**To enable tracing**, use any one of these methods:
|
||||
- Set `tracing=True` in your Crew/Flow code, OR
|
||||
- Add `CREWAI_TRACING_ENABLED=true` to your `.env` file, OR
|
||||
- Run `crewai traces enable`
|
||||
|
||||
**To disable tracing**, use any ONE of these methods:
|
||||
- Set `tracing=False` in your Crew/Flow code (overrides everything), OR
|
||||
- Remove the `CREWAI_TRACING_ENABLED` env var or set it to `false`, OR
|
||||
- Run `crewai traces disable`
|
||||
|
||||
Higher priority settings override lower ones.
|
||||
</Note>
|
||||
|
||||
<Tip>
|
||||
For more information about tracing, see the [Tracing documentation](/observability/tracing).
|
||||
</Tip>
|
||||
|
||||
<Tip>
|
||||
CrewAI CLI handles authentication to the Tool Repository automatically when adding packages to your project. Just prepend `crewai` to any `uv` command to use it, e.g. `crewai uv add requests`. For more information, see the [Tool Repository](https://docs.crewai.com/enterprise/features/tool-repository) docs.
|
||||
</Tip>
|
||||
|
||||
@@ -739,7 +739,7 @@ class KnowledgeMonitorListener(BaseEventListener):
|
||||
knowledge_monitor = KnowledgeMonitorListener()
|
||||
```
|
||||
|
||||
For more information on using events, see the [Event Listeners](https://docs.crewai.com/concepts/event-listener) documentation.
|
||||
For more information on using events, see the [Event Listeners](/en/concepts/event-listener) documentation.
|
||||
|
||||
### Custom Knowledge Sources
|
||||
|
||||
|
||||
@@ -1035,7 +1035,7 @@ CrewAI supports streaming responses from LLMs, allowing your application to rece
|
||||
```
|
||||
|
||||
<Tip>
|
||||
[Click here](https://docs.crewai.com/concepts/event-listener#event-listeners) for more details
|
||||
[Click here](/en/concepts/event-listener#event-listeners) for more details
|
||||
</Tip>
|
||||
</Tab>
|
||||
|
||||
@@ -1200,6 +1200,52 @@ Learn how to get the most out of your LLM configuration:
|
||||
)
|
||||
```
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Transport Interceptors">
|
||||
CrewAI provides message interceptors for several providers, allowing you to hook into request/response cycles at the transport layer.
|
||||
|
||||
**Supported Providers:**
|
||||
- ✅ OpenAI
|
||||
- ✅ Anthropic
|
||||
|
||||
**Basic Usage:**
|
||||
```python
|
||||
import httpx
|
||||
from crewai import LLM
|
||||
from crewai.llms.hooks import BaseInterceptor
|
||||
|
||||
class CustomInterceptor(BaseInterceptor[httpx.Request, httpx.Response]):
|
||||
"""Custom interceptor to modify requests and responses."""
|
||||
|
||||
def on_outbound(self, request: httpx.Request) -> httpx.Request:
|
||||
"""Print request before sending to the LLM provider."""
|
||||
print(request)
|
||||
return request
|
||||
|
||||
def on_inbound(self, response: httpx.Response) -> httpx.Response:
|
||||
"""Process response after receiving from the LLM provider."""
|
||||
print(f"Status: {response.status_code}")
|
||||
print(f"Response time: {response.elapsed}")
|
||||
return response
|
||||
|
||||
# Use the interceptor with an LLM
|
||||
llm = LLM(
|
||||
model="openai/gpt-4o",
|
||||
interceptor=CustomInterceptor()
|
||||
)
|
||||
```
|
||||
|
||||
**Important Notes:**
|
||||
- Both methods must return the object they received, or another object of the same type.
|
||||
- Modifying received objects may result in unexpected behavior or application crashes.
|
||||
- Not all providers support interceptors - check the supported providers list above.
|
||||
|
||||
<Info>
|
||||
Interceptors operate at the transport layer. This is particularly useful for:
|
||||
- Message transformation and filtering
|
||||
- Debugging API interactions
|
||||
</Info>
|
||||
</Accordion>
|
||||
</AccordionGroup>
|
||||
|
||||
## Common Issues and Solutions
|
||||
|
||||
@@ -60,6 +60,7 @@ crew = Crew(
|
||||
| **Output Pydantic** _(optional)_ | `output_pydantic` | `Optional[Type[BaseModel]]` | A Pydantic model for task output. |
|
||||
| **Callback** _(optional)_ | `callback` | `Optional[Any]` | Function/object to be executed after task completion. |
|
||||
| **Guardrail** _(optional)_ | `guardrail` | `Optional[Callable]` | Function to validate task output before proceeding to next task. |
|
||||
| **Guardrails** _(optional)_ | `guardrails` | `Optional[List[Callable] | List[str]]` | List of guardrails to validate task output before proceeding to next task. |
|
||||
| **Guardrail Max Retries** _(optional)_ | `guardrail_max_retries` | `Optional[int]` | Maximum number of retries when guardrail validation fails. Defaults to 3. |
|
||||
|
||||
<Note type="warning" title="Deprecated: max_retries">
|
||||
@@ -223,6 +224,7 @@ By default, the `TaskOutput` will only include the `raw` output. A `TaskOutput`
|
||||
| **JSON Dict** | `json_dict` | `Optional[Dict[str, Any]]` | A dictionary representing the JSON output of the task. |
|
||||
| **Agent** | `agent` | `str` | The agent that executed the task. |
|
||||
| **Output Format** | `output_format` | `OutputFormat` | The format of the task output, with options including RAW, JSON, and Pydantic. The default is RAW. |
|
||||
| **Messages** | `messages` | `list[LLMMessage]` | The messages from the last task execution. |
|
||||
|
||||
### Task Methods and Properties
|
||||
|
||||
@@ -341,7 +343,11 @@ Task guardrails provide a way to validate and transform task outputs before they
|
||||
are passed to the next task. This feature helps ensure data quality and provides
|
||||
feedback to agents when their output doesn't meet specific criteria.
|
||||
|
||||
Guardrails are implemented as Python functions that contain custom validation logic, giving you complete control over the validation process and ensuring reliable, deterministic results.
|
||||
CrewAI supports two types of guardrails:
|
||||
|
||||
1. **Function-based guardrails**: Python functions with custom validation logic, giving you complete control over the validation process and ensuring reliable, deterministic results.
|
||||
|
||||
2. **LLM-based guardrails**: String descriptions that use the agent's LLM to validate outputs based on natural language criteria. These are ideal for complex or subjective validation requirements.
|
||||
|
||||
### Function-Based Guardrails
|
||||
|
||||
@@ -355,12 +361,12 @@ def validate_blog_content(result: TaskOutput) -> Tuple[bool, Any]:
|
||||
"""Validate blog content meets requirements."""
|
||||
try:
|
||||
# Check word count
|
||||
word_count = len(result.split())
|
||||
word_count = len(result.raw.split())
|
||||
if word_count > 200:
|
||||
return (False, "Blog content exceeds 200 words")
|
||||
|
||||
# Additional validation logic here
|
||||
return (True, result.strip())
|
||||
return (True, result.raw.strip())
|
||||
except Exception as e:
|
||||
return (False, "Unexpected error during validation")
|
||||
|
||||
@@ -372,6 +378,147 @@ blog_task = Task(
|
||||
)
|
||||
```
|
||||
|
||||
### LLM-Based Guardrails (String Descriptions)
|
||||
|
||||
Instead of writing custom validation functions, you can use string descriptions that leverage LLM-based validation. When you provide a string to the `guardrail` or `guardrails` parameter, CrewAI automatically creates an `LLMGuardrail` that uses the agent's LLM to validate the output based on your description.
|
||||
|
||||
**Requirements**:
|
||||
- The task must have an `agent` assigned (the guardrail uses the agent's LLM)
|
||||
- Provide a clear, descriptive string explaining the validation criteria
|
||||
|
||||
```python Code
|
||||
from crewai import Task
|
||||
|
||||
# Single LLM-based guardrail
|
||||
blog_task = Task(
|
||||
description="Write a blog post about AI",
|
||||
expected_output="A blog post under 200 words",
|
||||
agent=blog_agent,
|
||||
guardrail="The blog post must be under 200 words and contain no technical jargon"
|
||||
)
|
||||
```
|
||||
|
||||
LLM-based guardrails are particularly useful for:
|
||||
- **Complex validation logic** that's difficult to express programmatically
|
||||
- **Subjective criteria** like tone, style, or quality assessments
|
||||
- **Natural language requirements** that are easier to describe than code
|
||||
|
||||
The LLM guardrail will:
|
||||
1. Analyze the task output against your description
|
||||
2. Return `(True, output)` if the output complies with the criteria
|
||||
3. Return `(False, feedback)` with specific feedback if validation fails
|
||||
|
||||
**Example with detailed validation criteria**:
|
||||
|
||||
```python Code
|
||||
research_task = Task(
|
||||
description="Research the latest developments in quantum computing",
|
||||
expected_output="A comprehensive research report",
|
||||
agent=researcher_agent,
|
||||
guardrail="""
|
||||
The research report must:
|
||||
- Be at least 1000 words long
|
||||
- Include at least 5 credible sources
|
||||
- Cover both technical and practical applications
|
||||
- Be written in a professional, academic tone
|
||||
- Avoid speculation or unverified claims
|
||||
"""
|
||||
)
|
||||
```
|
||||
|
||||
### Multiple Guardrails
|
||||
|
||||
You can apply multiple guardrails to a task using the `guardrails` parameter. Multiple guardrails are executed sequentially, with each guardrail receiving the output from the previous one. This allows you to chain validation and transformation steps.
|
||||
|
||||
The `guardrails` parameter accepts:
|
||||
- A list of guardrail functions or string descriptions
|
||||
- A single guardrail function or string (same as `guardrail`)
|
||||
|
||||
**Note**: If `guardrails` is provided, it takes precedence over `guardrail`. The `guardrail` parameter will be ignored when `guardrails` is set.
|
||||
|
||||
```python Code
|
||||
from typing import Tuple, Any
|
||||
from crewai import TaskOutput, Task
|
||||
|
||||
def validate_word_count(result: TaskOutput) -> Tuple[bool, Any]:
|
||||
"""Validate word count is within limits."""
|
||||
word_count = len(result.raw.split())
|
||||
if word_count < 100:
|
||||
return (False, f"Content too short: {word_count} words. Need at least 100 words.")
|
||||
if word_count > 500:
|
||||
return (False, f"Content too long: {word_count} words. Maximum is 500 words.")
|
||||
return (True, result.raw)
|
||||
|
||||
def validate_no_profanity(result: TaskOutput) -> Tuple[bool, Any]:
|
||||
"""Check for inappropriate language."""
|
||||
profanity_words = ["badword1", "badword2"] # Example list
|
||||
content_lower = result.raw.lower()
|
||||
for word in profanity_words:
|
||||
if word in content_lower:
|
||||
return (False, f"Inappropriate language detected: {word}")
|
||||
return (True, result.raw)
|
||||
|
||||
def format_output(result: TaskOutput) -> Tuple[bool, Any]:
|
||||
"""Format and clean the output."""
|
||||
formatted = result.raw.strip()
|
||||
# Capitalize first letter
|
||||
formatted = formatted[0].upper() + formatted[1:] if formatted else formatted
|
||||
return (True, formatted)
|
||||
|
||||
# Apply multiple guardrails sequentially
|
||||
blog_task = Task(
|
||||
description="Write a blog post about AI",
|
||||
expected_output="A well-formatted blog post between 100-500 words",
|
||||
agent=blog_agent,
|
||||
guardrails=[
|
||||
validate_word_count, # First: validate length
|
||||
validate_no_profanity, # Second: check content
|
||||
format_output # Third: format the result
|
||||
],
|
||||
guardrail_max_retries=3
|
||||
)
|
||||
```
|
||||
|
||||
In this example, the guardrails execute in order:
|
||||
1. `validate_word_count` checks the word count
|
||||
2. `validate_no_profanity` checks for inappropriate language (using the output from step 1)
|
||||
3. `format_output` formats the final result (using the output from step 2)
|
||||
|
||||
If any guardrail fails, the error is sent back to the agent, and the task is retried up to `guardrail_max_retries` times.
|
||||
|
||||
**Mixing function-based and LLM-based guardrails**:
|
||||
|
||||
You can combine both function-based and string-based guardrails in the same list:
|
||||
|
||||
```python Code
|
||||
from typing import Tuple, Any
|
||||
from crewai import TaskOutput, Task
|
||||
|
||||
def validate_word_count(result: TaskOutput) -> Tuple[bool, Any]:
|
||||
"""Validate word count is within limits."""
|
||||
word_count = len(result.raw.split())
|
||||
if word_count < 100:
|
||||
return (False, f"Content too short: {word_count} words. Need at least 100 words.")
|
||||
if word_count > 500:
|
||||
return (False, f"Content too long: {word_count} words. Maximum is 500 words.")
|
||||
return (True, result.raw)
|
||||
|
||||
# Mix function-based and LLM-based guardrails
|
||||
blog_task = Task(
|
||||
description="Write a blog post about AI",
|
||||
expected_output="A well-formatted blog post between 100-500 words",
|
||||
agent=blog_agent,
|
||||
guardrails=[
|
||||
validate_word_count, # Function-based: precise word count check
|
||||
"The content must be engaging and suitable for a general audience", # LLM-based: subjective quality check
|
||||
"The writing style should be clear, concise, and free of technical jargon" # LLM-based: style validation
|
||||
],
|
||||
guardrail_max_retries=3
|
||||
)
|
||||
```
|
||||
|
||||
This approach combines the precision of programmatic validation with the flexibility of LLM-based assessment for subjective criteria.
|
||||
|
||||
### Guardrail Function Requirements
|
||||
|
||||
1. **Function Signature**:
|
||||
|
||||
@@ -37,7 +37,7 @@ you can use them locally or refine them to your needs.
|
||||
<Card title="Tools & Integrations" href="/en/enterprise/features/tools-and-integrations" icon="wrench">
|
||||
Connect external apps and manage internal tools your agents can use.
|
||||
</Card>
|
||||
<Card title="Tool Repository" href="/en/enterprise/features/tool-repository" icon="toolbox">
|
||||
<Card title="Tool Repository" href="/en/enterprise/guides/tool-repository#tool-repository" icon="toolbox">
|
||||
Publish and install tools to enhance your crews' capabilities.
|
||||
</Card>
|
||||
<Card title="Agents Repository" href="/en/enterprise/features/agent-repositories" icon="people-group">
|
||||
|
||||
@@ -241,7 +241,7 @@ Tools & Integrations is the central hub for connecting third‑party apps and ma
|
||||
## Related
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="Tool Repository" href="/en/enterprise/features/tool-repository" icon="toolbox">
|
||||
<Card title="Tool Repository" href="/en/enterprise/guides/tool-repository#tool-repository" icon="toolbox">
|
||||
Create, publish, and version custom tools for your organization.
|
||||
</Card>
|
||||
<Card title="Webhook Automation" href="/en/enterprise/guides/webhook-automation" icon="bolt">
|
||||
|
||||
@@ -21,7 +21,7 @@ The repository is not a version control system. Use Git to track code changes an
|
||||
Before using the Tool Repository, ensure you have:
|
||||
|
||||
- A [CrewAI AMP](https://app.crewai.com) account
|
||||
- [CrewAI CLI](https://docs.crewai.com/concepts/cli#cli) installed
|
||||
- [CrewAI CLI](/en/concepts/cli#cli) installed
|
||||
- uv>=0.5.0 installed. Check out [how to upgrade](https://docs.astral.sh/uv/getting-started/installation/#upgrading-uv)
|
||||
- [Git](https://git-scm.com) installed and configured
|
||||
- Access permissions to publish or install tools in your CrewAI AMP organization
|
||||
@@ -112,7 +112,7 @@ By default, tools are published as private. To make a tool public:
|
||||
crewai tool publish --public
|
||||
```
|
||||
|
||||
For more details on how to build tools, see [Creating your own tools](https://docs.crewai.com/concepts/tools#creating-your-own-tools).
|
||||
For more details on how to build tools, see [Creating your own tools](/en/concepts/tools#creating-your-own-tools).
|
||||
|
||||
## Updating Tools
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ mode: "wide"
|
||||
|
||||
To integrate human input into agent execution, set the `human_input` flag in the task definition. When enabled, the agent prompts the user for input before delivering its final answer. This input can provide extra context, clarify ambiguities, or validate the agent's output.
|
||||
|
||||
For detailed implementation guidance, see our [Human-in-the-Loop guide](/en/how-to/human-in-the-loop).
|
||||
For detailed implementation guidance, see our [Human-in-the-Loop guide](/en/enterprise/guides/human-in-the-loop).
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="What advanced customization options are available for tailoring and enhancing agent behavior and capabilities in CrewAI?">
|
||||
@@ -142,7 +142,7 @@ mode: "wide"
|
||||
<Accordion title="How can I create custom tools for my CrewAI agents?">
|
||||
You can create custom tools by subclassing the `BaseTool` class provided by CrewAI or by using the tool decorator. Subclassing involves defining a new class that inherits from `BaseTool`, specifying the name, description, and the `_run` method for operational logic. The tool decorator allows you to create a `Tool` object directly from a function, supplying the required attributes and the functional logic.
|
||||
|
||||
<Card href="https://docs.crewai.com/how-to/create-custom-tools" icon="code">CrewAI Tools Guide</Card>
|
||||
<Card href="/en/learn/create-custom-tools" icon="code">CrewAI Tools Guide</Card>
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="How can you control the maximum number of requests per minute that the entire crew can perform?">
|
||||
|
||||
295
docs/en/learn/a2a-agent-delegation.mdx
Normal file
295
docs/en/learn/a2a-agent-delegation.mdx
Normal file
@@ -0,0 +1,295 @@
|
||||
---
|
||||
title: Agent-to-Agent (A2A) Protocol
|
||||
description: Enable CrewAI agents to delegate tasks to remote A2A-compliant agents for specialized handling
|
||||
icon: network-wired
|
||||
mode: "wide"
|
||||
---
|
||||
|
||||
## A2A Agent Delegation
|
||||
|
||||
CrewAI supports the Agent-to-Agent (A2A) protocol, allowing agents to delegate tasks to remote specialized agents. The agent's LLM automatically decides whether to handle a task directly or delegate to an A2A agent based on the task requirements.
|
||||
|
||||
<Note>
|
||||
A2A delegation requires the `a2a-sdk` package. Install with: `uv add 'crewai[a2a]'` or `pip install 'crewai[a2a]'`
|
||||
</Note>
|
||||
|
||||
## How It Works
|
||||
|
||||
When an agent is configured with A2A capabilities:
|
||||
|
||||
1. The LLM analyzes each task
|
||||
2. It decides to either:
|
||||
- Handle the task directly using its own capabilities
|
||||
- Delegate to a remote A2A agent for specialized handling
|
||||
3. If delegating, the agent communicates with the remote A2A agent through the protocol
|
||||
4. Results are returned to the CrewAI workflow
|
||||
|
||||
## Basic Configuration
|
||||
|
||||
Configure an agent for A2A delegation by setting the `a2a` parameter:
|
||||
|
||||
```python Code
|
||||
from crewai import Agent, Crew, Task
|
||||
from crewai.a2a import A2AConfig
|
||||
|
||||
agent = Agent(
|
||||
role="Research Coordinator",
|
||||
goal="Coordinate research tasks efficiently",
|
||||
backstory="Expert at delegating to specialized research agents",
|
||||
llm="gpt-4o",
|
||||
a2a=A2AConfig(
|
||||
endpoint="https://example.com/.well-known/agent-card.json",
|
||||
timeout=120,
|
||||
max_turns=10
|
||||
)
|
||||
)
|
||||
|
||||
task = Task(
|
||||
description="Research the latest developments in quantum computing",
|
||||
expected_output="A comprehensive research report",
|
||||
agent=agent
|
||||
)
|
||||
|
||||
crew = Crew(agents=[agent], tasks=[task], verbose=True)
|
||||
result = crew.kickoff()
|
||||
```
|
||||
|
||||
## Configuration Options
|
||||
|
||||
The `A2AConfig` class accepts the following parameters:
|
||||
|
||||
<ParamField path="endpoint" type="str" required>
|
||||
The A2A agent endpoint URL (typically points to `.well-known/agent-card.json`)
|
||||
</ParamField>
|
||||
|
||||
<ParamField path="auth" type="AuthScheme" default="None">
|
||||
Authentication scheme for the A2A agent. Supports Bearer tokens, OAuth2, API keys, and HTTP authentication.
|
||||
</ParamField>
|
||||
|
||||
<ParamField path="timeout" type="int" default="120">
|
||||
Request timeout in seconds
|
||||
</ParamField>
|
||||
|
||||
<ParamField path="max_turns" type="int" default="10">
|
||||
Maximum number of conversation turns with the A2A agent
|
||||
</ParamField>
|
||||
|
||||
<ParamField path="response_model" type="type[BaseModel]" default="None">
|
||||
Optional Pydantic model for requesting structured output from an A2A agent. A2A protocol does not
|
||||
enforce this, so an A2A agent does not need to honor this request.
|
||||
</ParamField>
|
||||
|
||||
<ParamField path="fail_fast" type="bool" default="True">
|
||||
Whether to raise an error immediately if agent connection fails. When `False`, the agent continues with available agents and informs the LLM about unavailable ones.
|
||||
</ParamField>
|
||||
|
||||
<ParamField path="trust_remote_completion_status" type="bool" default="False">
|
||||
When `True`, returns the A2A agent's result directly when it signals completion. When `False`, allows the server agent to review the result and potentially continue the conversation.
|
||||
</ParamField>
|
||||
|
||||
## Authentication
|
||||
|
||||
For A2A agents that require authentication, use one of the provided auth schemes:
|
||||
|
||||
<Tabs>
|
||||
<Tab title="Bearer Token">
|
||||
```python Code
|
||||
from crewai.a2a import A2AConfig
|
||||
from crewai.a2a.auth import BearerTokenAuth
|
||||
|
||||
agent = Agent(
|
||||
role="Secure Coordinator",
|
||||
goal="Coordinate tasks with secured agents",
|
||||
backstory="Manages secure agent communications",
|
||||
llm="gpt-4o",
|
||||
a2a=A2AConfig(
|
||||
endpoint="https://secure-agent.example.com/.well-known/agent-card.json",
|
||||
auth=BearerTokenAuth(token="your-bearer-token"),
|
||||
timeout=120
|
||||
)
|
||||
)
|
||||
```
|
||||
</Tab>
|
||||
|
||||
<Tab title="API Key">
|
||||
```python Code
|
||||
from crewai.a2a import A2AConfig
|
||||
from crewai.a2a.auth import APIKeyAuth
|
||||
|
||||
agent = Agent(
|
||||
role="API Coordinator",
|
||||
goal="Coordinate with API-based agents",
|
||||
backstory="Manages API-authenticated communications",
|
||||
llm="gpt-4o",
|
||||
a2a=A2AConfig(
|
||||
endpoint="https://api-agent.example.com/.well-known/agent-card.json",
|
||||
auth=APIKeyAuth(
|
||||
api_key="your-api-key",
|
||||
location="header", # or "query" or "cookie"
|
||||
name="X-API-Key"
|
||||
),
|
||||
timeout=120
|
||||
)
|
||||
)
|
||||
```
|
||||
</Tab>
|
||||
|
||||
<Tab title="OAuth2">
|
||||
```python Code
|
||||
from crewai.a2a import A2AConfig
|
||||
from crewai.a2a.auth import OAuth2ClientCredentials
|
||||
|
||||
agent = Agent(
|
||||
role="OAuth Coordinator",
|
||||
goal="Coordinate with OAuth-secured agents",
|
||||
backstory="Manages OAuth-authenticated communications",
|
||||
llm="gpt-4o",
|
||||
a2a=A2AConfig(
|
||||
endpoint="https://oauth-agent.example.com/.well-known/agent-card.json",
|
||||
auth=OAuth2ClientCredentials(
|
||||
token_url="https://auth.example.com/oauth/token",
|
||||
client_id="your-client-id",
|
||||
client_secret="your-client-secret",
|
||||
scopes=["read", "write"]
|
||||
),
|
||||
timeout=120
|
||||
)
|
||||
)
|
||||
```
|
||||
</Tab>
|
||||
|
||||
<Tab title="HTTP Basic">
|
||||
```python Code
|
||||
from crewai.a2a import A2AConfig
|
||||
from crewai.a2a.auth import HTTPBasicAuth
|
||||
|
||||
agent = Agent(
|
||||
role="Basic Auth Coordinator",
|
||||
goal="Coordinate with basic auth agents",
|
||||
backstory="Manages basic authentication communications",
|
||||
llm="gpt-4o",
|
||||
a2a=A2AConfig(
|
||||
endpoint="https://basic-agent.example.com/.well-known/agent-card.json",
|
||||
auth=HTTPBasicAuth(
|
||||
username="your-username",
|
||||
password="your-password"
|
||||
),
|
||||
timeout=120
|
||||
)
|
||||
)
|
||||
```
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Multiple A2A Agents
|
||||
|
||||
Configure multiple A2A agents for delegation by passing a list:
|
||||
|
||||
```python Code
|
||||
from crewai.a2a import A2AConfig
|
||||
from crewai.a2a.auth import BearerTokenAuth
|
||||
|
||||
agent = Agent(
|
||||
role="Multi-Agent Coordinator",
|
||||
goal="Coordinate with multiple specialized agents",
|
||||
backstory="Expert at delegating to the right specialist",
|
||||
llm="gpt-4o",
|
||||
a2a=[
|
||||
A2AConfig(
|
||||
endpoint="https://research.example.com/.well-known/agent-card.json",
|
||||
timeout=120
|
||||
),
|
||||
A2AConfig(
|
||||
endpoint="https://data.example.com/.well-known/agent-card.json",
|
||||
auth=BearerTokenAuth(token="data-token"),
|
||||
timeout=90
|
||||
)
|
||||
]
|
||||
)
|
||||
```
|
||||
|
||||
The LLM will automatically choose which A2A agent to delegate to based on the task requirements.
|
||||
|
||||
## Error Handling
|
||||
|
||||
Control how agent connection failures are handled using the `fail_fast` parameter:
|
||||
|
||||
```python Code
|
||||
from crewai.a2a import A2AConfig
|
||||
|
||||
# Fail immediately on connection errors (default)
|
||||
agent = Agent(
|
||||
role="Research Coordinator",
|
||||
goal="Coordinate research tasks",
|
||||
backstory="Expert at delegation",
|
||||
llm="gpt-4o",
|
||||
a2a=A2AConfig(
|
||||
endpoint="https://research.example.com/.well-known/agent-card.json",
|
||||
fail_fast=True
|
||||
)
|
||||
)
|
||||
|
||||
# Continue with available agents
|
||||
agent = Agent(
|
||||
role="Multi-Agent Coordinator",
|
||||
goal="Coordinate with multiple agents",
|
||||
backstory="Expert at working with available resources",
|
||||
llm="gpt-4o",
|
||||
a2a=[
|
||||
A2AConfig(
|
||||
endpoint="https://primary.example.com/.well-known/agent-card.json",
|
||||
fail_fast=False
|
||||
),
|
||||
A2AConfig(
|
||||
endpoint="https://backup.example.com/.well-known/agent-card.json",
|
||||
fail_fast=False
|
||||
)
|
||||
]
|
||||
)
|
||||
```
|
||||
|
||||
When `fail_fast=False`:
|
||||
- If some agents fail, the LLM is informed which agents are unavailable and can delegate to working agents
|
||||
- If all agents fail, the LLM receives a notice about unavailable agents and handles the task directly
|
||||
- Connection errors are captured and included in the context for better decision-making
|
||||
|
||||
## Best Practices
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="Set Appropriate Timeouts" icon="clock">
|
||||
Configure timeouts based on expected A2A agent response times. Longer-running tasks may need higher timeout values.
|
||||
</Card>
|
||||
|
||||
<Card title="Limit Conversation Turns" icon="comments">
|
||||
Use `max_turns` to prevent excessive back-and-forth. The agent will automatically conclude conversations before hitting the limit.
|
||||
</Card>
|
||||
|
||||
<Card title="Use Resilient Error Handling" icon="shield-check">
|
||||
Set `fail_fast=False` for production environments with multiple agents to gracefully handle connection failures and maintain workflow continuity.
|
||||
</Card>
|
||||
|
||||
<Card title="Secure Your Credentials" icon="lock">
|
||||
Store authentication tokens and credentials as environment variables, not in code.
|
||||
</Card>
|
||||
|
||||
<Card title="Monitor Delegation Decisions" icon="eye">
|
||||
Use verbose mode to observe when the LLM chooses to delegate versus handle tasks directly.
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
## Supported Authentication Methods
|
||||
|
||||
- **Bearer Token** - Simple token-based authentication
|
||||
- **OAuth2 Client Credentials** - OAuth2 flow for machine-to-machine communication
|
||||
- **OAuth2 Authorization Code** - OAuth2 flow requiring user authorization
|
||||
- **API Key** - Key-based authentication (header, query param, or cookie)
|
||||
- **HTTP Basic** - Username/password authentication
|
||||
- **HTTP Digest** - Digest authentication (requires `httpx-auth` package)
|
||||
|
||||
## Learn More
|
||||
|
||||
For more information about the A2A protocol and reference implementations:
|
||||
|
||||
- [A2A Protocol Documentation](https://a2a-protocol.org)
|
||||
- [A2A Sample Implementations](https://github.com/a2aproject/a2a-samples)
|
||||
- [A2A Python SDK](https://github.com/a2aproject/a2a-python)
|
||||
522
docs/en/learn/execution-hooks.mdx
Normal file
522
docs/en/learn/execution-hooks.mdx
Normal file
@@ -0,0 +1,522 @@
|
||||
---
|
||||
title: Execution Hooks Overview
|
||||
description: Understanding and using execution hooks in CrewAI for fine-grained control over agent operations
|
||||
mode: "wide"
|
||||
---
|
||||
|
||||
Execution Hooks provide fine-grained control over the runtime behavior of your CrewAI agents. Unlike kickoff hooks that run before and after crew execution, execution hooks intercept specific operations during agent execution, allowing you to modify behavior, implement safety checks, and add comprehensive monitoring.
|
||||
|
||||
## Types of Execution Hooks
|
||||
|
||||
CrewAI provides two main categories of execution hooks:
|
||||
|
||||
### 1. [LLM Call Hooks](/learn/llm-hooks)
|
||||
|
||||
Control and monitor language model interactions:
|
||||
- **Before LLM Call**: Modify prompts, validate inputs, implement approval gates
|
||||
- **After LLM Call**: Transform responses, sanitize outputs, update conversation history
|
||||
|
||||
**Use Cases:**
|
||||
- Iteration limiting
|
||||
- Cost tracking and token usage monitoring
|
||||
- Response sanitization and content filtering
|
||||
- Human-in-the-loop approval for LLM calls
|
||||
- Adding safety guidelines or context
|
||||
- Debug logging and request/response inspection
|
||||
|
||||
[View LLM Hooks Documentation →](/learn/llm-hooks)
|
||||
|
||||
### 2. [Tool Call Hooks](/learn/tool-hooks)
|
||||
|
||||
Control and monitor tool execution:
|
||||
- **Before Tool Call**: Modify inputs, validate parameters, block dangerous operations
|
||||
- **After Tool Call**: Transform results, sanitize outputs, log execution details
|
||||
|
||||
**Use Cases:**
|
||||
- Safety guardrails for destructive operations
|
||||
- Human approval for sensitive actions
|
||||
- Input validation and sanitization
|
||||
- Result caching and rate limiting
|
||||
- Tool usage analytics
|
||||
- Debug logging and monitoring
|
||||
|
||||
[View Tool Hooks Documentation →](/learn/tool-hooks)
|
||||
|
||||
## Hook Registration Methods
|
||||
|
||||
### 1. Decorator-Based Hooks (Recommended)
|
||||
|
||||
The cleanest and most Pythonic way to register hooks:
|
||||
|
||||
```python
|
||||
from crewai.hooks import before_llm_call, after_llm_call, before_tool_call, after_tool_call
|
||||
|
||||
@before_llm_call
|
||||
def limit_iterations(context):
|
||||
"""Prevent infinite loops by limiting iterations."""
|
||||
if context.iterations > 10:
|
||||
return False # Block execution
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def sanitize_response(context):
|
||||
"""Remove sensitive data from LLM responses."""
|
||||
if context.response and "API_KEY" in context.response:
|
||||
return context.response.replace("API_KEY", "[REDACTED]")
|
||||
return None
|
||||
|
||||
@before_tool_call
|
||||
def block_dangerous_tools(context):
|
||||
"""Block destructive operations."""
|
||||
if context.tool_name == "delete_database":
|
||||
return False # Block execution
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def log_tool_result(context):
|
||||
"""Log tool execution."""
|
||||
print(f"Tool {context.tool_name} completed")
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. Crew-Scoped Hooks
|
||||
|
||||
Apply hooks only to specific crew instances:
|
||||
|
||||
```python
|
||||
from crewai import Crew, Process
|
||||
from crewai.project import CrewBase, crew
|
||||
from crewai.hooks import before_llm_call_crew, after_tool_call_crew
|
||||
|
||||
@CrewBase
|
||||
class MyProjCrew:
|
||||
@before_llm_call_crew
|
||||
def validate_inputs(self, context):
|
||||
# Only applies to this crew
|
||||
print(f"LLM call in {self.__class__.__name__}")
|
||||
return None
|
||||
|
||||
@after_tool_call_crew
|
||||
def log_results(self, context):
|
||||
# Crew-specific logging
|
||||
print(f"Tool result: {context.tool_result[:50]}...")
|
||||
return None
|
||||
|
||||
@crew
|
||||
def crew(self) -> Crew:
|
||||
return Crew(
|
||||
agents=self.agents,
|
||||
tasks=self.tasks,
|
||||
process=Process.sequential
|
||||
)
|
||||
```
|
||||
|
||||
## Hook Execution Flow
|
||||
|
||||
### LLM Call Flow
|
||||
|
||||
```
|
||||
Agent needs to call LLM
|
||||
↓
|
||||
[Before LLM Call Hooks Execute]
|
||||
├→ Hook 1: Validate iteration count
|
||||
├→ Hook 2: Add safety context
|
||||
└→ Hook 3: Log request
|
||||
↓
|
||||
If any hook returns False:
|
||||
├→ Block LLM call
|
||||
└→ Raise ValueError
|
||||
↓
|
||||
If all hooks return True/None:
|
||||
├→ LLM call proceeds
|
||||
└→ Response generated
|
||||
↓
|
||||
[After LLM Call Hooks Execute]
|
||||
├→ Hook 1: Sanitize response
|
||||
├→ Hook 2: Log response
|
||||
└→ Hook 3: Update metrics
|
||||
↓
|
||||
Final response returned
|
||||
```
|
||||
|
||||
### Tool Call Flow
|
||||
|
||||
```
|
||||
Agent needs to execute tool
|
||||
↓
|
||||
[Before Tool Call Hooks Execute]
|
||||
├→ Hook 1: Check if tool is allowed
|
||||
├→ Hook 2: Validate inputs
|
||||
└→ Hook 3: Request approval if needed
|
||||
↓
|
||||
If any hook returns False:
|
||||
├→ Block tool execution
|
||||
└→ Return error message
|
||||
↓
|
||||
If all hooks return True/None:
|
||||
├→ Tool execution proceeds
|
||||
└→ Result generated
|
||||
↓
|
||||
[After Tool Call Hooks Execute]
|
||||
├→ Hook 1: Sanitize result
|
||||
├→ Hook 2: Cache result
|
||||
└→ Hook 3: Log metrics
|
||||
↓
|
||||
Final result returned
|
||||
```
|
||||
|
||||
## Hook Context Objects
|
||||
|
||||
### LLMCallHookContext
|
||||
|
||||
Provides access to LLM execution state:
|
||||
|
||||
```python
|
||||
class LLMCallHookContext:
|
||||
executor: CrewAgentExecutor # Full executor access
|
||||
messages: list # Mutable message list
|
||||
agent: Agent # Current agent
|
||||
task: Task # Current task
|
||||
crew: Crew # Crew instance
|
||||
llm: BaseLLM # LLM instance
|
||||
iterations: int # Current iteration
|
||||
response: str | None # LLM response (after hooks only)
|
||||
```
|
||||
|
||||
### ToolCallHookContext
|
||||
|
||||
Provides access to tool execution state:
|
||||
|
||||
```python
|
||||
class ToolCallHookContext:
|
||||
tool_name: str # Tool being called
|
||||
tool_input: dict # Mutable input parameters
|
||||
tool: CrewStructuredTool # Tool instance
|
||||
agent: Agent | None # Agent executing
|
||||
task: Task | None # Current task
|
||||
crew: Crew | None # Crew instance
|
||||
tool_result: str | None # Tool result (after hooks only)
|
||||
```
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Safety and Validation
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def safety_check(context):
|
||||
"""Block destructive operations."""
|
||||
dangerous = ['delete_file', 'drop_table', 'system_shutdown']
|
||||
if context.tool_name in dangerous:
|
||||
print(f"🛑 Blocked: {context.tool_name}")
|
||||
return False
|
||||
return None
|
||||
|
||||
@before_llm_call
|
||||
def iteration_limit(context):
|
||||
"""Prevent infinite loops."""
|
||||
if context.iterations > 15:
|
||||
print("⛔ Maximum iterations exceeded")
|
||||
return False
|
||||
return None
|
||||
```
|
||||
|
||||
### Human-in-the-Loop
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def require_approval(context):
|
||||
"""Require approval for sensitive operations."""
|
||||
sensitive = ['send_email', 'make_payment', 'post_message']
|
||||
|
||||
if context.tool_name in sensitive:
|
||||
response = context.request_human_input(
|
||||
prompt=f"Approve {context.tool_name}?",
|
||||
default_message="Type 'yes' to approve:"
|
||||
)
|
||||
|
||||
if response.lower() != 'yes':
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### Monitoring and Analytics
|
||||
|
||||
```python
|
||||
from collections import defaultdict
|
||||
import time
|
||||
|
||||
metrics = defaultdict(lambda: {'count': 0, 'total_time': 0})
|
||||
|
||||
@before_tool_call
|
||||
def start_timer(context):
|
||||
context.tool_input['_start'] = time.time()
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def track_metrics(context):
|
||||
start = context.tool_input.get('_start', time.time())
|
||||
duration = time.time() - start
|
||||
|
||||
metrics[context.tool_name]['count'] += 1
|
||||
metrics[context.tool_name]['total_time'] += duration
|
||||
|
||||
return None
|
||||
|
||||
# View metrics
|
||||
def print_metrics():
|
||||
for tool, data in metrics.items():
|
||||
avg = data['total_time'] / data['count']
|
||||
print(f"{tool}: {data['count']} calls, {avg:.2f}s avg")
|
||||
```
|
||||
|
||||
### Response Sanitization
|
||||
|
||||
```python
|
||||
import re
|
||||
|
||||
@after_llm_call
|
||||
def sanitize_llm_response(context):
|
||||
"""Remove sensitive data from LLM responses."""
|
||||
if not context.response:
|
||||
return None
|
||||
|
||||
result = context.response
|
||||
result = re.sub(r'(api[_-]?key)["\']?\s*[:=]\s*["\']?[\w-]+',
|
||||
r'\1: [REDACTED]', result, flags=re.IGNORECASE)
|
||||
return result
|
||||
|
||||
@after_tool_call
|
||||
def sanitize_tool_result(context):
|
||||
"""Remove sensitive data from tool results."""
|
||||
if not context.tool_result:
|
||||
return None
|
||||
|
||||
result = context.tool_result
|
||||
result = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
|
||||
'[EMAIL-REDACTED]', result)
|
||||
return result
|
||||
```
|
||||
|
||||
## Hook Management
|
||||
|
||||
### Clearing All Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import clear_all_global_hooks
|
||||
|
||||
# Clear all hooks at once
|
||||
result = clear_all_global_hooks()
|
||||
print(f"Cleared {result['total']} hooks")
|
||||
# Example result: {'llm_hooks': (2, 1), 'tool_hooks': (1, 2), 'total': (3, 3)}
|
||||
```
|
||||
|
||||
### Clearing Specific Hook Types
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
clear_before_llm_call_hooks,
|
||||
clear_after_llm_call_hooks,
|
||||
clear_before_tool_call_hooks,
|
||||
clear_after_tool_call_hooks
|
||||
)
|
||||
|
||||
# Clear specific types
|
||||
llm_before_count = clear_before_llm_call_hooks()
|
||||
tool_after_count = clear_after_tool_call_hooks()
|
||||
```
|
||||
|
||||
### Unregistering Individual Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
register_before_llm_call_hook, unregister_before_llm_call_hook,
|
||||
unregister_after_tool_call_hook
|
||||
)
|
||||
|
||||
def my_hook(context):
|
||||
...
|
||||
|
||||
# Register
|
||||
register_before_llm_call_hook(my_hook)
|
||||
|
||||
# Later, unregister
|
||||
success = unregister_before_llm_call_hook(my_hook)
|
||||
print(f"Unregistered: {success}")
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Keep Hooks Focused
|
||||
Each hook should have a single, clear responsibility:
|
||||
|
||||
```python
|
||||
# ✅ Good - focused responsibility
|
||||
@before_tool_call
|
||||
def validate_file_path(context):
|
||||
if context.tool_name == 'read_file':
|
||||
if '..' in context.tool_input.get('path', ''):
|
||||
return False
|
||||
return None
|
||||
|
||||
# ❌ Bad - too many responsibilities
|
||||
@before_tool_call
|
||||
def do_everything(context):
|
||||
# Validation + logging + metrics + approval...
|
||||
...
|
||||
```
|
||||
|
||||
### 2. Handle Errors Gracefully
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def safe_hook(context):
|
||||
try:
|
||||
# Your logic
|
||||
if some_condition:
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"Hook error: {e}")
|
||||
return None # Allow execution despite error
|
||||
```
|
||||
|
||||
### 3. Modify Context In-Place
|
||||
|
||||
```python
|
||||
# ✅ Correct - modify in-place
|
||||
@before_llm_call
|
||||
def add_context(context):
|
||||
context.messages.append({"role": "system", "content": "Be concise"})
|
||||
|
||||
# ❌ Wrong - replaces reference
|
||||
@before_llm_call
|
||||
def wrong_approach(context):
|
||||
context.messages = [{"role": "system", "content": "Be concise"}]
|
||||
```
|
||||
|
||||
### 4. Use Type Hints
|
||||
|
||||
```python
|
||||
from crewai.hooks import LLMCallHookContext, ToolCallHookContext
|
||||
|
||||
def my_llm_hook(context: LLMCallHookContext) -> bool | None:
|
||||
# IDE autocomplete and type checking
|
||||
return None
|
||||
|
||||
def my_tool_hook(context: ToolCallHookContext) -> str | None:
|
||||
return None
|
||||
```
|
||||
|
||||
### 5. Clean Up in Tests
|
||||
|
||||
```python
|
||||
import pytest
|
||||
from crewai.hooks import clear_all_global_hooks
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def clean_hooks():
|
||||
"""Reset hooks after each test."""
|
||||
yield
|
||||
clear_all_global_hooks()
|
||||
```
|
||||
|
||||
## When to Use Which Hook
|
||||
|
||||
### Use LLM Hooks When:
|
||||
- Implementing iteration limits
|
||||
- Adding context or safety guidelines to prompts
|
||||
- Tracking token usage and costs
|
||||
- Sanitizing or transforming responses
|
||||
- Implementing approval gates for LLM calls
|
||||
- Debugging prompt/response interactions
|
||||
|
||||
### Use Tool Hooks When:
|
||||
- Blocking dangerous or destructive operations
|
||||
- Validating tool inputs before execution
|
||||
- Implementing approval gates for sensitive actions
|
||||
- Caching tool results
|
||||
- Tracking tool usage and performance
|
||||
- Sanitizing tool outputs
|
||||
- Rate limiting tool calls
|
||||
|
||||
### Use Both When:
|
||||
Building comprehensive observability, safety, or approval systems that need to monitor all agent operations.
|
||||
|
||||
## Alternative Registration Methods
|
||||
|
||||
### Programmatic Registration (Advanced)
|
||||
|
||||
Register hooks with explicit function calls instead of decorators when registration needs to happen dynamically at runtime:
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
register_before_llm_call_hook,
|
||||
register_after_tool_call_hook
|
||||
)
|
||||
|
||||
def my_hook(context):
|
||||
return None
|
||||
|
||||
# Register programmatically
|
||||
register_before_llm_call_hook(my_hook)
|
||||
|
||||
# Useful for:
|
||||
# - Loading hooks from configuration
|
||||
# - Conditional hook registration
|
||||
# - Plugin systems
|
||||
```
|
||||
|
||||
**Note:** For most use cases, decorators are cleaner and more maintainable.
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
1. **Keep Hooks Fast**: Hooks execute on every call - avoid heavy computation
|
||||
2. **Cache When Possible**: Store expensive validations or lookups
|
||||
3. **Be Selective**: Use crew-scoped hooks when global hooks aren't needed
|
||||
4. **Monitor Hook Overhead**: Profile hook execution time in production
|
||||
5. **Lazy Import**: Import heavy dependencies only when needed
|
||||
|
||||
## Debugging Hooks
|
||||
|
||||
### Enable Debug Logging
|
||||
|
||||
```python
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@before_llm_call
|
||||
def debug_hook(context):
|
||||
logger.debug(f"LLM call: {context.agent.role}, iteration {context.iterations}")
|
||||
return None
|
||||
```
|
||||
|
||||
### Hook Execution Order
|
||||
|
||||
Hooks execute in registration order. If a before hook returns `False`, subsequent hooks don't execute:
|
||||
|
||||
```python
|
||||
# Register order matters!
|
||||
register_before_tool_call_hook(hook1) # Executes first
|
||||
register_before_tool_call_hook(hook2) # Executes second
|
||||
register_before_tool_call_hook(hook3) # Executes third
|
||||
|
||||
# If hook2 returns False:
|
||||
# - hook1 executed
|
||||
# - hook2 executed and returned False
|
||||
# - hook3 NOT executed
|
||||
# - Tool call blocked
|
||||
```
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- [LLM Call Hooks →](/learn/llm-hooks) - Detailed LLM hook documentation
|
||||
- [Tool Call Hooks →](/learn/tool-hooks) - Detailed tool hook documentation
|
||||
- [Before and After Kickoff Hooks →](/learn/before-and-after-kickoff-hooks) - Crew lifecycle hooks
|
||||
- [Human-in-the-Loop →](/learn/human-in-the-loop) - Human input patterns
|
||||
|
||||
## Conclusion
|
||||
|
||||
Execution hooks provide powerful control over agent runtime behavior. Use them to implement safety guardrails, approval workflows, comprehensive monitoring, and custom business logic. Combined with proper error handling, type safety, and performance considerations, hooks enable production-ready, secure, and observable agent systems.
|
||||
@@ -97,7 +97,7 @@ project_crew = Crew(
|
||||
```
|
||||
|
||||
<Tip>
|
||||
For more details on creating and customizing a manager agent, check out the [Custom Manager Agent documentation](https://docs.crewai.com/how-to/custom-manager-agent#custom-manager-agent).
|
||||
For more details on creating and customizing a manager agent, check out the [Custom Manager Agent documentation](/en/learn/custom-manager-agent).
|
||||
</Tip>
|
||||
|
||||
|
||||
|
||||
427
docs/en/learn/llm-hooks.mdx
Normal file
427
docs/en/learn/llm-hooks.mdx
Normal file
@@ -0,0 +1,427 @@
|
||||
---
|
||||
title: LLM Call Hooks
|
||||
description: Learn how to use LLM call hooks to intercept, modify, and control language model interactions in CrewAI
|
||||
mode: "wide"
|
||||
---
|
||||
|
||||
LLM Call Hooks provide fine-grained control over language model interactions during agent execution. These hooks allow you to intercept LLM calls, modify prompts, transform responses, implement approval gates, and add custom logging or monitoring.
|
||||
|
||||
## Overview
|
||||
|
||||
LLM hooks are executed at two critical points:
|
||||
- **Before LLM Call**: Modify messages, validate inputs, or block execution
|
||||
- **After LLM Call**: Transform responses, sanitize outputs, or modify conversation history
|
||||
|
||||
## Hook Types
|
||||
|
||||
### Before LLM Call Hooks
|
||||
|
||||
Executed before every LLM call, these hooks can:
|
||||
- Inspect and modify messages sent to the LLM
|
||||
- Block LLM execution based on conditions
|
||||
- Implement rate limiting or approval gates
|
||||
- Add context or system messages
|
||||
- Log request details
|
||||
|
||||
**Signature:**
|
||||
```python
|
||||
def before_hook(context: LLMCallHookContext) -> bool | None:
|
||||
# Return False to block execution
|
||||
# Return True or None to allow execution
|
||||
...
|
||||
```
|
||||
|
||||
### After LLM Call Hooks
|
||||
|
||||
Executed after every LLM call, these hooks can:
|
||||
- Modify or sanitize LLM responses
|
||||
- Add metadata or formatting
|
||||
- Log response details
|
||||
- Update conversation history
|
||||
- Implement content filtering
|
||||
|
||||
**Signature:**
|
||||
```python
|
||||
def after_hook(context: LLMCallHookContext) -> str | None:
|
||||
# Return modified response string
|
||||
# Return None to keep original response
|
||||
...
|
||||
```
|
||||
|
||||
## LLM Hook Context
|
||||
|
||||
The `LLMCallHookContext` object provides comprehensive access to execution state:
|
||||
|
||||
```python
|
||||
class LLMCallHookContext:
|
||||
executor: CrewAgentExecutor # Full executor reference
|
||||
messages: list # Mutable message list
|
||||
agent: Agent # Current agent
|
||||
task: Task # Current task
|
||||
crew: Crew # Crew instance
|
||||
llm: BaseLLM # LLM instance
|
||||
iterations: int # Current iteration count
|
||||
response: str | None # LLM response (after hooks only)
|
||||
```
|
||||
|
||||
### Modifying Messages
|
||||
|
||||
**Important:** Always modify messages in-place:
|
||||
|
||||
```python
|
||||
# ✅ Correct - modify in-place
|
||||
def add_context(context: LLMCallHookContext) -> None:
|
||||
context.messages.append({"role": "system", "content": "Be concise"})
|
||||
|
||||
# ❌ Wrong - replaces list reference
|
||||
def wrong_approach(context: LLMCallHookContext) -> None:
|
||||
context.messages = [{"role": "system", "content": "Be concise"}]
|
||||
```
|
||||
|
||||
## Registration Methods
|
||||
|
||||
### 1. Global Hook Registration
|
||||
|
||||
Register hooks that apply to all LLM calls across all crews:
|
||||
|
||||
```python
|
||||
from crewai.hooks import register_before_llm_call_hook, register_after_llm_call_hook
|
||||
|
||||
def log_llm_call(context):
|
||||
print(f"LLM call by {context.agent.role} at iteration {context.iterations}")
|
||||
return None # Allow execution
|
||||
|
||||
register_before_llm_call_hook(log_llm_call)
|
||||
```
|
||||
|
||||
### 2. Decorator-Based Registration
|
||||
|
||||
Use decorators for cleaner syntax:
|
||||
|
||||
```python
|
||||
from crewai.hooks import before_llm_call, after_llm_call
|
||||
|
||||
@before_llm_call
|
||||
def validate_iteration_count(context):
|
||||
if context.iterations > 10:
|
||||
print("⚠️ Exceeded maximum iterations")
|
||||
return False # Block execution
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def sanitize_response(context):
|
||||
if context.response and "API_KEY" in context.response:
|
||||
return context.response.replace("API_KEY", "[REDACTED]")
|
||||
return None
|
||||
```
|
||||
|
||||
### 3. Crew-Scoped Hooks
|
||||
|
||||
Register hooks for a specific crew instance:
|
||||
|
||||
```python
|
||||
@CrewBase
|
||||
class MyProjCrew:
|
||||
@before_llm_call_crew
|
||||
def validate_inputs(self, context):
|
||||
# Only applies to this crew
|
||||
if context.iterations == 0:
|
||||
print(f"Starting task: {context.task.description}")
|
||||
return None
|
||||
|
||||
@after_llm_call_crew
|
||||
def log_responses(self, context):
|
||||
# Crew-specific response logging
|
||||
print(f"Response length: {len(context.response)}")
|
||||
return None
|
||||
|
||||
@crew
|
||||
def crew(self) -> Crew:
|
||||
return Crew(
|
||||
agents=self.agents,
|
||||
tasks=self.tasks,
|
||||
process=Process.sequential,
|
||||
verbose=True
|
||||
)
|
||||
```
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
### 1. Iteration Limiting
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def limit_iterations(context: LLMCallHookContext) -> bool | None:
|
||||
max_iterations = 15
|
||||
if context.iterations > max_iterations:
|
||||
print(f"⛔ Blocked: Exceeded {max_iterations} iterations")
|
||||
return False # Block execution
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. Human Approval Gate
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def require_approval(context: LLMCallHookContext) -> bool | None:
|
||||
if context.iterations > 5:
|
||||
response = context.request_human_input(
|
||||
prompt=f"Iteration {context.iterations}: Approve LLM call?",
|
||||
default_message="Press Enter to approve, or type 'no' to block:"
|
||||
)
|
||||
if response.lower() == "no":
|
||||
print("🚫 LLM call blocked by user")
|
||||
return False
|
||||
return None
|
||||
```
|
||||
|
||||
### 3. Adding System Context
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def add_guardrails(context: LLMCallHookContext) -> None:
|
||||
# Add safety guidelines to every LLM call
|
||||
context.messages.append({
|
||||
"role": "system",
|
||||
"content": "Ensure responses are factual and cite sources when possible."
|
||||
})
|
||||
return None
|
||||
```
|
||||
|
||||
### 4. Response Sanitization
|
||||
|
||||
```python
|
||||
@after_llm_call
|
||||
def sanitize_sensitive_data(context: LLMCallHookContext) -> str | None:
|
||||
if not context.response:
|
||||
return None
|
||||
|
||||
# Remove sensitive patterns
|
||||
import re
|
||||
sanitized = context.response
|
||||
sanitized = re.sub(r'\b\d{3}-\d{2}-\d{4}\b', '[SSN-REDACTED]', sanitized)
|
||||
sanitized = re.sub(r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b', '[CARD-REDACTED]', sanitized)
|
||||
|
||||
return sanitized
|
||||
```
|
||||
|
||||
### 5. Cost Tracking
|
||||
|
||||
```python
|
||||
import tiktoken
|
||||
|
||||
@before_llm_call
|
||||
def track_token_usage(context: LLMCallHookContext) -> None:
|
||||
encoding = tiktoken.get_encoding("cl100k_base")
|
||||
total_tokens = sum(
|
||||
len(encoding.encode(msg.get("content", "")))
|
||||
for msg in context.messages
|
||||
)
|
||||
print(f"📊 Input tokens: ~{total_tokens}")
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def track_response_tokens(context: LLMCallHookContext) -> None:
|
||||
if context.response:
|
||||
encoding = tiktoken.get_encoding("cl100k_base")
|
||||
tokens = len(encoding.encode(context.response))
|
||||
print(f"📊 Response tokens: ~{tokens}")
|
||||
return None
|
||||
```
|
||||
|
||||
### 6. Debug Logging
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def debug_request(context: LLMCallHookContext) -> None:
|
||||
print(f"""
|
||||
🔍 LLM Call Debug:
|
||||
- Agent: {context.agent.role}
|
||||
- Task: {context.task.description[:50]}...
|
||||
- Iteration: {context.iterations}
|
||||
- Message Count: {len(context.messages)}
|
||||
- Last Message: {context.messages[-1] if context.messages else 'None'}
|
||||
""")
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def debug_response(context: LLMCallHookContext) -> None:
|
||||
if context.response:
|
||||
print(f"✅ Response Preview: {context.response[:100]}...")
|
||||
return None
|
||||
```
|
||||
|
||||
## Hook Management
|
||||
|
||||
### Unregistering Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
register_before_llm_call_hook, unregister_before_llm_call_hook,
|
||||
unregister_after_llm_call_hook
|
||||
)
|
||||
|
||||
# Unregister specific hook
|
||||
def my_hook(context):
|
||||
...
|
||||
|
||||
register_before_llm_call_hook(my_hook)
|
||||
# Later...
|
||||
unregister_before_llm_call_hook(my_hook) # Returns True if found
|
||||
```
|
||||
|
||||
### Clearing Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
clear_before_llm_call_hooks,
|
||||
clear_after_llm_call_hooks,
|
||||
clear_all_llm_call_hooks
|
||||
)
|
||||
|
||||
# Clear specific hook type
|
||||
count = clear_before_llm_call_hooks()
|
||||
print(f"Cleared {count} before hooks")
|
||||
|
||||
# Clear all LLM hooks
|
||||
before_count, after_count = clear_all_llm_call_hooks()
|
||||
print(f"Cleared {before_count} before and {after_count} after hooks")
|
||||
```
|
||||
|
||||
### Listing Registered Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
get_before_llm_call_hooks,
|
||||
get_after_llm_call_hooks
|
||||
)
|
||||
|
||||
# Get current hooks
|
||||
before_hooks = get_before_llm_call_hooks()
|
||||
after_hooks = get_after_llm_call_hooks()
|
||||
|
||||
print(f"Registered: {len(before_hooks)} before, {len(after_hooks)} after")
|
||||
```
|
||||
|
||||
## Advanced Patterns
|
||||
|
||||
### Conditional Hook Execution
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def conditional_blocking(context: LLMCallHookContext) -> bool | None:
|
||||
# Only block for specific agents
|
||||
if context.agent.role == "researcher" and context.iterations > 10:
|
||||
return False
|
||||
|
||||
# Only block for specific tasks
|
||||
if "sensitive" in context.task.description.lower() and context.iterations > 5:
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### Context-Aware Modifications
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def adaptive_prompting(context: LLMCallHookContext) -> None:
|
||||
# Add different context based on iteration
|
||||
if context.iterations == 0:
|
||||
context.messages.append({
|
||||
"role": "system",
|
||||
"content": "Start with a high-level overview."
|
||||
})
|
||||
elif context.iterations > 3:
|
||||
context.messages.append({
|
||||
"role": "system",
|
||||
"content": "Focus on specific details and provide examples."
|
||||
})
|
||||
return None
|
||||
```
|
||||
|
||||
### Chaining Hooks
|
||||
|
||||
```python
|
||||
# Multiple hooks execute in registration order
|
||||
|
||||
@before_llm_call
|
||||
def first_hook(context):
|
||||
print("1. First hook executed")
|
||||
return None
|
||||
|
||||
@before_llm_call
|
||||
def second_hook(context):
|
||||
print("2. Second hook executed")
|
||||
return None
|
||||
|
||||
@before_llm_call
|
||||
def blocking_hook(context):
|
||||
if context.iterations > 10:
|
||||
print("3. Blocking hook - execution stopped")
|
||||
return False # Subsequent hooks won't execute
|
||||
print("3. Blocking hook - execution allowed")
|
||||
return None
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Keep Hooks Focused**: Each hook should have a single responsibility
|
||||
2. **Avoid Heavy Computation**: Hooks execute on every LLM call
|
||||
3. **Handle Errors Gracefully**: Use try-except to prevent hook failures from breaking execution
|
||||
4. **Use Type Hints**: Leverage `LLMCallHookContext` for better IDE support
|
||||
5. **Document Hook Behavior**: Especially for blocking conditions
|
||||
6. **Test Hooks Independently**: Unit test hooks before using in production
|
||||
7. **Clear Hooks in Tests**: Use `clear_all_llm_call_hooks()` between test runs
|
||||
8. **Modify In-Place**: Always modify `context.messages` in-place, never replace
|
||||
|
||||
## Error Handling
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def safe_hook(context: LLMCallHookContext) -> bool | None:
|
||||
try:
|
||||
# Your hook logic
|
||||
if some_condition:
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"⚠️ Hook error: {e}")
|
||||
# Decide: allow or block on error
|
||||
return None # Allow execution despite error
|
||||
```
|
||||
|
||||
## Type Safety
|
||||
|
||||
```python
|
||||
from crewai.hooks import LLMCallHookContext, BeforeLLMCallHookType, AfterLLMCallHookType
|
||||
|
||||
# Explicit type annotations
|
||||
def my_before_hook(context: LLMCallHookContext) -> bool | None:
|
||||
return None
|
||||
|
||||
def my_after_hook(context: LLMCallHookContext) -> str | None:
|
||||
return None
|
||||
|
||||
# Type-safe registration
|
||||
register_before_llm_call_hook(my_before_hook)
|
||||
register_after_llm_call_hook(my_after_hook)
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Hook Not Executing
|
||||
- Verify hook is registered before crew execution
|
||||
- Check if previous hook returned `False` (blocks subsequent hooks)
|
||||
- Ensure hook signature matches expected type
|
||||
|
||||
### Message Modifications Not Persisting
|
||||
- Use in-place modifications: `context.messages.append()`
|
||||
- Don't replace the list: `context.messages = []`
|
||||
|
||||
### Response Modifications Not Working
|
||||
- Return the modified string from after hooks
|
||||
- Returning `None` keeps the original response
|
||||
|
||||
## Conclusion
|
||||
|
||||
LLM Call Hooks provide powerful capabilities for controlling and monitoring language model interactions in CrewAI. Use them to implement safety guardrails, approval gates, logging, cost tracking, and response sanitization. Combined with proper error handling and type safety, hooks enable robust and production-ready agent systems.
|
||||
600
docs/en/learn/tool-hooks.mdx
Normal file
600
docs/en/learn/tool-hooks.mdx
Normal file
@@ -0,0 +1,600 @@
|
||||
---
|
||||
title: Tool Call Hooks
|
||||
description: Learn how to use tool call hooks to intercept, modify, and control tool execution in CrewAI
|
||||
mode: "wide"
|
||||
---
|
||||
|
||||
Tool Call Hooks provide fine-grained control over tool execution during agent operations. These hooks allow you to intercept tool calls, modify inputs, transform outputs, implement safety checks, and add comprehensive logging or monitoring.
|
||||
|
||||
## Overview
|
||||
|
||||
Tool hooks are executed at two critical points:
|
||||
- **Before Tool Call**: Modify inputs, validate parameters, or block execution
|
||||
- **After Tool Call**: Transform results, sanitize outputs, or log execution details
|
||||
|
||||
## Hook Types
|
||||
|
||||
### Before Tool Call Hooks
|
||||
|
||||
Executed before every tool execution, these hooks can:
|
||||
- Inspect and modify tool inputs
|
||||
- Block tool execution based on conditions
|
||||
- Implement approval gates for dangerous operations
|
||||
- Validate parameters
|
||||
- Log tool invocations
|
||||
|
||||
**Signature:**
|
||||
```python
|
||||
def before_hook(context: ToolCallHookContext) -> bool | None:
|
||||
# Return False to block execution
|
||||
# Return True or None to allow execution
|
||||
...
|
||||
```
|
||||
|
||||
### After Tool Call Hooks
|
||||
|
||||
Executed after every tool execution, these hooks can:
|
||||
- Modify or sanitize tool results
|
||||
- Add metadata or formatting
|
||||
- Log execution results
|
||||
- Implement result validation
|
||||
- Transform output formats
|
||||
|
||||
**Signature:**
|
||||
```python
|
||||
def after_hook(context: ToolCallHookContext) -> str | None:
|
||||
# Return modified result string
|
||||
# Return None to keep original result
|
||||
...
|
||||
```
|
||||
|
||||
## Tool Hook Context
|
||||
|
||||
The `ToolCallHookContext` object provides comprehensive access to tool execution state:
|
||||
|
||||
```python
|
||||
class ToolCallHookContext:
|
||||
tool_name: str # Name of the tool being called
|
||||
tool_input: dict[str, Any] # Mutable tool input parameters
|
||||
tool: CrewStructuredTool # Tool instance reference
|
||||
agent: Agent | BaseAgent | None # Agent executing the tool
|
||||
task: Task | None # Current task
|
||||
crew: Crew | None # Crew instance
|
||||
tool_result: str | None # Tool result (after hooks only)
|
||||
```
|
||||
|
||||
### Modifying Tool Inputs
|
||||
|
||||
**Important:** Always modify tool inputs in-place:
|
||||
|
||||
```python
|
||||
# ✅ Correct - modify in-place
|
||||
def sanitize_input(context: ToolCallHookContext) -> None:
|
||||
context.tool_input['query'] = context.tool_input['query'].lower()
|
||||
|
||||
# ❌ Wrong - replaces dict reference
|
||||
def wrong_approach(context: ToolCallHookContext) -> None:
|
||||
context.tool_input = {'query': 'new query'}
|
||||
```
|
||||
|
||||
## Registration Methods
|
||||
|
||||
### 1. Global Hook Registration
|
||||
|
||||
Register hooks that apply to all tool calls across all crews:
|
||||
|
||||
```python
|
||||
from crewai.hooks import register_before_tool_call_hook, register_after_tool_call_hook
|
||||
|
||||
def log_tool_call(context):
|
||||
print(f"Tool: {context.tool_name}")
|
||||
print(f"Input: {context.tool_input}")
|
||||
return None # Allow execution
|
||||
|
||||
register_before_tool_call_hook(log_tool_call)
|
||||
```
|
||||
|
||||
### 2. Decorator-Based Registration
|
||||
|
||||
Use decorators for cleaner syntax:
|
||||
|
||||
```python
|
||||
from crewai.hooks import before_tool_call, after_tool_call
|
||||
|
||||
@before_tool_call
|
||||
def block_dangerous_tools(context):
|
||||
dangerous_tools = ['delete_database', 'drop_table', 'rm_rf']
|
||||
if context.tool_name in dangerous_tools:
|
||||
print(f"⛔ Blocked dangerous tool: {context.tool_name}")
|
||||
return False # Block execution
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def sanitize_results(context):
|
||||
if context.tool_result and "password" in context.tool_result.lower():
|
||||
return context.tool_result.replace("password", "[REDACTED]")
|
||||
return None
|
||||
```
|
||||
|
||||
### 3. Crew-Scoped Hooks
|
||||
|
||||
Register hooks for a specific crew instance:
|
||||
|
||||
```python
|
||||
@CrewBase
|
||||
class MyProjCrew:
|
||||
@before_tool_call_crew
|
||||
def validate_tool_inputs(self, context):
|
||||
# Only applies to this crew
|
||||
if context.tool_name == "web_search":
|
||||
if not context.tool_input.get('query'):
|
||||
print("❌ Invalid search query")
|
||||
return False
|
||||
return None
|
||||
|
||||
@after_tool_call_crew
|
||||
def log_tool_results(self, context):
|
||||
# Crew-specific tool logging
|
||||
print(f"✅ {context.tool_name} completed")
|
||||
return None
|
||||
|
||||
@crew
|
||||
def crew(self) -> Crew:
|
||||
return Crew(
|
||||
agents=self.agents,
|
||||
tasks=self.tasks,
|
||||
process=Process.sequential,
|
||||
verbose=True
|
||||
)
|
||||
```
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
### 1. Safety Guardrails
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def safety_check(context: ToolCallHookContext) -> bool | None:
|
||||
# Block tools that could cause harm
|
||||
destructive_tools = [
|
||||
'delete_file',
|
||||
'drop_table',
|
||||
'remove_user',
|
||||
'system_shutdown'
|
||||
]
|
||||
|
||||
if context.tool_name in destructive_tools:
|
||||
print(f"🛑 Blocked destructive tool: {context.tool_name}")
|
||||
return False
|
||||
|
||||
# Warn on sensitive operations
|
||||
sensitive_tools = ['send_email', 'post_to_social_media', 'charge_payment']
|
||||
if context.tool_name in sensitive_tools:
|
||||
print(f"⚠️ Executing sensitive tool: {context.tool_name}")
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. Human Approval Gate
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def require_approval_for_actions(context: ToolCallHookContext) -> bool | None:
|
||||
approval_required = [
|
||||
'send_email',
|
||||
'make_purchase',
|
||||
'delete_file',
|
||||
'post_message'
|
||||
]
|
||||
|
||||
if context.tool_name in approval_required:
|
||||
response = context.request_human_input(
|
||||
prompt=f"Approve {context.tool_name}?",
|
||||
default_message=f"Input: {context.tool_input}\nType 'yes' to approve:"
|
||||
)
|
||||
|
||||
if response.lower() != 'yes':
|
||||
print(f"❌ Tool execution denied: {context.tool_name}")
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 3. Input Validation and Sanitization
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def validate_and_sanitize_inputs(context: ToolCallHookContext) -> bool | None:
|
||||
# Validate search queries
|
||||
if context.tool_name == 'web_search':
|
||||
query = context.tool_input.get('query', '')
|
||||
if len(query) < 3:
|
||||
print("❌ Search query too short")
|
||||
return False
|
||||
|
||||
# Sanitize query
|
||||
context.tool_input['query'] = query.strip().lower()
|
||||
|
||||
# Validate file paths
|
||||
if context.tool_name == 'read_file':
|
||||
path = context.tool_input.get('path', '')
|
||||
if '..' in path or path.startswith('/'):
|
||||
print("❌ Invalid file path")
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 4. Result Sanitization
|
||||
|
||||
```python
|
||||
@after_tool_call
|
||||
def sanitize_sensitive_data(context: ToolCallHookContext) -> str | None:
|
||||
if not context.tool_result:
|
||||
return None
|
||||
|
||||
import re
|
||||
result = context.tool_result
|
||||
|
||||
# Remove API keys
|
||||
result = re.sub(
|
||||
r'(api[_-]?key|token)["\']?\s*[:=]\s*["\']?[\w-]+',
|
||||
r'\1: [REDACTED]',
|
||||
result,
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
# Remove email addresses
|
||||
result = re.sub(
|
||||
r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
|
||||
'[EMAIL-REDACTED]',
|
||||
result
|
||||
)
|
||||
|
||||
# Remove credit card numbers
|
||||
result = re.sub(
|
||||
r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b',
|
||||
'[CARD-REDACTED]',
|
||||
result
|
||||
)
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
### 5. Tool Usage Analytics
|
||||
|
||||
```python
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
tool_stats = defaultdict(lambda: {'count': 0, 'total_time': 0, 'failures': 0})
|
||||
|
||||
@before_tool_call
|
||||
def start_timer(context: ToolCallHookContext) -> None:
|
||||
context.tool_input['_start_time'] = time.time()
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def track_tool_usage(context: ToolCallHookContext) -> None:
|
||||
start_time = context.tool_input.get('_start_time', time.time())
|
||||
duration = time.time() - start_time
|
||||
|
||||
tool_stats[context.tool_name]['count'] += 1
|
||||
tool_stats[context.tool_name]['total_time'] += duration
|
||||
|
||||
if not context.tool_result or 'error' in context.tool_result.lower():
|
||||
tool_stats[context.tool_name]['failures'] += 1
|
||||
|
||||
print(f"""
|
||||
📊 Tool Stats for {context.tool_name}:
|
||||
- Executions: {tool_stats[context.tool_name]['count']}
|
||||
- Avg Time: {tool_stats[context.tool_name]['total_time'] / tool_stats[context.tool_name]['count']:.2f}s
|
||||
- Failures: {tool_stats[context.tool_name]['failures']}
|
||||
""")
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 6. Rate Limiting
|
||||
|
||||
```python
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
tool_call_history = defaultdict(list)
|
||||
|
||||
@before_tool_call
|
||||
def rate_limit_tools(context: ToolCallHookContext) -> bool | None:
|
||||
tool_name = context.tool_name
|
||||
now = datetime.now()
|
||||
|
||||
# Clean old entries (older than 1 minute)
|
||||
tool_call_history[tool_name] = [
|
||||
call_time for call_time in tool_call_history[tool_name]
|
||||
if now - call_time < timedelta(minutes=1)
|
||||
]
|
||||
|
||||
# Check rate limit (max 10 calls per minute)
|
||||
if len(tool_call_history[tool_name]) >= 10:
|
||||
print(f"🚫 Rate limit exceeded for {tool_name}")
|
||||
return False
|
||||
|
||||
# Record this call
|
||||
tool_call_history[tool_name].append(now)
|
||||
return None
|
||||
```
|
||||
|
||||
### 7. Caching Tool Results
|
||||
|
||||
```python
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
tool_cache = {}
|
||||
|
||||
def cache_key(tool_name: str, tool_input: dict) -> str:
|
||||
"""Generate cache key from tool name and input."""
|
||||
input_str = json.dumps(tool_input, sort_keys=True)
|
||||
return hashlib.md5(f"{tool_name}:{input_str}".encode()).hexdigest()
|
||||
|
||||
@before_tool_call
|
||||
def check_cache(context: ToolCallHookContext) -> bool | None:
|
||||
key = cache_key(context.tool_name, context.tool_input)
|
||||
if key in tool_cache:
|
||||
print(f"💾 Cache hit for {context.tool_name}")
|
||||
# Note: Can't return cached result from before hook
|
||||
# Would need to implement this differently
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def cache_result(context: ToolCallHookContext) -> None:
|
||||
if context.tool_result:
|
||||
key = cache_key(context.tool_name, context.tool_input)
|
||||
tool_cache[key] = context.tool_result
|
||||
print(f"💾 Cached result for {context.tool_name}")
|
||||
return None
|
||||
```
|
||||
|
||||
### 8. Debug Logging
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def debug_tool_call(context: ToolCallHookContext) -> None:
|
||||
print(f"""
|
||||
🔍 Tool Call Debug:
|
||||
- Tool: {context.tool_name}
|
||||
- Agent: {context.agent.role if context.agent else 'Unknown'}
|
||||
- Task: {context.task.description[:50] if context.task else 'Unknown'}...
|
||||
- Input: {context.tool_input}
|
||||
""")
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def debug_tool_result(context: ToolCallHookContext) -> None:
|
||||
if context.tool_result:
|
||||
result_preview = context.tool_result[:200]
|
||||
print(f"✅ Result Preview: {result_preview}...")
|
||||
else:
|
||||
print("⚠️ No result returned")
|
||||
return None
|
||||
```
|
||||
|
||||
## Hook Management
|
||||
|
||||
### Unregistering Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
unregister_before_tool_call_hook,
|
||||
unregister_after_tool_call_hook
|
||||
)
|
||||
|
||||
# Unregister specific hook
|
||||
def my_hook(context):
|
||||
...
|
||||
|
||||
register_before_tool_call_hook(my_hook)
|
||||
# Later...
|
||||
success = unregister_before_tool_call_hook(my_hook)
|
||||
print(f"Unregistered: {success}")
|
||||
```
|
||||
|
||||
### Clearing Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
clear_before_tool_call_hooks,
|
||||
clear_after_tool_call_hooks,
|
||||
clear_all_tool_call_hooks
|
||||
)
|
||||
|
||||
# Clear specific hook type
|
||||
count = clear_before_tool_call_hooks()
|
||||
print(f"Cleared {count} before hooks")
|
||||
|
||||
# Clear all tool hooks
|
||||
before_count, after_count = clear_all_tool_call_hooks()
|
||||
print(f"Cleared {before_count} before and {after_count} after hooks")
|
||||
```
|
||||
|
||||
### Listing Registered Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
get_before_tool_call_hooks,
|
||||
get_after_tool_call_hooks
|
||||
)
|
||||
|
||||
# Get current hooks
|
||||
before_hooks = get_before_tool_call_hooks()
|
||||
after_hooks = get_after_tool_call_hooks()
|
||||
|
||||
print(f"Registered: {len(before_hooks)} before, {len(after_hooks)} after")
|
||||
```
|
||||
|
||||
## Advanced Patterns
|
||||
|
||||
### Conditional Hook Execution
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def conditional_blocking(context: ToolCallHookContext) -> bool | None:
|
||||
# Only block for specific agents
|
||||
if context.agent and context.agent.role == "junior_agent":
|
||||
if context.tool_name in ['delete_file', 'send_email']:
|
||||
print(f"❌ Junior agents cannot use {context.tool_name}")
|
||||
return False
|
||||
|
||||
# Only block during specific tasks
|
||||
if context.task and "sensitive" in context.task.description.lower():
|
||||
if context.tool_name == 'web_search':
|
||||
print("❌ Web search blocked for sensitive tasks")
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### Context-Aware Input Modification
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def enhance_tool_inputs(context: ToolCallHookContext) -> None:
|
||||
# Add context based on agent role
|
||||
if context.agent and context.agent.role == "researcher":
|
||||
if context.tool_name == 'web_search':
|
||||
# Add domain restrictions for researchers
|
||||
context.tool_input['domains'] = ['edu', 'gov', 'org']
|
||||
|
||||
# Add context based on task
|
||||
if context.task and "urgent" in context.task.description.lower():
|
||||
if context.tool_name == 'send_email':
|
||||
context.tool_input['priority'] = 'high'
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### Tool Chain Monitoring
|
||||
|
||||
```python
|
||||
tool_call_chain = []
|
||||
|
||||
@before_tool_call
|
||||
def track_tool_chain(context: ToolCallHookContext) -> None:
|
||||
tool_call_chain.append({
|
||||
'tool': context.tool_name,
|
||||
'timestamp': time.time(),
|
||||
'agent': context.agent.role if context.agent else 'Unknown'
|
||||
})
|
||||
|
||||
# Detect potential infinite loops
|
||||
recent_calls = tool_call_chain[-5:]
|
||||
if len(recent_calls) == 5 and all(c['tool'] == context.tool_name for c in recent_calls):
|
||||
print(f"⚠️ Warning: {context.tool_name} called 5 times in a row")
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Keep Hooks Focused**: Each hook should have a single responsibility
|
||||
2. **Avoid Heavy Computation**: Hooks execute on every tool call
|
||||
3. **Handle Errors Gracefully**: Use try-except to prevent hook failures from breaking execution
|
||||
4. **Use Type Hints**: Leverage `ToolCallHookContext` for better IDE support
|
||||
5. **Document Blocking Conditions**: Make it clear when/why tools are blocked
|
||||
6. **Test Hooks Independently**: Unit test hooks before using in production
|
||||
7. **Clear Hooks in Tests**: Use `clear_all_tool_call_hooks()` between test runs
|
||||
8. **Modify In-Place**: Always modify `context.tool_input` in-place, never replace
|
||||
9. **Log Important Decisions**: Especially when blocking tool execution
|
||||
10. **Consider Performance**: Cache expensive validations when possible
|
||||
|
||||
## Error Handling
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def safe_validation(context: ToolCallHookContext) -> bool | None:
|
||||
try:
|
||||
# Your validation logic
|
||||
if not validate_input(context.tool_input):
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"⚠️ Hook error: {e}")
|
||||
# Decide: allow or block on error
|
||||
return None # Allow execution despite error
|
||||
```
|
||||
|
||||
## Type Safety
|
||||
|
||||
```python
|
||||
from crewai.hooks import ToolCallHookContext, BeforeToolCallHookType, AfterToolCallHookType
|
||||
|
||||
# Explicit type annotations
|
||||
def my_before_hook(context: ToolCallHookContext) -> bool | None:
|
||||
return None
|
||||
|
||||
def my_after_hook(context: ToolCallHookContext) -> str | None:
|
||||
return None
|
||||
|
||||
# Type-safe registration
|
||||
register_before_tool_call_hook(my_before_hook)
|
||||
register_after_tool_call_hook(my_after_hook)
|
||||
```
|
||||
|
||||
## Integration with Existing Tools
|
||||
|
||||
### Wrapping Existing Validation
|
||||
|
||||
```python
|
||||
def existing_validator(tool_name: str, inputs: dict) -> bool:
|
||||
"""Your existing validation function."""
|
||||
# Your validation logic
|
||||
return True
|
||||
|
||||
@before_tool_call
|
||||
def integrate_validator(context: ToolCallHookContext) -> bool | None:
|
||||
if not existing_validator(context.tool_name, context.tool_input):
|
||||
print(f"❌ Validation failed for {context.tool_name}")
|
||||
return False
|
||||
return None
|
||||
```
|
||||
|
||||
### Logging to External Systems
|
||||
|
||||
```python
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@before_tool_call
|
||||
def log_to_external_system(context: ToolCallHookContext) -> None:
|
||||
logger.info(f"Tool call: {context.tool_name}", extra={
|
||||
'tool_name': context.tool_name,
|
||||
'tool_input': context.tool_input,
|
||||
'agent': context.agent.role if context.agent else None
|
||||
})
|
||||
return None
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Hook Not Executing
|
||||
- Verify hook is registered before crew execution
|
||||
- Check if previous hook returned `False` (blocks execution and subsequent hooks)
|
||||
- Ensure hook signature matches expected type
|
||||
|
||||
### Input Modifications Not Working
|
||||
- Use in-place modifications: `context.tool_input['key'] = value`
|
||||
- Don't replace the dict: `context.tool_input = {}`
|
||||
|
||||
### Result Modifications Not Working
|
||||
- Return the modified string from after hooks
|
||||
- Returning `None` keeps the original result
|
||||
- Ensure the tool actually returned a result
|
||||
|
||||
### Tool Blocked Unexpectedly
|
||||
- Check all before hooks for blocking conditions
|
||||
- Verify hook execution order
|
||||
- Add debug logging to identify which hook is blocking
|
||||
|
||||
## Conclusion
|
||||
|
||||
Tool Call Hooks provide powerful capabilities for controlling and monitoring tool execution in CrewAI. Use them to implement safety guardrails, approval gates, input validation, result sanitization, logging, and analytics. Combined with proper error handling and type safety, hooks enable secure and production-ready agent systems with comprehensive observability.
|
||||
@@ -11,9 +11,13 @@ The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP)
|
||||
|
||||
CrewAI offers **two approaches** for MCP integration:
|
||||
|
||||
### Simple DSL Integration** (Recommended)
|
||||
### 🚀 **Simple DSL Integration** (Recommended)
|
||||
|
||||
Use the `mcps` field directly on agents for seamless MCP tool integration:
|
||||
Use the `mcps` field directly on agents for seamless MCP tool integration. The DSL supports both **string references** (for quick setup) and **structured configurations** (for full control).
|
||||
|
||||
#### String-Based References (Quick Setup)
|
||||
|
||||
Perfect for remote HTTPS servers and the CrewAI AMP marketplace:
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
@@ -32,6 +36,46 @@ agent = Agent(
|
||||
# MCP tools are now automatically available to your agent!
|
||||
```
|
||||
|
||||
#### Structured Configurations (Full Control)
|
||||
|
||||
For complete control over connection settings, tool filtering, and all transport types:
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai.mcp import MCPServerStdio, MCPServerHTTP, MCPServerSSE
|
||||
from crewai.mcp.filters import create_static_tool_filter
|
||||
|
||||
agent = Agent(
|
||||
role="Advanced Research Analyst",
|
||||
goal="Research with full control over MCP connections",
|
||||
backstory="Expert researcher with advanced tool access",
|
||||
mcps=[
|
||||
# Stdio transport for local servers
|
||||
MCPServerStdio(
|
||||
command="npx",
|
||||
args=["-y", "@modelcontextprotocol/server-filesystem"],
|
||||
env={"API_KEY": "your_key"},
|
||||
tool_filter=create_static_tool_filter(
|
||||
allowed_tool_names=["read_file", "list_directory"]
|
||||
),
|
||||
cache_tools_list=True,
|
||||
),
|
||||
# HTTP/Streamable HTTP transport for remote servers
|
||||
MCPServerHTTP(
|
||||
url="https://api.example.com/mcp",
|
||||
headers={"Authorization": "Bearer your_token"},
|
||||
streamable=True,
|
||||
cache_tools_list=True,
|
||||
),
|
||||
# SSE transport for real-time streaming
|
||||
MCPServerSSE(
|
||||
url="https://stream.example.com/mcp/sse",
|
||||
headers={"Authorization": "Bearer your_token"},
|
||||
),
|
||||
]
|
||||
)
|
||||
```
|
||||
|
||||
### 🔧 **Advanced: MCPServerAdapter** (For Complex Scenarios)
|
||||
|
||||
For advanced use cases requiring manual connection management, the `crewai-tools` library provides the `MCPServerAdapter` class.
|
||||
@@ -68,12 +112,14 @@ uv pip install 'crewai-tools[mcp]'
|
||||
|
||||
## Quick Start: Simple DSL Integration
|
||||
|
||||
The easiest way to integrate MCP servers is using the `mcps` field on your agents:
|
||||
The easiest way to integrate MCP servers is using the `mcps` field on your agents. You can use either string references or structured configurations.
|
||||
|
||||
### Quick Start with String References
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew
|
||||
|
||||
# Create agent with MCP tools
|
||||
# Create agent with MCP tools using string references
|
||||
research_agent = Agent(
|
||||
role="Research Analyst",
|
||||
goal="Find and analyze information using advanced search tools",
|
||||
@@ -96,13 +142,53 @@ crew = Crew(agents=[research_agent], tasks=[research_task])
|
||||
result = crew.kickoff()
|
||||
```
|
||||
|
||||
### Quick Start with Structured Configurations
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew
|
||||
from crewai.mcp import MCPServerStdio, MCPServerHTTP, MCPServerSSE
|
||||
|
||||
# Create agent with structured MCP configurations
|
||||
research_agent = Agent(
|
||||
role="Research Analyst",
|
||||
goal="Find and analyze information using advanced search tools",
|
||||
backstory="Expert researcher with access to multiple data sources",
|
||||
mcps=[
|
||||
# Local stdio server
|
||||
MCPServerStdio(
|
||||
command="python",
|
||||
args=["local_server.py"],
|
||||
env={"API_KEY": "your_key"},
|
||||
),
|
||||
# Remote HTTP server
|
||||
MCPServerHTTP(
|
||||
url="https://api.research.com/mcp",
|
||||
headers={"Authorization": "Bearer your_token"},
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
# Create task
|
||||
research_task = Task(
|
||||
description="Research the latest developments in AI agent frameworks",
|
||||
expected_output="Comprehensive research report with citations",
|
||||
agent=research_agent
|
||||
)
|
||||
|
||||
# Create and run crew
|
||||
crew = Crew(agents=[research_agent], tasks=[research_task])
|
||||
result = crew.kickoff()
|
||||
```
|
||||
|
||||
That's it! The MCP tools are automatically discovered and available to your agent.
|
||||
|
||||
## MCP Reference Formats
|
||||
|
||||
The `mcps` field supports various reference formats for maximum flexibility:
|
||||
The `mcps` field supports both **string references** (for quick setup) and **structured configurations** (for full control). You can mix both formats in the same list.
|
||||
|
||||
### External MCP Servers
|
||||
### String-Based References
|
||||
|
||||
#### External MCP Servers
|
||||
|
||||
```python
|
||||
mcps=[
|
||||
@@ -117,7 +203,7 @@ mcps=[
|
||||
]
|
||||
```
|
||||
|
||||
### CrewAI AMP Marketplace
|
||||
#### CrewAI AMP Marketplace
|
||||
|
||||
```python
|
||||
mcps=[
|
||||
@@ -133,17 +219,166 @@ mcps=[
|
||||
]
|
||||
```
|
||||
|
||||
### Mixed References
|
||||
### Structured Configurations
|
||||
|
||||
#### Stdio Transport (Local Servers)
|
||||
|
||||
Perfect for local MCP servers that run as processes:
|
||||
|
||||
```python
|
||||
from crewai.mcp import MCPServerStdio
|
||||
from crewai.mcp.filters import create_static_tool_filter
|
||||
|
||||
mcps=[
|
||||
"https://external-api.com/mcp", # External server
|
||||
"https://weather.service.com/mcp#forecast", # Specific external tool
|
||||
"crewai-amp:financial-insights", # AMP service
|
||||
"crewai-amp:data-analysis#sentiment_tool" # Specific AMP tool
|
||||
MCPServerStdio(
|
||||
command="npx",
|
||||
args=["-y", "@modelcontextprotocol/server-filesystem"],
|
||||
env={"API_KEY": "your_key"},
|
||||
tool_filter=create_static_tool_filter(
|
||||
allowed_tool_names=["read_file", "write_file"]
|
||||
),
|
||||
cache_tools_list=True,
|
||||
),
|
||||
# Python-based server
|
||||
MCPServerStdio(
|
||||
command="python",
|
||||
args=["path/to/server.py"],
|
||||
env={"UV_PYTHON": "3.12", "API_KEY": "your_key"},
|
||||
),
|
||||
]
|
||||
```
|
||||
|
||||
#### HTTP/Streamable HTTP Transport (Remote Servers)
|
||||
|
||||
For remote MCP servers over HTTP/HTTPS:
|
||||
|
||||
```python
|
||||
from crewai.mcp import MCPServerHTTP
|
||||
|
||||
mcps=[
|
||||
# Streamable HTTP (default)
|
||||
MCPServerHTTP(
|
||||
url="https://api.example.com/mcp",
|
||||
headers={"Authorization": "Bearer your_token"},
|
||||
streamable=True,
|
||||
cache_tools_list=True,
|
||||
),
|
||||
# Standard HTTP
|
||||
MCPServerHTTP(
|
||||
url="https://api.example.com/mcp",
|
||||
headers={"Authorization": "Bearer your_token"},
|
||||
streamable=False,
|
||||
),
|
||||
]
|
||||
```
|
||||
|
||||
#### SSE Transport (Real-Time Streaming)
|
||||
|
||||
For remote servers using Server-Sent Events:
|
||||
|
||||
```python
|
||||
from crewai.mcp import MCPServerSSE
|
||||
|
||||
mcps=[
|
||||
MCPServerSSE(
|
||||
url="https://stream.example.com/mcp/sse",
|
||||
headers={"Authorization": "Bearer your_token"},
|
||||
cache_tools_list=True,
|
||||
),
|
||||
]
|
||||
```
|
||||
|
||||
### Mixed References
|
||||
|
||||
You can combine string references and structured configurations:
|
||||
|
||||
```python
|
||||
from crewai.mcp import MCPServerStdio, MCPServerHTTP
|
||||
|
||||
mcps=[
|
||||
# String references
|
||||
"https://external-api.com/mcp", # External server
|
||||
"crewai-amp:financial-insights", # AMP service
|
||||
|
||||
# Structured configurations
|
||||
MCPServerStdio(
|
||||
command="npx",
|
||||
args=["-y", "@modelcontextprotocol/server-filesystem"],
|
||||
),
|
||||
MCPServerHTTP(
|
||||
url="https://api.example.com/mcp",
|
||||
headers={"Authorization": "Bearer token"},
|
||||
),
|
||||
]
|
||||
```
|
||||
|
||||
### Tool Filtering
|
||||
|
||||
Structured configurations support advanced tool filtering:
|
||||
|
||||
```python
|
||||
from crewai.mcp import MCPServerStdio
|
||||
from crewai.mcp.filters import create_static_tool_filter, create_dynamic_tool_filter, ToolFilterContext
|
||||
|
||||
# Static filtering (allow/block lists)
|
||||
static_filter = create_static_tool_filter(
|
||||
allowed_tool_names=["read_file", "write_file"],
|
||||
blocked_tool_names=["delete_file"],
|
||||
)
|
||||
|
||||
# Dynamic filtering (context-aware)
|
||||
def dynamic_filter(context: ToolFilterContext, tool: dict) -> bool:
|
||||
# Block dangerous tools for certain agent roles
|
||||
if context.agent.role == "Code Reviewer":
|
||||
if "delete" in tool.get("name", "").lower():
|
||||
return False
|
||||
return True
|
||||
|
||||
mcps=[
|
||||
MCPServerStdio(
|
||||
command="npx",
|
||||
args=["-y", "@modelcontextprotocol/server-filesystem"],
|
||||
tool_filter=static_filter, # or dynamic_filter
|
||||
),
|
||||
]
|
||||
```
|
||||
|
||||
## Configuration Parameters
|
||||
|
||||
Each transport type supports specific configuration options:
|
||||
|
||||
### MCPServerStdio Parameters
|
||||
|
||||
- **`command`** (required): Command to execute (e.g., `"python"`, `"node"`, `"npx"`, `"uvx"`)
|
||||
- **`args`** (optional): List of command arguments (e.g., `["server.py"]` or `["-y", "@mcp/server"]`)
|
||||
- **`env`** (optional): Dictionary of environment variables to pass to the process
|
||||
- **`tool_filter`** (optional): Tool filter function for filtering available tools
|
||||
- **`cache_tools_list`** (optional): Whether to cache the tool list for faster subsequent access (default: `False`)
|
||||
|
||||
### MCPServerHTTP Parameters
|
||||
|
||||
- **`url`** (required): Server URL (e.g., `"https://api.example.com/mcp"`)
|
||||
- **`headers`** (optional): Dictionary of HTTP headers for authentication or other purposes
|
||||
- **`streamable`** (optional): Whether to use streamable HTTP transport (default: `True`)
|
||||
- **`tool_filter`** (optional): Tool filter function for filtering available tools
|
||||
- **`cache_tools_list`** (optional): Whether to cache the tool list for faster subsequent access (default: `False`)
|
||||
|
||||
### MCPServerSSE Parameters
|
||||
|
||||
- **`url`** (required): Server URL (e.g., `"https://api.example.com/mcp/sse"`)
|
||||
- **`headers`** (optional): Dictionary of HTTP headers for authentication or other purposes
|
||||
- **`tool_filter`** (optional): Tool filter function for filtering available tools
|
||||
- **`cache_tools_list`** (optional): Whether to cache the tool list for faster subsequent access (default: `False`)
|
||||
|
||||
### Common Parameters
|
||||
|
||||
All transport types support:
|
||||
- **`tool_filter`**: Filter function to control which tools are available. Can be:
|
||||
- `None` (default): All tools are available
|
||||
- Static filter: Created with `create_static_tool_filter()` for allow/block lists
|
||||
- Dynamic filter: Created with `create_dynamic_tool_filter()` for context-aware filtering
|
||||
- **`cache_tools_list`**: When `True`, caches the tool list after first discovery to improve performance on subsequent connections
|
||||
|
||||
## Key Features
|
||||
|
||||
- 🔄 **Automatic Tool Discovery**: Tools are automatically discovered and integrated
|
||||
@@ -152,26 +387,47 @@ mcps=[
|
||||
- 🛡️ **Error Resilience**: Graceful handling of unavailable servers
|
||||
- ⏱️ **Timeout Protection**: Built-in timeouts prevent hanging connections
|
||||
- 📊 **Transparent Integration**: Works seamlessly with existing CrewAI features
|
||||
- 🔧 **Full Transport Support**: Stdio, HTTP/Streamable HTTP, and SSE transports
|
||||
- 🎯 **Advanced Filtering**: Static and dynamic tool filtering capabilities
|
||||
- 🔐 **Flexible Authentication**: Support for headers, environment variables, and query parameters
|
||||
|
||||
## Error Handling
|
||||
|
||||
The MCP DSL integration is designed to be resilient:
|
||||
The MCP DSL integration is designed to be resilient and handles failures gracefully:
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai.mcp import MCPServerStdio, MCPServerHTTP
|
||||
|
||||
agent = Agent(
|
||||
role="Resilient Agent",
|
||||
goal="Continue working despite server issues",
|
||||
backstory="Agent that handles failures gracefully",
|
||||
mcps=[
|
||||
# String references
|
||||
"https://reliable-server.com/mcp", # Will work
|
||||
"https://unreachable-server.com/mcp", # Will be skipped gracefully
|
||||
"https://slow-server.com/mcp", # Will timeout gracefully
|
||||
"crewai-amp:working-service" # Will work
|
||||
"crewai-amp:working-service", # Will work
|
||||
|
||||
# Structured configs
|
||||
MCPServerStdio(
|
||||
command="python",
|
||||
args=["reliable_server.py"], # Will work
|
||||
),
|
||||
MCPServerHTTP(
|
||||
url="https://slow-server.com/mcp", # Will timeout gracefully
|
||||
),
|
||||
]
|
||||
)
|
||||
# Agent will use tools from working servers and log warnings for failing ones
|
||||
```
|
||||
|
||||
All connection errors are handled gracefully:
|
||||
- **Connection failures**: Logged as warnings, agent continues with available tools
|
||||
- **Timeout errors**: Connections time out after 30 seconds (configurable)
|
||||
- **Authentication errors**: Logged clearly for debugging
|
||||
- **Invalid configurations**: Validation errors are raised at agent creation time
|
||||
|
||||
## Advanced: MCPServerAdapter
|
||||
|
||||
For complex scenarios requiring manual connection management, use the `MCPServerAdapter` class from `crewai-tools`. Using a Python context manager (`with` statement) is the recommended approach as it automatically handles starting and stopping the connection to the MCP server.
|
||||
|
||||
@@ -93,11 +93,15 @@ After running the application, you can view the traces in [Datadog LLM Observabi
|
||||
|
||||
Clicking on a trace will show you the details of the trace, including total tokens used, number of LLM calls, models used, and estimated cost. Clicking into a specific span will narrow down these details, and show related input, output, and metadata.
|
||||
|
||||

|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-1.png" alt="Datadog LLM Observability Trace View" />
|
||||
</Frame>
|
||||
|
||||
Additionally, you can open the execution graph view of the trace, which shows the trace's control and data flow. This view scales with larger agents to show handoffs and relationships between LLM calls, tool calls, and agent interactions.
|
||||
|
||||

|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-2.png" alt="Datadog LLM Observability Agent Execution Flow View" />
|
||||
</Frame>
|
||||
|
||||
## References
|
||||
|
||||
|
||||
@@ -733,9 +733,7 @@ Here's a basic configuration to route requests to OpenAI, specifically using GPT
|
||||
- Collect relevant metadata to filter logs
|
||||
- Enforce access permissions
|
||||
|
||||
Create API keys through:
|
||||
- [Portkey App](https://app.portkey.ai/)
|
||||
- [API Key Management API](/en/api-reference/admin-api/control-plane/api-keys/create-api-key)
|
||||
Create API keys through the [Portkey App](https://app.portkey.ai/).
|
||||
|
||||
Example using Python SDK:
|
||||
```python
|
||||
@@ -758,7 +756,7 @@ Here's a basic configuration to route requests to OpenAI, specifically using GPT
|
||||
)
|
||||
```
|
||||
|
||||
For detailed key management instructions, see our [API Keys documentation](/en/api-reference/admin-api/control-plane/api-keys/create-api-key).
|
||||
For detailed key management instructions, see the [Portkey documentation](https://portkey.ai/docs).
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Step 4: Deploy & Monitor">
|
||||
|
||||
@@ -18,7 +18,7 @@ These tools enable your agents to interact with cloud services, access cloud sto
|
||||
Write and upload files to Amazon S3 storage.
|
||||
</Card>
|
||||
|
||||
<Card title="Bedrock Invoke Agent" icon="aws" href="/en/tools/cloud-storage/bedrockinvokeagenttool">
|
||||
<Card title="Bedrock Invoke Agent" icon="aws" href="/en/tools/integration/bedrockinvokeagenttool">
|
||||
Invoke Amazon Bedrock agents for AI-powered tasks.
|
||||
</Card>
|
||||
|
||||
|
||||
@@ -23,13 +23,15 @@ Here's a minimal example of how to use the tool:
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai_tools import QdrantVectorSearchTool
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# Initialize the tool
|
||||
# Initialize the tool with QdrantConfig
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
)
|
||||
)
|
||||
|
||||
# Create an agent that uses the tool
|
||||
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
|
||||
def get_openai_embedding(text):
|
||||
response = client.embeddings.create(
|
||||
input=text,
|
||||
model="text-embedding-3-small"
|
||||
model="text-embedding-3-large"
|
||||
)
|
||||
return response.data[0].embedding
|
||||
|
||||
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
|
||||
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
||||
# Extract text from PDF
|
||||
text_chunks = extract_text_from_pdf(pdf_path)
|
||||
|
||||
|
||||
# Create Qdrant collection
|
||||
if qdrant.collection_exists(collection_name):
|
||||
qdrant.delete_collection(collection_name)
|
||||
qdrant.create_collection(
|
||||
collection_name=collection_name,
|
||||
vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
|
||||
vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
|
||||
)
|
||||
|
||||
# Store embeddings
|
||||
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
|
||||
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
||||
|
||||
# Initialize Qdrant search tool
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
)
|
||||
)
|
||||
|
||||
# Create CrewAI agents
|
||||
search_agent = Agent(
|
||||
role="Senior Semantic Search Agent",
|
||||
goal="Find and analyze documents based on semantic search",
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
information using semantic search in a Qdrant database.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -141,7 +147,7 @@ search_agent = Agent(
|
||||
answer_agent = Agent(
|
||||
role="Senior Answer Assistant",
|
||||
goal="Generate answers to questions based on the context provided",
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
answers to questions based on the context provided.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -180,21 +186,82 @@ print(result)
|
||||
## Tool Parameters
|
||||
|
||||
### Required Parameters
|
||||
- `qdrant_url` (str): The URL of your Qdrant server
|
||||
- `qdrant_api_key` (str): API key for authentication with Qdrant
|
||||
- `collection_name` (str): Name of the Qdrant collection to search
|
||||
- `qdrant_config` (QdrantConfig): Configuration object containing all Qdrant settings
|
||||
|
||||
### Optional Parameters
|
||||
### QdrantConfig Parameters
|
||||
- `qdrant_url` (str): The URL of your Qdrant server
|
||||
- `qdrant_api_key` (str, optional): API key for authentication with Qdrant
|
||||
- `collection_name` (str): Name of the Qdrant collection to search
|
||||
- `limit` (int): Maximum number of results to return (default: 3)
|
||||
- `score_threshold` (float): Minimum similarity score threshold (default: 0.35)
|
||||
- `filter` (Any, optional): Qdrant Filter instance for advanced filtering (default: None)
|
||||
|
||||
### Optional Tool Parameters
|
||||
- `custom_embedding_fn` (Callable[[str], list[float]]): Custom function for text vectorization
|
||||
- `qdrant_package` (str): Base package path for Qdrant (default: "qdrant_client")
|
||||
- `client` (Any): Pre-initialized Qdrant client (optional)
|
||||
|
||||
## Advanced Filtering
|
||||
|
||||
The QdrantVectorSearchTool supports powerful filtering capabilities to refine your search results:
|
||||
|
||||
### Dynamic Filtering
|
||||
Use `filter_by` and `filter_value` parameters in your search to filter results on-the-fly:
|
||||
|
||||
```python
|
||||
# Agent will use these parameters when calling the tool
|
||||
# The tool schema accepts filter_by and filter_value
|
||||
# Example: search with category filter
|
||||
# Results will be filtered where category == "technology"
|
||||
```
|
||||
|
||||
### Preset Filters with QdrantConfig
|
||||
For complex filtering, use Qdrant Filter instances in your configuration:
|
||||
|
||||
```python
|
||||
from qdrant_client.http import models as qmodels
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# Create a filter for specific conditions
|
||||
preset_filter = qmodels.Filter(
|
||||
must=[
|
||||
qmodels.FieldCondition(
|
||||
key="category",
|
||||
match=qmodels.MatchValue(value="research")
|
||||
),
|
||||
qmodels.FieldCondition(
|
||||
key="year",
|
||||
match=qmodels.MatchValue(value=2024)
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# Initialize tool with preset filter
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
filter=preset_filter # Preset filter applied to all searches
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### Combining Filters
|
||||
The tool automatically combines preset filters from `QdrantConfig` with dynamic filters from `filter_by` and `filter_value`:
|
||||
|
||||
```python
|
||||
# If QdrantConfig has a preset filter for category="research"
|
||||
# And the search uses filter_by="year", filter_value=2024
|
||||
# Both filters will be combined (AND logic)
|
||||
```
|
||||
|
||||
## Search Parameters
|
||||
|
||||
The tool accepts these parameters in its schema:
|
||||
- `query` (str): The search query to find similar documents
|
||||
- `filter_by` (str, optional): Metadata field to filter on
|
||||
- `filter_value` (str, optional): Value to filter by
|
||||
- `filter_value` (Any, optional): Value to filter by
|
||||
|
||||
## Return Format
|
||||
|
||||
@@ -214,7 +281,7 @@ The tool returns results in JSON format:
|
||||
|
||||
## Default Embedding
|
||||
|
||||
By default, the tool uses OpenAI's `text-embedding-3-small` model for vectorization. This requires:
|
||||
By default, the tool uses OpenAI's `text-embedding-3-large` model for vectorization. This requires:
|
||||
- OpenAI API key set in environment: `OPENAI_API_KEY`
|
||||
|
||||
## Custom Embeddings
|
||||
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
|
||||
# Tokenize and get model outputs
|
||||
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
||||
outputs = model(**inputs)
|
||||
|
||||
|
||||
# Use mean pooling to get text embedding
|
||||
embeddings = outputs.last_hidden_state.mean(dim=1)
|
||||
|
||||
|
||||
# Convert to list of floats and return
|
||||
return embeddings[0].tolist()
|
||||
|
||||
# Use custom embeddings with the tool
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection"
|
||||
),
|
||||
custom_embedding_fn=custom_embeddings # Pass your custom function
|
||||
)
|
||||
```
|
||||
@@ -269,4 +340,4 @@ Required environment variables:
|
||||
```bash
|
||||
export QDRANT_URL="your_qdrant_url" # If not provided in constructor
|
||||
export QDRANT_API_KEY="your_api_key" # If not provided in constructor
|
||||
export OPENAI_API_KEY="your_openai_key" # If using default embeddings
|
||||
export OPENAI_API_KEY="your_openai_key" # If using default embeddings
|
||||
|
||||
@@ -54,25 +54,25 @@ The following parameters can be used to customize the `CSVSearchTool`'s behavior
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = CSVSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -46,23 +46,25 @@ tool = DirectorySearchTool(directory='/path/to/directory')
|
||||
The DirectorySearchTool uses OpenAI for embeddings and summarization by default. Customization options for these settings include changing the model provider and configuration, enhancing flexibility for advanced users.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DirectorySearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # Options include ollama, google, anthropic, llama2, and more
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Additional configurations here
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -56,25 +56,25 @@ The following parameters can be used to customize the `DOCXSearchTool`'s behavio
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DOCXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -48,27 +48,25 @@ tool = MDXSearchTool(mdx='path/to/your/document.mdx')
|
||||
The tool defaults to using OpenAI for embeddings and summarization. For customization, utilize a configuration dictionary as shown below:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = MDXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # Options include google, openai, anthropic, llama2, etc.
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Optional parameters can be included here.
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# Optional title for the embeddings can be added here.
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -45,28 +45,64 @@ tool = PDFSearchTool(pdf='path/to/your/document.pdf')
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows. Note: a vector database is required because the generated embeddings must be stored in, and queried from, a vector database.
|
||||
|
||||
```python Code
|
||||
from crewai_tools import PDFSearchTool
|
||||
|
||||
# - embedding_model (required): choose provider + provider-specific config
|
||||
# - vectordb (required): choose vector DB and pass its config
|
||||
|
||||
tool = PDFSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
# Supported providers: "openai", "azure", "google-generativeai", "google-vertex",
|
||||
# "voyageai", "cohere", "huggingface", "jina", "sentence-transformer",
|
||||
# "text2vec", "ollama", "openclip", "instructor", "onnx", "roboflow", "watsonx", "custom"
|
||||
"provider": "openai", # or: "google-generativeai", "cohere", "ollama", ...
|
||||
"config": {
|
||||
# Model identifier for the chosen provider. "model" will be auto-mapped to "model_name" internally.
|
||||
"model": "text-embedding-3-small",
|
||||
# Optional: API key. If omitted, the tool will use provider-specific env vars when available
|
||||
# (e.g., OPENAI_API_KEY for provider="openai").
|
||||
# "api_key": "sk-...",
|
||||
|
||||
# Provider-specific examples:
|
||||
# --- Google Generative AI ---
|
||||
# (Set provider="google-generativeai" above)
|
||||
# "model": "models/embedding-001",
|
||||
# "task_type": "retrieval_document",
|
||||
# "title": "Embeddings",
|
||||
|
||||
# --- Cohere ---
|
||||
# (Set provider="cohere" above)
|
||||
# "model": "embed-english-v3.0",
|
||||
|
||||
# --- Ollama (local) ---
|
||||
# (Set provider="ollama" above)
|
||||
# "model": "nomic-embed-text",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# For ChromaDB: pass "settings" (chromadb.config.Settings) or rely on defaults.
|
||||
# Example (uncomment and import):
|
||||
# from chromadb.config import Settings
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# For Qdrant: pass "vectors_config" (qdrant_client.models.VectorParams).
|
||||
# Example (uncomment and import):
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# Note: collection name is controlled by the tool (default: "rag_tool_collection"), not set here.
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -57,25 +57,41 @@ By default, the tool uses OpenAI for both embeddings and summarization.
|
||||
To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = TXTSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# Required: embeddings provider + config
|
||||
"embedding_model": {
|
||||
"provider": "openai", # or google-generativeai, cohere, ollama, ...
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...", # optional if env var is set
|
||||
# Provider examples:
|
||||
# Google → model: "models/embedding-001", task_type: "retrieval_document"
|
||||
# Cohere → model: "embed-english-v3.0"
|
||||
# Ollama → model: "nomic-embed-text"
|
||||
},
|
||||
},
|
||||
|
||||
# Required: vector database config
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# Chroma settings (optional persistence)
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# Qdrant vector params example:
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# Note: collection name is controlled by the tool (default: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -54,25 +54,25 @@ It is an optional parameter during the tool's initialization but must be provide
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = XMLSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -632,11 +632,11 @@ mode: "wide"
|
||||
|
||||
## 기여
|
||||
|
||||
기여를 원하시면, [기여 가이드](CONTRIBUTING.md)를 참조하세요.
|
||||
기여를 원하시면, [기여 가이드](https://github.com/crewAIInc/crewAI/blob/main/CONTRIBUTING.md)를 참조하세요.
|
||||
|
||||
## 라이센스
|
||||
|
||||
이 프로젝트는 MIT 라이센스 하에 배포됩니다. 자세한 내용은 [LICENSE](LICENSE) 파일을 확인하세요.
|
||||
이 프로젝트는 MIT 라이센스 하에 배포됩니다. 자세한 내용은 [LICENSE](https://github.com/crewAIInc/crewAI/blob/main/LICENSE) 파일을 확인하세요.
|
||||
</Update>
|
||||
|
||||
<Update label="2025년 5월 22일">
|
||||
|
||||
@@ -706,7 +706,7 @@ class KnowledgeMonitorListener(BaseEventListener):
|
||||
knowledge_monitor = KnowledgeMonitorListener()
|
||||
```
|
||||
|
||||
이벤트 사용에 대한 자세한 내용은 [이벤트 리스너](https://docs.crewai.com/concepts/event-listener) 문서를 참고하세요.
|
||||
이벤트 사용에 대한 자세한 내용은 [이벤트 리스너](/ko/concepts/event-listener) 문서를 참고하세요.
|
||||
|
||||
### 맞춤형 지식 소스
|
||||
|
||||
|
||||
@@ -748,7 +748,7 @@ CrewAI는 LLM의 스트리밍 응답을 지원하여, 애플리케이션이 출
|
||||
```
|
||||
|
||||
<Tip>
|
||||
[자세한 내용은 여기를 클릭하세요](https://docs.crewai.com/concepts/event-listener#event-listeners)
|
||||
[자세한 내용은 여기를 클릭하세요](/ko/concepts/event-listener#event-listeners)
|
||||
</Tip>
|
||||
</Tab>
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ mode: "wide"
|
||||
<Card title="도구 & 통합" href="/ko/enterprise/features/tools-and-integrations" icon="wrench">
|
||||
에이전트가 사용할 외부 앱 연결 및 내부 도구 관리.
|
||||
</Card>
|
||||
<Card title="도구 저장소" href="/ko/enterprise/features/tool-repository" icon="toolbox">
|
||||
<Card title="도구 저장소" href="/ko/enterprise/guides/tool-repository" icon="toolbox">
|
||||
크루 기능을 확장할 수 있도록 도구를 게시하고 설치.
|
||||
</Card>
|
||||
<Card title="에이전트 저장소" href="/ko/enterprise/features/agent-repositories" icon="people-group">
|
||||
|
||||
@@ -231,7 +231,7 @@ mode: "wide"
|
||||
## 관련 문서
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="도구 저장소" href="/ko/enterprise/features/tool-repository" icon="toolbox">
|
||||
<Card title="도구 저장소" href="/ko/enterprise/guides/tool-repository" icon="toolbox">
|
||||
크루 기능을 확장할 수 있도록 도구를 게시하고 설치하세요.
|
||||
</Card>
|
||||
<Card title="Webhook 자동화" href="/ko/enterprise/guides/webhook-automation" icon="bolt">
|
||||
|
||||
@@ -21,7 +21,7 @@ Tool Repository는 CrewAI 도구를 위한 패키지 관리자입니다. 사용
|
||||
Tool Repository를 사용하기 전에 다음이 준비되어 있어야 합니다:
|
||||
|
||||
- [CrewAI AMP](https://app.crewai.com) 계정
|
||||
- [CrewAI CLI](https://docs.crewai.com/concepts/cli#cli) 설치됨
|
||||
- [CrewAI CLI](/ko/concepts/cli#cli) 설치됨
|
||||
- uv>=0.5.0 이 설치되어 있어야 합니다. [업그레이드 방법](https://docs.astral.sh/uv/getting-started/installation/#upgrading-uv)을 참고하세요.
|
||||
- [Git](https://git-scm.com) 설치 및 구성 완료
|
||||
- CrewAI AMP 조직에서 도구를 게시하거나 설치할 수 있는 액세스 권한
|
||||
@@ -66,7 +66,7 @@ crewai tool publish
|
||||
crewai tool publish --public
|
||||
```
|
||||
|
||||
도구 빌드에 대한 자세한 내용은 [나만의 도구 만들기](https://docs.crewai.com/concepts/tools#creating-your-own-tools)를 참고하세요.
|
||||
도구 빌드에 대한 자세한 내용은 [나만의 도구 만들기](/ko/concepts/tools#creating-your-own-tools)를 참고하세요.
|
||||
|
||||
## 도구 업데이트
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ mode: "wide"
|
||||
|
||||
에이전트 실행에 인간 입력을 통합하려면 작업 정의에서 `human_input` 플래그를 설정하세요. 활성화하면, 에이전트가 최종 답변을 제공하기 전에 사용자에게 입력을 요청합니다. 이 입력은 추가 맥락을 제공하거나, 애매함을 해소하거나, 에이전트의 출력을 검증해야 할 때 활용될 수 있습니다.
|
||||
|
||||
자세한 구현 방법은 [Human-in-the-Loop 가이드](/ko/how-to/human-in-the-loop)를 참고해 주세요.
|
||||
자세한 구현 방법은 [Human-in-the-Loop 가이드](/ko/enterprise/guides/human-in-the-loop)를 참고해 주세요.
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="CrewAI에서 에이전트의 행동과 역량을 맞춤화하고 향상시키기 위한 고급 커스터마이징 옵션에는 어떤 것이 있나요?">
|
||||
@@ -142,7 +142,7 @@ mode: "wide"
|
||||
<Accordion title="CrewAI 에이전트를 위한 커스텀 도구는 어떻게 만들 수 있습니까?">
|
||||
CrewAI에서 제공하는 `BaseTool` 클래스를 상속받아 커스텀 도구를 직접 만들거나, tool 데코레이터를 활용할 수 있습니다. 상속 방식은 `BaseTool`을 상속하는 새로운 클래스를 정의해 이름, 설명, 그리고 실제 논리를 처리하는 `_run` 메서드를 작성합니다. tool 데코레이터를 사용하면 필수 속성과 운영 로직만 정의해 바로 `Tool` 객체를 만들 수 있습니다.
|
||||
|
||||
<Card href="https://docs.crewai.com/how-to/create-custom-tools" icon="code">CrewAI 도구 가이드</Card>
|
||||
<Card href="/ko/learn/create-custom-tools" icon="code">CrewAI 도구 가이드</Card>
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="전체 crew가 수행할 수 있는 분당 최대 요청 수는 어떻게 제한할 수 있나요?">
|
||||
|
||||
379
docs/ko/learn/execution-hooks.mdx
Normal file
379
docs/ko/learn/execution-hooks.mdx
Normal file
@@ -0,0 +1,379 @@
|
||||
---
|
||||
title: 실행 훅 개요
|
||||
description: 에이전트 작업에 대한 세밀한 제어를 위한 CrewAI 실행 훅 이해 및 사용
|
||||
mode: "wide"
|
||||
---
|
||||
|
||||
실행 훅(Execution Hooks)은 CrewAI 에이전트의 런타임 동작을 세밀하게 제어할 수 있게 해줍니다. 크루 실행 전후에 실행되는 킥오프 훅과 달리, 실행 훅은 에이전트 실행 중 특정 작업을 가로채서 동작을 수정하고, 안전성 검사를 구현하며, 포괄적인 모니터링을 추가할 수 있습니다.
|
||||
|
||||
## 실행 훅의 유형
|
||||
|
||||
CrewAI는 두 가지 주요 범주의 실행 훅을 제공합니다:
|
||||
|
||||
### 1. [LLM 호출 훅](/ko/learn/llm-hooks)
|
||||
|
||||
언어 모델 상호작용을 제어하고 모니터링합니다:
|
||||
- **LLM 호출 전**: 프롬프트 수정, 입력 검증, 승인 게이트 구현
|
||||
- **LLM 호출 후**: 응답 변환, 출력 정제, 대화 기록 업데이트
|
||||
|
||||
**사용 사례:**
|
||||
- 반복 제한
|
||||
- 비용 추적 및 토큰 사용량 모니터링
|
||||
- 응답 정제 및 콘텐츠 필터링
|
||||
- LLM 호출에 대한 사람의 승인
|
||||
- 안전 가이드라인 또는 컨텍스트 추가
|
||||
- 디버그 로깅 및 요청/응답 검사
|
||||
|
||||
[LLM 훅 문서 보기 →](/ko/learn/llm-hooks)
|
||||
|
||||
### 2. [도구 호출 훅](/ko/learn/tool-hooks)
|
||||
|
||||
도구 실행을 제어하고 모니터링합니다:
|
||||
- **도구 호출 전**: 입력 수정, 매개변수 검증, 위험한 작업 차단
|
||||
- **도구 호출 후**: 결과 변환, 출력 정제, 실행 세부사항 로깅
|
||||
|
||||
**사용 사례:**
|
||||
- 파괴적인 작업에 대한 안전 가드레일
|
||||
- 민감한 작업에 대한 사람의 승인
|
||||
- 입력 검증 및 정제
|
||||
- 결과 캐싱 및 속도 제한
|
||||
- 도구 사용 분석
|
||||
- 디버그 로깅 및 모니터링
|
||||
|
||||
[도구 훅 문서 보기 →](/ko/learn/tool-hooks)
|
||||
|
||||
## 훅 등록 방법
|
||||
|
||||
### 1. 데코레이터 기반 훅 (권장)
|
||||
|
||||
훅을 등록하는 가장 깔끔하고 파이썬스러운 방법:
|
||||
|
||||
```python
|
||||
from crewai.hooks import before_llm_call, after_llm_call, before_tool_call, after_tool_call
|
||||
|
||||
@before_llm_call
|
||||
def limit_iterations(context):
|
||||
"""반복 횟수를 제한하여 무한 루프를 방지합니다."""
|
||||
if context.iterations > 10:
|
||||
return False # 실행 차단
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def sanitize_response(context):
|
||||
"""LLM 응답에서 민감한 데이터를 제거합니다."""
|
||||
if "API_KEY" in context.response:
|
||||
return context.response.replace("API_KEY", "[수정됨]")
|
||||
return None
|
||||
|
||||
@before_tool_call
|
||||
def block_dangerous_tools(context):
|
||||
"""파괴적인 작업을 차단합니다."""
|
||||
if context.tool_name == "delete_database":
|
||||
return False # 실행 차단
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def log_tool_result(context):
|
||||
"""도구 실행을 로깅합니다."""
|
||||
print(f"도구 {context.tool_name} 완료")
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. 크루 범위 훅
|
||||
|
||||
특정 크루 인스턴스에만 훅을 적용합니다:
|
||||
|
||||
```python
|
||||
from crewai import CrewBase
|
||||
from crewai.project import crew
|
||||
from crewai.hooks import before_llm_call_crew, after_tool_call_crew
|
||||
|
||||
@CrewBase
|
||||
class MyProjCrew:
|
||||
@before_llm_call_crew
|
||||
def validate_inputs(self, context):
|
||||
# 이 크루에만 적용됩니다
|
||||
print(f"{self.__class__.__name__}에서 LLM 호출")
|
||||
return None
|
||||
|
||||
@after_tool_call_crew
|
||||
def log_results(self, context):
|
||||
# 크루별 로깅
|
||||
print(f"도구 결과: {context.tool_result[:50]}...")
|
||||
return None
|
||||
|
||||
@crew
|
||||
def crew(self) -> Crew:
|
||||
return Crew(
|
||||
agents=self.agents,
|
||||
tasks=self.tasks,
|
||||
process=Process.sequential
|
||||
)
|
||||
```
|
||||
|
||||
## 훅 실행 흐름
|
||||
|
||||
### LLM 호출 흐름
|
||||
|
||||
```
|
||||
에이전트가 LLM을 호출해야 함
|
||||
↓
|
||||
[LLM 호출 전 훅 실행]
|
||||
├→ 훅 1: 반복 횟수 검증
|
||||
├→ 훅 2: 안전 컨텍스트 추가
|
||||
└→ 훅 3: 요청 로깅
|
||||
↓
|
||||
훅이 False를 반환하는 경우:
|
||||
├→ LLM 호출 차단
|
||||
└→ ValueError 발생
|
||||
↓
|
||||
모든 훅이 True/None을 반환하는 경우:
|
||||
├→ LLM 호출 진행
|
||||
└→ 응답 생성
|
||||
↓
|
||||
[LLM 호출 후 훅 실행]
|
||||
├→ 훅 1: 응답 정제
|
||||
├→ 훅 2: 응답 로깅
|
||||
└→ 훅 3: 메트릭 업데이트
|
||||
↓
|
||||
최종 응답 반환
|
||||
```
|
||||
|
||||
### 도구 호출 흐름
|
||||
|
||||
```
|
||||
에이전트가 도구를 실행해야 함
|
||||
↓
|
||||
[도구 호출 전 훅 실행]
|
||||
├→ 훅 1: 도구 허용 여부 확인
|
||||
├→ 훅 2: 입력 검증
|
||||
└→ 훅 3: 필요시 승인 요청
|
||||
↓
|
||||
훅이 False를 반환하는 경우:
|
||||
├→ 도구 실행 차단
|
||||
└→ 오류 메시지 반환
|
||||
↓
|
||||
모든 훅이 True/None을 반환하는 경우:
|
||||
├→ 도구 실행 진행
|
||||
└→ 결과 생성
|
||||
↓
|
||||
[도구 호출 후 훅 실행]
|
||||
├→ 훅 1: 결과 정제
|
||||
├→ 훅 2: 결과 캐싱
|
||||
└→ 훅 3: 메트릭 로깅
|
||||
↓
|
||||
최종 결과 반환
|
||||
```
|
||||
|
||||
## 훅 컨텍스트 객체
|
||||
|
||||
### LLMCallHookContext
|
||||
|
||||
LLM 실행 상태에 대한 액세스를 제공합니다:
|
||||
|
||||
```python
|
||||
class LLMCallHookContext:
|
||||
executor: CrewAgentExecutor # 전체 실행자 액세스
|
||||
messages: list # 변경 가능한 메시지 목록
|
||||
agent: Agent # 현재 에이전트
|
||||
task: Task # 현재 작업
|
||||
crew: Crew # 크루 인스턴스
|
||||
llm: BaseLLM # LLM 인스턴스
|
||||
iterations: int # 현재 반복 횟수
|
||||
response: str | None # LLM 응답 (후 훅용)
|
||||
```
|
||||
|
||||
### ToolCallHookContext
|
||||
|
||||
도구 실행 상태에 대한 액세스를 제공합니다:
|
||||
|
||||
```python
|
||||
class ToolCallHookContext:
|
||||
tool_name: str # 호출되는 도구
|
||||
tool_input: dict # 변경 가능한 입력 매개변수
|
||||
tool: CrewStructuredTool # 도구 인스턴스
|
||||
agent: Agent | None # 실행 중인 에이전트
|
||||
task: Task | None # 현재 작업
|
||||
crew: Crew | None # 크루 인스턴스
|
||||
tool_result: str | None # 도구 결과 (후 훅용)
|
||||
```
|
||||
|
||||
## 일반적인 패턴
|
||||
|
||||
### 안전 및 검증
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def safety_check(context):
|
||||
"""파괴적인 작업을 차단합니다."""
|
||||
dangerous = ['delete_file', 'drop_table', 'system_shutdown']
|
||||
if context.tool_name in dangerous:
|
||||
print(f"🛑 차단됨: {context.tool_name}")
|
||||
return False
|
||||
return None
|
||||
|
||||
@before_llm_call
|
||||
def iteration_limit(context):
|
||||
"""무한 루프를 방지합니다."""
|
||||
if context.iterations > 15:
|
||||
print("⛔ 최대 반복 횟수 초과")
|
||||
return False
|
||||
return None
|
||||
```
|
||||
|
||||
### 사람의 개입
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def require_approval(context):
|
||||
"""민감한 작업에 대한 승인을 요구합니다."""
|
||||
sensitive = ['send_email', 'make_payment', 'post_message']
|
||||
|
||||
if context.tool_name in sensitive:
|
||||
response = context.request_human_input(
|
||||
prompt=f"{context.tool_name} 승인하시겠습니까?",
|
||||
default_message="승인하려면 'yes'를 입력하세요:"
|
||||
)
|
||||
|
||||
if response.lower() != 'yes':
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 모니터링 및 분석
|
||||
|
||||
```python
|
||||
from collections import defaultdict
|
||||
import time
|
||||
|
||||
metrics = defaultdict(lambda: {'count': 0, 'total_time': 0})
|
||||
|
||||
@before_tool_call
|
||||
def start_timer(context):
|
||||
context.tool_input['_start'] = time.time()
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def track_metrics(context):
|
||||
start = context.tool_input.get('_start', time.time())
|
||||
duration = time.time() - start
|
||||
|
||||
metrics[context.tool_name]['count'] += 1
|
||||
metrics[context.tool_name]['total_time'] += duration
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
## 훅 관리
|
||||
|
||||
### 모든 훅 지우기
|
||||
|
||||
```python
|
||||
from crewai.hooks import clear_all_global_hooks
|
||||
|
||||
# 모든 훅을 한 번에 지웁니다
|
||||
result = clear_all_global_hooks()
|
||||
print(f"{result['total']} 훅이 지워졌습니다")
|
||||
```
|
||||
|
||||
### 특정 훅 유형 지우기
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
clear_before_llm_call_hooks,
|
||||
clear_after_llm_call_hooks,
|
||||
clear_before_tool_call_hooks,
|
||||
clear_after_tool_call_hooks
|
||||
)
|
||||
|
||||
# 특정 유형 지우기
|
||||
llm_before_count = clear_before_llm_call_hooks()
|
||||
tool_after_count = clear_after_tool_call_hooks()
|
||||
```
|
||||
|
||||
## 모범 사례
|
||||
|
||||
### 1. 훅을 집중적으로 유지
|
||||
각 훅은 단일하고 명확한 책임을 가져야 합니다.
|
||||
|
||||
### 2. 오류를 우아하게 처리
|
||||
```python
|
||||
@before_llm_call
|
||||
def safe_hook(context):
|
||||
try:
|
||||
if some_condition:
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"훅 오류: {e}")
|
||||
return None # 오류에도 불구하고 실행 허용
|
||||
```
|
||||
|
||||
### 3. 컨텍스트를 제자리에서 수정
|
||||
```python
|
||||
# ✅ 올바름 - 제자리에서 수정
|
||||
@before_llm_call
|
||||
def add_context(context):
|
||||
context.messages.append({"role": "system", "content": "간결하게"})
|
||||
|
||||
# ❌ 잘못됨 - 참조를 교체
|
||||
@before_llm_call
|
||||
def wrong_approach(context):
|
||||
context.messages = [{"role": "system", "content": "간결하게"}]
|
||||
```
|
||||
|
||||
### 4. 타입 힌트 사용
|
||||
```python
|
||||
from crewai.hooks import LLMCallHookContext, ToolCallHookContext
|
||||
|
||||
def my_llm_hook(context: LLMCallHookContext) -> bool | None:
|
||||
return None
|
||||
|
||||
def my_tool_hook(context: ToolCallHookContext) -> str | None:
|
||||
return None
|
||||
```
|
||||
|
||||
### 5. 테스트에서 정리
|
||||
```python
|
||||
import pytest
|
||||
from crewai.hooks import clear_all_global_hooks
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def clean_hooks():
|
||||
"""각 테스트 전에 훅을 재설정합니다."""
|
||||
yield
|
||||
clear_all_global_hooks()
|
||||
```
|
||||
|
||||
## 어떤 훅을 사용해야 할까요
|
||||
|
||||
### LLM 훅을 사용하는 경우:
|
||||
- 반복 제한 구현
|
||||
- 프롬프트에 컨텍스트 또는 안전 가이드라인 추가
|
||||
- 토큰 사용량 및 비용 추적
|
||||
- 응답 정제 또는 변환
|
||||
- LLM 호출에 대한 승인 게이트 구현
|
||||
- 프롬프트/응답 상호작용 디버깅
|
||||
|
||||
### 도구 훅을 사용하는 경우:
|
||||
- 위험하거나 파괴적인 작업 차단
|
||||
- 실행 전 도구 입력 검증
|
||||
- 민감한 작업에 대한 승인 게이트 구현
|
||||
- 도구 결과 캐싱
|
||||
- 도구 사용 및 성능 추적
|
||||
- 도구 출력 정제
|
||||
- 도구 호출 속도 제한
|
||||
|
||||
### 둘 다 사용하는 경우:
|
||||
모든 에이전트 작업을 모니터링해야 하는 포괄적인 관찰성, 안전 또는 승인 시스템을 구축하는 경우.
|
||||
|
||||
## 관련 문서
|
||||
|
||||
- [LLM 호출 훅 →](/ko/learn/llm-hooks) - 상세한 LLM 훅 문서
|
||||
- [도구 호출 훅 →](/ko/learn/tool-hooks) - 상세한 도구 훅 문서
|
||||
- [킥오프 전후 훅 →](/ko/learn/before-and-after-kickoff-hooks) - 크루 생명주기 훅
|
||||
- [사람의 개입 →](/ko/learn/human-in-the-loop) - 사람 입력 패턴
|
||||
|
||||
## 결론
|
||||
|
||||
실행 훅은 에이전트 런타임 동작에 대한 강력한 제어를 제공합니다. 이를 사용하여 안전 가드레일, 승인 워크플로우, 포괄적인 모니터링 및 사용자 정의 비즈니스 로직을 구현하세요. 적절한 오류 처리, 타입 안전성 및 성능 고려사항과 결합하면, 훅을 통해 프로덕션 준비가 된 안전하고 관찰 가능한 에이전트 시스템을 구축할 수 있습니다.
|
||||
@@ -95,7 +95,7 @@ project_crew = Crew(
|
||||
```
|
||||
|
||||
<Tip>
|
||||
매니저 에이전트 생성 및 맞춤화에 대한 자세한 내용은 [커스텀 매니저 에이전트 문서](https://docs.crewai.com/how-to/custom-manager-agent#custom-manager-agent)를 참고하세요.
|
||||
매니저 에이전트 생성 및 맞춤화에 대한 자세한 내용은 [커스텀 매니저 에이전트 문서](/ko/learn/custom-manager-agent)를 참고하세요.
|
||||
</Tip>
|
||||
|
||||
### 워크플로우 실행
|
||||
|
||||
412
docs/ko/learn/llm-hooks.mdx
Normal file
412
docs/ko/learn/llm-hooks.mdx
Normal file
@@ -0,0 +1,412 @@
|
||||
---
|
||||
title: LLM 호출 훅
|
||||
description: CrewAI에서 언어 모델 상호작용을 가로채고, 수정하고, 제어하는 LLM 호출 훅 사용 방법 배우기
|
||||
mode: "wide"
|
||||
---
|
||||
|
||||
LLM 호출 훅(LLM Call Hooks)은 에이전트 실행 중 언어 모델 상호작용에 대한 세밀한 제어를 제공합니다. 이러한 훅을 사용하면 LLM 호출을 가로채고, 프롬프트를 수정하고, 응답을 변환하고, 승인 게이트를 구현하고, 사용자 정의 로깅 또는 모니터링을 추가할 수 있습니다.
|
||||
|
||||
## 개요
|
||||
|
||||
LLM 훅은 두 가지 중요한 시점에 실행됩니다:
|
||||
- **LLM 호출 전**: 메시지 수정, 입력 검증 또는 실행 차단
|
||||
- **LLM 호출 후**: 응답 변환, 출력 정제 또는 대화 기록 수정
|
||||
|
||||
## 훅 타입
|
||||
|
||||
### LLM 호출 전 훅
|
||||
|
||||
모든 LLM 호출 전에 실행되며, 다음을 수행할 수 있습니다:
|
||||
- LLM에 전송되는 메시지 검사 및 수정
|
||||
- 조건에 따라 LLM 실행 차단
|
||||
- 속도 제한 또는 승인 게이트 구현
|
||||
- 컨텍스트 또는 시스템 메시지 추가
|
||||
- 요청 세부사항 로깅
|
||||
|
||||
**시그니처:**
|
||||
```python
|
||||
def before_hook(context: LLMCallHookContext) -> bool | None:
|
||||
# 실행을 차단하려면 False 반환
|
||||
# 실행을 허용하려면 True 또는 None 반환
|
||||
...
|
||||
```
|
||||
|
||||
### LLM 호출 후 훅
|
||||
|
||||
모든 LLM 호출 후에 실행되며, 다음을 수행할 수 있습니다:
|
||||
- LLM 응답 수정 또는 정제
|
||||
- 메타데이터 또는 서식 추가
|
||||
- 응답 세부사항 로깅
|
||||
- 대화 기록 업데이트
|
||||
- 콘텐츠 필터링 구현
|
||||
|
||||
**시그니처:**
|
||||
```python
|
||||
def after_hook(context: LLMCallHookContext) -> str | None:
|
||||
# 수정된 응답 문자열 반환
|
||||
# 원본 응답을 유지하려면 None 반환
|
||||
...
|
||||
```
|
||||
|
||||
## LLM 훅 컨텍스트
|
||||
|
||||
`LLMCallHookContext` 객체는 실행 상태에 대한 포괄적인 액세스를 제공합니다:
|
||||
|
||||
```python
|
||||
class LLMCallHookContext:
|
||||
executor: CrewAgentExecutor # 전체 실행자 참조
|
||||
messages: list # 변경 가능한 메시지 목록
|
||||
agent: Agent # 현재 에이전트
|
||||
task: Task # 현재 작업
|
||||
crew: Crew # 크루 인스턴스
|
||||
llm: BaseLLM # LLM 인스턴스
|
||||
iterations: int # 현재 반복 횟수
|
||||
response: str | None # LLM 응답 (후 훅용)
|
||||
```
|
||||
|
||||
### 메시지 수정
|
||||
|
||||
**중요:** 항상 메시지를 제자리에서 수정하세요:
|
||||
|
||||
```python
|
||||
# ✅ 올바름 - 제자리에서 수정
|
||||
def add_context(context: LLMCallHookContext) -> None:
|
||||
context.messages.append({"role": "system", "content": "간결하게 작성하세요"})
|
||||
|
||||
# ❌ 잘못됨 - 리스트 참조를 교체
|
||||
def wrong_approach(context: LLMCallHookContext) -> None:
|
||||
context.messages = [{"role": "system", "content": "간결하게 작성하세요"}]
|
||||
```
|
||||
|
||||
## 등록 방법
|
||||
|
||||
### 1. 데코레이터 기반 등록 (권장)
|
||||
|
||||
더 깔끔한 구문을 위해 데코레이터를 사용합니다:
|
||||
|
||||
```python
|
||||
from crewai.hooks import before_llm_call, after_llm_call
|
||||
|
||||
@before_llm_call
|
||||
def validate_iteration_count(context):
|
||||
"""반복 횟수를 검증합니다."""
|
||||
if context.iterations > 10:
|
||||
print("⚠️ 최대 반복 횟수 초과")
|
||||
return False # 실행 차단
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def sanitize_response(context):
|
||||
"""민감한 데이터를 제거합니다."""
|
||||
if context.response and "API_KEY" in context.response:
|
||||
return context.response.replace("API_KEY", "[수정됨]")
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. 크루 범위 훅
|
||||
|
||||
특정 크루 인스턴스에 대한 훅을 등록합니다:
|
||||
|
||||
```python
|
||||
from crewai import CrewBase
|
||||
from crewai.project import crew
|
||||
from crewai.hooks import before_llm_call_crew, after_llm_call_crew
|
||||
|
||||
@CrewBase
|
||||
class MyProjCrew:
|
||||
@before_llm_call_crew
|
||||
def validate_inputs(self, context):
|
||||
# 이 크루에만 적용됩니다
|
||||
if context.iterations == 0:
|
||||
print(f"작업 시작: {context.task.description}")
|
||||
return None
|
||||
|
||||
@after_llm_call_crew
|
||||
def log_responses(self, context):
|
||||
# 크루별 응답 로깅
|
||||
print(f"응답 길이: {len(context.response)}")
|
||||
return None
|
||||
|
||||
@crew
|
||||
def crew(self) -> Crew:
|
||||
return Crew(
|
||||
agents=self.agents,
|
||||
tasks=self.tasks,
|
||||
process=Process.sequential,
|
||||
verbose=True
|
||||
)
|
||||
```
|
||||
|
||||
## 일반적인 사용 사례
|
||||
|
||||
### 1. 반복 제한
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def limit_iterations(context: LLMCallHookContext) -> bool | None:
|
||||
"""무한 루프를 방지하기 위해 반복을 제한합니다."""
|
||||
max_iterations = 15
|
||||
if context.iterations > max_iterations:
|
||||
print(f"⛔ 차단됨: {max_iterations}회 반복 초과")
|
||||
return False # 실행 차단
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. 사람의 승인 게이트
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def require_approval(context: LLMCallHookContext) -> bool | None:
|
||||
"""특정 반복 후 승인을 요구합니다."""
|
||||
if context.iterations > 5:
|
||||
response = context.request_human_input(
|
||||
prompt=f"반복 {context.iterations}: LLM 호출을 승인하시겠습니까?",
|
||||
default_message="승인하려면 Enter를 누르고, 차단하려면 'no'를 입력하세요:"
|
||||
)
|
||||
if response.lower() == "no":
|
||||
print("🚫 사용자에 의해 LLM 호출이 차단되었습니다")
|
||||
return False
|
||||
return None
|
||||
```
|
||||
|
||||
### 3. 시스템 컨텍스트 추가
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def add_guardrails(context: LLMCallHookContext) -> None:
|
||||
"""모든 LLM 호출에 안전 가이드라인을 추가합니다."""
|
||||
context.messages.append({
|
||||
"role": "system",
|
||||
"content": "응답이 사실에 기반하고 가능한 경우 출처를 인용하도록 하세요."
|
||||
})
|
||||
return None
|
||||
```
|
||||
|
||||
### 4. 응답 정제
|
||||
|
||||
```python
|
||||
@after_llm_call
|
||||
def sanitize_sensitive_data(context: LLMCallHookContext) -> str | None:
|
||||
"""민감한 데이터 패턴을 제거합니다."""
|
||||
if not context.response:
|
||||
return None
|
||||
|
||||
import re
|
||||
sanitized = context.response
|
||||
sanitized = re.sub(r'\b\d{3}-\d{2}-\d{4}\b', '[주민번호-수정됨]', sanitized)
|
||||
sanitized = re.sub(r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b', '[카드번호-수정됨]', sanitized)
|
||||
|
||||
return sanitized
|
||||
```
|
||||
|
||||
### 5. 비용 추적
|
||||
|
||||
```python
|
||||
import tiktoken
|
||||
|
||||
@before_llm_call
|
||||
def track_token_usage(context: LLMCallHookContext) -> None:
|
||||
"""입력 토큰을 추적합니다."""
|
||||
encoding = tiktoken.get_encoding("cl100k_base")
|
||||
total_tokens = sum(
|
||||
len(encoding.encode(msg.get("content", "")))
|
||||
for msg in context.messages
|
||||
)
|
||||
print(f"📊 입력 토큰: ~{total_tokens}")
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def track_response_tokens(context: LLMCallHookContext) -> None:
|
||||
"""응답 토큰을 추적합니다."""
|
||||
if context.response:
|
||||
encoding = tiktoken.get_encoding("cl100k_base")
|
||||
tokens = len(encoding.encode(context.response))
|
||||
print(f"📊 응답 토큰: ~{tokens}")
|
||||
return None
|
||||
```
|
||||
|
||||
### 6. 디버그 로깅
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def debug_request(context: LLMCallHookContext) -> None:
|
||||
"""LLM 요청을 디버그합니다."""
|
||||
print(f"""
|
||||
🔍 LLM 호출 디버그:
|
||||
- 에이전트: {context.agent.role}
|
||||
- 작업: {context.task.description[:50]}...
|
||||
- 반복: {context.iterations}
|
||||
- 메시지 수: {len(context.messages)}
|
||||
- 마지막 메시지: {context.messages[-1] if context.messages else 'None'}
|
||||
""")
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def debug_response(context: LLMCallHookContext) -> None:
|
||||
"""LLM 응답을 디버그합니다."""
|
||||
if context.response:
|
||||
print(f"✅ 응답 미리보기: {context.response[:100]}...")
|
||||
return None
|
||||
```
|
||||
|
||||
## 훅 관리
|
||||
|
||||
### 훅 등록 해제
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
unregister_before_llm_call_hook,
|
||||
unregister_after_llm_call_hook
|
||||
)
|
||||
|
||||
# 특정 훅 등록 해제
|
||||
def my_hook(context):
|
||||
...
|
||||
|
||||
register_before_llm_call_hook(my_hook)
|
||||
# 나중에...
|
||||
unregister_before_llm_call_hook(my_hook) # 찾으면 True 반환
|
||||
```
|
||||
|
||||
### 훅 지우기
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
clear_before_llm_call_hooks,
|
||||
clear_after_llm_call_hooks,
|
||||
clear_all_llm_call_hooks
|
||||
)
|
||||
|
||||
# 특정 훅 타입 지우기
|
||||
count = clear_before_llm_call_hooks()
|
||||
print(f"{count}개의 전(before) 훅이 지워졌습니다")
|
||||
|
||||
# 모든 LLM 훅 지우기
|
||||
before_count, after_count = clear_all_llm_call_hooks()
|
||||
print(f"{before_count}개의 전(before) 훅과 {after_count}개의 후(after) 훅이 지워졌습니다")
|
||||
```
|
||||
|
||||
## 고급 패턴
|
||||
|
||||
### 조건부 훅 실행
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def conditional_blocking(context: LLMCallHookContext) -> bool | None:
|
||||
"""특정 조건에서만 차단합니다."""
|
||||
# 특정 에이전트에 대해서만 차단
|
||||
if context.agent.role == "researcher" and context.iterations > 10:
|
||||
return False
|
||||
|
||||
# 특정 작업에 대해서만 차단
|
||||
if "민감한" in context.task.description.lower() and context.iterations > 5:
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 컨텍스트 인식 수정
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def adaptive_prompting(context: LLMCallHookContext) -> None:
|
||||
"""반복에 따라 다른 컨텍스트를 추가합니다."""
|
||||
if context.iterations == 0:
|
||||
context.messages.append({
|
||||
"role": "system",
|
||||
"content": "높은 수준의 개요부터 시작하세요."
|
||||
})
|
||||
elif context.iterations > 3:
|
||||
context.messages.append({
|
||||
"role": "system",
|
||||
"content": "구체적인 세부사항에 집중하고 예제를 제공하세요."
|
||||
})
|
||||
return None
|
||||
```
|
||||
|
||||
### 훅 체이닝
|
||||
|
||||
```python
|
||||
# 여러 훅은 등록 순서대로 실행됩니다
|
||||
|
||||
@before_llm_call
|
||||
def first_hook(context):
|
||||
print("1. 첫 번째 훅 실행됨")
|
||||
return None
|
||||
|
||||
@before_llm_call
|
||||
def second_hook(context):
|
||||
print("2. 두 번째 훅 실행됨")
|
||||
return None
|
||||
|
||||
@before_llm_call
|
||||
def blocking_hook(context):
|
||||
if context.iterations > 10:
|
||||
print("3. 차단 훅 - 실행 중지")
|
||||
return False # 후속 훅은 실행되지 않습니다
|
||||
print("3. 차단 훅 - 실행 허용")
|
||||
return None
|
||||
```
|
||||
|
||||
## 모범 사례
|
||||
|
||||
1. **훅을 집중적으로 유지**: 각 훅은 단일 책임을 가져야 합니다
|
||||
2. **무거운 계산 피하기**: 훅은 모든 LLM 호출마다 실행됩니다
|
||||
3. **오류를 우아하게 처리**: try-except를 사용하여 훅 실패로 인한 실행 중단 방지
|
||||
4. **타입 힌트 사용**: 더 나은 IDE 지원을 위해 `LLMCallHookContext` 활용
|
||||
5. **훅 동작 문서화**: 특히 차단 조건에 대해
|
||||
6. **훅을 독립적으로 테스트**: 프로덕션에서 사용하기 전에 단위 테스트
|
||||
7. **테스트에서 훅 지우기**: 테스트 실행 간 `clear_all_llm_call_hooks()` 사용
|
||||
8. **제자리에서 수정**: 항상 `context.messages`를 제자리에서 수정하고 교체하지 마세요
|
||||
|
||||
## 오류 처리
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def safe_hook(context: LLMCallHookContext) -> bool | None:
|
||||
try:
|
||||
# 훅 로직
|
||||
if some_condition:
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"⚠️ 훅 오류: {e}")
|
||||
# 결정: 오류 발생 시 허용 또는 차단
|
||||
return None # 오류에도 불구하고 실행 허용
|
||||
```
|
||||
|
||||
## 타입 안전성
|
||||
|
||||
```python
|
||||
from crewai.hooks import LLMCallHookContext, BeforeLLMCallHookType, AfterLLMCallHookType
|
||||
|
||||
# 명시적 타입 주석
|
||||
def my_before_hook(context: LLMCallHookContext) -> bool | None:
|
||||
return None
|
||||
|
||||
def my_after_hook(context: LLMCallHookContext) -> str | None:
|
||||
return None
|
||||
|
||||
# 타입 안전 등록
|
||||
register_before_llm_call_hook(my_before_hook)
|
||||
register_after_llm_call_hook(my_after_hook)
|
||||
```
|
||||
|
||||
## 문제 해결
|
||||
|
||||
### 훅이 실행되지 않음
|
||||
- 크루 실행 전에 훅이 등록되었는지 확인
|
||||
- 이전 훅이 `False`를 반환했는지 확인 (후속 훅 차단)
|
||||
- 훅 시그니처가 예상 타입과 일치하는지 확인
|
||||
|
||||
### 메시지 수정이 지속되지 않음
|
||||
- 제자리 수정 사용: `context.messages.append()`
|
||||
- 리스트를 교체하지 마세요: `context.messages = []`
|
||||
|
||||
### 응답 수정이 작동하지 않음
|
||||
- 후 훅에서 수정된 문자열을 반환
|
||||
- `None`을 반환하면 원본 응답이 유지됩니다
|
||||
|
||||
## 결론
|
||||
|
||||
LLM 호출 훅은 CrewAI에서 언어 모델 상호작용을 제어하고 모니터링하는 강력한 기능을 제공합니다. 이를 사용하여 안전 가드레일, 승인 게이트, 로깅, 비용 추적 및 응답 정제를 구현하세요. 적절한 오류 처리 및 타입 안전성과 결합하면, 훅을 통해 강력하고 프로덕션 준비가 된 에이전트 시스템을 구축할 수 있습니다.
|
||||
|
||||
498
docs/ko/learn/tool-hooks.mdx
Normal file
498
docs/ko/learn/tool-hooks.mdx
Normal file
@@ -0,0 +1,498 @@
|
||||
---
|
||||
title: 도구 호출 훅
|
||||
description: CrewAI에서 도구 실행을 가로채고, 수정하고, 제어하는 도구 호출 훅 사용 방법 배우기
|
||||
mode: "wide"
|
||||
---
|
||||
|
||||
도구 호출 훅(Tool Call Hooks)은 에이전트 작업 중 도구 실행에 대한 세밀한 제어를 제공합니다. 이러한 훅을 사용하면 도구 호출을 가로채고, 입력을 수정하고, 출력을 변환하고, 안전 검사를 구현하고, 포괄적인 로깅 또는 모니터링을 추가할 수 있습니다.
|
||||
|
||||
## 개요
|
||||
|
||||
도구 훅은 두 가지 중요한 시점에 실행됩니다:
|
||||
- **도구 호출 전**: 입력 수정, 매개변수 검증 또는 실행 차단
|
||||
- **도구 호출 후**: 결과 변환, 출력 정제 또는 실행 세부사항 로깅
|
||||
|
||||
## 훅 타입
|
||||
|
||||
### 도구 호출 전 훅
|
||||
|
||||
모든 도구 실행 전에 실행되며, 다음을 수행할 수 있습니다:
|
||||
- 도구 입력 검사 및 수정
|
||||
- 조건에 따라 도구 실행 차단
|
||||
- 위험한 작업에 대한 승인 게이트 구현
|
||||
- 매개변수 검증
|
||||
- 도구 호출 로깅
|
||||
|
||||
**시그니처:**
|
||||
```python
|
||||
def before_hook(context: ToolCallHookContext) -> bool | None:
|
||||
# 실행을 차단하려면 False 반환
|
||||
# 실행을 허용하려면 True 또는 None 반환
|
||||
...
|
||||
```
|
||||
|
||||
### 도구 호출 후 훅
|
||||
|
||||
모든 도구 실행 후에 실행되며, 다음을 수행할 수 있습니다:
|
||||
- 도구 결과 수정 또는 정제
|
||||
- 메타데이터 또는 서식 추가
|
||||
- 실행 결과 로깅
|
||||
- 결과 검증 구현
|
||||
- 출력 형식 변환
|
||||
|
||||
**시그니처:**
|
||||
```python
|
||||
def after_hook(context: ToolCallHookContext) -> str | None:
|
||||
# 수정된 결과 문자열 반환
|
||||
# 원본 결과를 유지하려면 None 반환
|
||||
...
|
||||
```
|
||||
|
||||
## 도구 훅 컨텍스트
|
||||
|
||||
`ToolCallHookContext` 객체는 도구 실행 상태에 대한 포괄적인 액세스를 제공합니다:
|
||||
|
||||
```python
|
||||
class ToolCallHookContext:
|
||||
tool_name: str # 호출되는 도구의 이름
|
||||
tool_input: dict[str, Any] # 변경 가능한 도구 입력 매개변수
|
||||
tool: CrewStructuredTool # 도구 인스턴스 참조
|
||||
agent: Agent | BaseAgent | None # 도구를 실행하는 에이전트
|
||||
task: Task | None # 현재 작업
|
||||
crew: Crew | None # 크루 인스턴스
|
||||
tool_result: str | None # 도구 결과 (후 훅용)
|
||||
```
|
||||
|
||||
### 도구 입력 수정
|
||||
|
||||
**중요:** 항상 도구 입력을 제자리에서 수정하세요:
|
||||
|
||||
```python
|
||||
# ✅ 올바름 - 제자리에서 수정
|
||||
def sanitize_input(context: ToolCallHookContext) -> None:
|
||||
context.tool_input['query'] = context.tool_input['query'].lower()
|
||||
|
||||
# ❌ 잘못됨 - 딕셔너리 참조를 교체
|
||||
def wrong_approach(context: ToolCallHookContext) -> None:
|
||||
context.tool_input = {'query': 'new query'}
|
||||
```
|
||||
|
||||
## 등록 방법
|
||||
|
||||
### 1. 데코레이터 기반 등록 (권장)
|
||||
|
||||
더 깔끔한 구문을 위해 데코레이터를 사용합니다:
|
||||
|
||||
```python
|
||||
from crewai.hooks import before_tool_call, after_tool_call
|
||||
|
||||
@before_tool_call
|
||||
def block_dangerous_tools(context):
|
||||
"""위험한 도구를 차단합니다."""
|
||||
dangerous_tools = ['delete_database', 'drop_table', 'rm_rf']
|
||||
if context.tool_name in dangerous_tools:
|
||||
print(f"⛔ 위험한 도구 차단됨: {context.tool_name}")
|
||||
return False # 실행 차단
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def sanitize_results(context):
|
||||
"""결과를 정제합니다."""
|
||||
if context.tool_result and "password" in context.tool_result.lower():
|
||||
return context.tool_result.replace("password", "[수정됨]")
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. 크루 범위 훅
|
||||
|
||||
특정 크루 인스턴스에 대한 훅을 등록합니다:
|
||||
|
||||
```python
|
||||
from crewai import CrewBase
|
||||
from crewai.project import crew
|
||||
from crewai.hooks import before_tool_call_crew, after_tool_call_crew
|
||||
|
||||
@CrewBase
|
||||
class MyProjCrew:
|
||||
@before_tool_call_crew
|
||||
def validate_tool_inputs(self, context):
|
||||
# 이 크루에만 적용됩니다
|
||||
if context.tool_name == "web_search":
|
||||
if not context.tool_input.get('query'):
|
||||
print("❌ 잘못된 검색 쿼리")
|
||||
return False
|
||||
return None
|
||||
|
||||
@after_tool_call_crew
|
||||
def log_tool_results(self, context):
|
||||
# 크루별 도구 로깅
|
||||
print(f"✅ {context.tool_name} 완료됨")
|
||||
return None
|
||||
|
||||
@crew
|
||||
def crew(self) -> Crew:
|
||||
return Crew(
|
||||
agents=self.agents,
|
||||
tasks=self.tasks,
|
||||
process=Process.sequential,
|
||||
verbose=True
|
||||
)
|
||||
```
|
||||
|
||||
## 일반적인 사용 사례
|
||||
|
||||
### 1. 안전 가드레일
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def safety_check(context: ToolCallHookContext) -> bool | None:
|
||||
"""해를 끼칠 수 있는 도구를 차단합니다."""
|
||||
destructive_tools = [
|
||||
'delete_file',
|
||||
'drop_table',
|
||||
'remove_user',
|
||||
'system_shutdown'
|
||||
]
|
||||
|
||||
if context.tool_name in destructive_tools:
|
||||
print(f"🛑 파괴적인 도구 차단됨: {context.tool_name}")
|
||||
return False
|
||||
|
||||
# 민감한 작업에 대해 경고
|
||||
sensitive_tools = ['send_email', 'post_to_social_media', 'charge_payment']
|
||||
if context.tool_name in sensitive_tools:
|
||||
print(f"⚠️ 민감한 도구 실행 중: {context.tool_name}")
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. 사람의 승인 게이트
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def require_approval_for_actions(context: ToolCallHookContext) -> bool | None:
|
||||
"""특정 작업에 대한 승인을 요구합니다."""
|
||||
approval_required = [
|
||||
'send_email',
|
||||
'make_purchase',
|
||||
'delete_file',
|
||||
'post_message'
|
||||
]
|
||||
|
||||
if context.tool_name in approval_required:
|
||||
response = context.request_human_input(
|
||||
prompt=f"{context.tool_name}을(를) 승인하시겠습니까?",
|
||||
default_message=f"입력: {context.tool_input}\n승인하려면 'yes'를 입력하세요:"
|
||||
)
|
||||
|
||||
if response.lower() != 'yes':
|
||||
print(f"❌ 도구 실행 거부됨: {context.tool_name}")
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 3. 입력 검증 및 정제
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def validate_and_sanitize_inputs(context: ToolCallHookContext) -> bool | None:
|
||||
"""입력을 검증하고 정제합니다."""
|
||||
# 검색 쿼리 검증
|
||||
if context.tool_name == 'web_search':
|
||||
query = context.tool_input.get('query', '')
|
||||
if len(query) < 3:
|
||||
print("❌ 검색 쿼리가 너무 짧습니다")
|
||||
return False
|
||||
|
||||
# 쿼리 정제
|
||||
context.tool_input['query'] = query.strip().lower()
|
||||
|
||||
# 파일 경로 검증
|
||||
if context.tool_name == 'read_file':
|
||||
path = context.tool_input.get('path', '')
|
||||
if '..' in path or path.startswith('/'):
|
||||
print("❌ 잘못된 파일 경로")
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 4. 결과 정제
|
||||
|
||||
```python
|
||||
@after_tool_call
|
||||
def sanitize_sensitive_data(context: ToolCallHookContext) -> str | None:
|
||||
"""민감한 데이터를 정제합니다."""
|
||||
if not context.tool_result:
|
||||
return None
|
||||
|
||||
import re
|
||||
result = context.tool_result
|
||||
|
||||
# API 키 제거
|
||||
result = re.sub(
|
||||
r'(api[_-]?key|token)["\']?\s*[:=]\s*["\']?[\w-]+',
|
||||
r'\1: [수정됨]',
|
||||
result,
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
# 이메일 주소 제거
|
||||
result = re.sub(
|
||||
r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
|
||||
'[이메일-수정됨]',
|
||||
result
|
||||
)
|
||||
|
||||
# 신용카드 번호 제거
|
||||
result = re.sub(
|
||||
r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b',
|
||||
'[카드-수정됨]',
|
||||
result
|
||||
)
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
### 5. 도구 사용 분석
|
||||
|
||||
```python
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
tool_stats = defaultdict(lambda: {'count': 0, 'total_time': 0, 'failures': 0})
|
||||
|
||||
@before_tool_call
|
||||
def start_timer(context: ToolCallHookContext) -> None:
|
||||
context.tool_input['_start_time'] = time.time()
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def track_tool_usage(context: ToolCallHookContext) -> None:
|
||||
start_time = context.tool_input.get('_start_time', time.time())
|
||||
duration = time.time() - start_time
|
||||
|
||||
tool_stats[context.tool_name]['count'] += 1
|
||||
tool_stats[context.tool_name]['total_time'] += duration
|
||||
|
||||
if not context.tool_result or 'error' in context.tool_result.lower():
|
||||
tool_stats[context.tool_name]['failures'] += 1
|
||||
|
||||
print(f"""
|
||||
📊 {context.tool_name} 도구 통계:
|
||||
- 실행 횟수: {tool_stats[context.tool_name]['count']}
|
||||
- 평균 시간: {tool_stats[context.tool_name]['total_time'] / tool_stats[context.tool_name]['count']:.2f}초
|
||||
- 실패: {tool_stats[context.tool_name]['failures']}
|
||||
""")
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 6. 속도 제한
|
||||
|
||||
```python
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
tool_call_history = defaultdict(list)
|
||||
|
||||
@before_tool_call
|
||||
def rate_limit_tools(context: ToolCallHookContext) -> bool | None:
|
||||
"""도구 호출 속도를 제한합니다."""
|
||||
tool_name = context.tool_name
|
||||
now = datetime.now()
|
||||
|
||||
# 오래된 항목 정리 (1분 이상 된 것)
|
||||
tool_call_history[tool_name] = [
|
||||
call_time for call_time in tool_call_history[tool_name]
|
||||
if now - call_time < timedelta(minutes=1)
|
||||
]
|
||||
|
||||
# 속도 제한 확인 (분당 최대 10회 호출)
|
||||
if len(tool_call_history[tool_name]) >= 10:
|
||||
print(f"🚫 {tool_name}에 대한 속도 제한 초과")
|
||||
return False
|
||||
|
||||
# 이 호출 기록
|
||||
tool_call_history[tool_name].append(now)
|
||||
return None
|
||||
```
|
||||
|
||||
### 7. 디버그 로깅
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def debug_tool_call(context: ToolCallHookContext) -> None:
|
||||
"""도구 호출을 디버그합니다."""
|
||||
print(f"""
|
||||
🔍 도구 호출 디버그:
|
||||
- 도구: {context.tool_name}
|
||||
- 에이전트: {context.agent.role if context.agent else '알 수 없음'}
|
||||
- 작업: {context.task.description[:50] if context.task else '알 수 없음'}...
|
||||
- 입력: {context.tool_input}
|
||||
""")
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def debug_tool_result(context: ToolCallHookContext) -> None:
|
||||
"""도구 결과를 디버그합니다."""
|
||||
if context.tool_result:
|
||||
result_preview = context.tool_result[:200]
|
||||
print(f"✅ 결과 미리보기: {result_preview}...")
|
||||
else:
|
||||
print("⚠️ 반환된 결과 없음")
|
||||
return None
|
||||
```
|
||||
|
||||
## 훅 관리
|
||||
|
||||
### 훅 등록 해제
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
unregister_before_tool_call_hook,
|
||||
unregister_after_tool_call_hook
|
||||
)
|
||||
|
||||
# 특정 훅 등록 해제
|
||||
def my_hook(context):
|
||||
...
|
||||
|
||||
register_before_tool_call_hook(my_hook)
|
||||
# 나중에...
|
||||
success = unregister_before_tool_call_hook(my_hook)
|
||||
print(f"등록 해제됨: {success}")
|
||||
```
|
||||
|
||||
### 훅 지우기
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
clear_before_tool_call_hooks,
|
||||
clear_after_tool_call_hooks,
|
||||
clear_all_tool_call_hooks
|
||||
)
|
||||
|
||||
# 특정 훅 타입 지우기
|
||||
count = clear_before_tool_call_hooks()
|
||||
print(f"{count}개의 전(before) 훅이 지워졌습니다")
|
||||
|
||||
# 모든 도구 훅 지우기
|
||||
before_count, after_count = clear_all_tool_call_hooks()
|
||||
print(f"{before_count}개의 전(before) 훅과 {after_count}개의 후(after) 훅이 지워졌습니다")
|
||||
```
|
||||
|
||||
## 고급 패턴
|
||||
|
||||
### 조건부 훅 실행
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def conditional_blocking(context: ToolCallHookContext) -> bool | None:
|
||||
"""특정 조건에서만 차단합니다."""
|
||||
# 특정 에이전트에 대해서만 차단
|
||||
if context.agent and context.agent.role == "junior_agent":
|
||||
if context.tool_name in ['delete_file', 'send_email']:
|
||||
print(f"❌ 주니어 에이전트는 {context.tool_name}을(를) 사용할 수 없습니다")
|
||||
return False
|
||||
|
||||
# 특정 작업 중에만 차단
|
||||
if context.task and "민감한" in context.task.description.lower():
|
||||
if context.tool_name == 'web_search':
|
||||
print("❌ 민감한 작업에서는 웹 검색이 차단됩니다")
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 컨텍스트 인식 입력 수정
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def enhance_tool_inputs(context: ToolCallHookContext) -> None:
|
||||
"""에이전트 역할에 따라 컨텍스트를 추가합니다."""
|
||||
# 에이전트 역할에 따라 컨텍스트 추가
|
||||
if context.agent and context.agent.role == "researcher":
|
||||
if context.tool_name == 'web_search':
|
||||
# 연구원에 대한 도메인 제한 추가
|
||||
context.tool_input['domains'] = ['edu', 'gov', 'org']
|
||||
|
||||
# 작업에 따라 컨텍스트 추가
|
||||
if context.task and "긴급" in context.task.description.lower():
|
||||
if context.tool_name == 'send_email':
|
||||
context.tool_input['priority'] = 'high'
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
## 모범 사례
|
||||
|
||||
1. **훅을 집중적으로 유지**: 각 훅은 단일 책임을 가져야 합니다
|
||||
2. **무거운 계산 피하기**: 훅은 모든 도구 호출마다 실행됩니다
|
||||
3. **오류를 우아하게 처리**: try-except를 사용하여 훅 실패로 인한 실행 중단 방지
|
||||
4. **타입 힌트 사용**: 더 나은 IDE 지원을 위해 `ToolCallHookContext` 활용
|
||||
5. **차단 조건 문서화**: 도구가 차단되는 시기/이유를 명확히 하세요
|
||||
6. **훅을 독립적으로 테스트**: 프로덕션에서 사용하기 전에 단위 테스트
|
||||
7. **테스트에서 훅 지우기**: 테스트 실행 간 `clear_all_tool_call_hooks()` 사용
|
||||
8. **제자리에서 수정**: 항상 `context.tool_input`을 제자리에서 수정하고 교체하지 마세요
|
||||
9. **중요한 결정 로깅**: 특히 도구 실행을 차단할 때
|
||||
10. **성능 고려**: 가능한 경우 비용이 많이 드는 검증을 캐시
|
||||
|
||||
## 오류 처리
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def safe_validation(context: ToolCallHookContext) -> bool | None:
|
||||
try:
|
||||
# 검증 로직
|
||||
if not validate_input(context.tool_input):
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"⚠️ 훅 오류: {e}")
|
||||
# 결정: 오류 발생 시 허용 또는 차단
|
||||
return None # 오류에도 불구하고 실행 허용
|
||||
```
|
||||
|
||||
## 타입 안전성
|
||||
|
||||
```python
|
||||
from crewai.hooks import ToolCallHookContext, BeforeToolCallHookType, AfterToolCallHookType
|
||||
|
||||
# 명시적 타입 주석
|
||||
def my_before_hook(context: ToolCallHookContext) -> bool | None:
|
||||
return None
|
||||
|
||||
def my_after_hook(context: ToolCallHookContext) -> str | None:
|
||||
return None
|
||||
|
||||
# 타입 안전 등록
|
||||
register_before_tool_call_hook(my_before_hook)
|
||||
register_after_tool_call_hook(my_after_hook)
|
||||
```
|
||||
|
||||
## 문제 해결
|
||||
|
||||
### 훅이 실행되지 않음
|
||||
- 크루 실행 전에 훅이 등록되었는지 확인
|
||||
- 이전 훅이 `False`를 반환했는지 확인 (실행 및 후속 훅 차단)
|
||||
- 훅 시그니처가 예상 타입과 일치하는지 확인
|
||||
|
||||
### 입력 수정이 작동하지 않음
|
||||
- 제자리 수정 사용: `context.tool_input['key'] = value`
|
||||
- 딕셔너리를 교체하지 마세요: `context.tool_input = {}`
|
||||
|
||||
### 결과 수정이 작동하지 않음
|
||||
- 후 훅에서 수정된 문자열을 반환
|
||||
- `None`을 반환하면 원본 결과가 유지됩니다
|
||||
- 도구가 실제로 결과를 반환했는지 확인
|
||||
|
||||
### 도구가 예기치 않게 차단됨
|
||||
- 차단 조건에 대한 모든 전(before) 훅 확인
|
||||
- 훅 실행 순서 확인
|
||||
- 어떤 훅이 차단하는지 식별하기 위해 디버그 로깅 추가
|
||||
|
||||
## 결론
|
||||
|
||||
도구 호출 훅은 CrewAI에서 도구 실행을 제어하고 모니터링하는 강력한 기능을 제공합니다. 이를 사용하여 안전 가드레일, 승인 게이트, 입력 검증, 결과 정제, 로깅 및 분석을 구현하세요. 적절한 오류 처리 및 타입 안전성과 결합하면, 훅을 통해 포괄적인 관찰성을 갖춘 안전하고 프로덕션 준비가 된 에이전트 시스템을 구축할 수 있습니다.
|
||||
|
||||
@@ -93,11 +93,15 @@ ddtrace-run python crewai_agent.py
|
||||
|
||||
트레이스를 클릭하면 사용된 총 토큰, LLM 호출 수, 사용된 모델, 예상 비용 등 트레이스에 대한 세부 정보가 표시됩니다. 특정 스팬(span)을 클릭하면 이러한 세부 정보의 범위가 좁혀지고 관련 입력, 출력 및 메타데이터가 표시됩니다.
|
||||
|
||||

|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-1.png" alt="Datadog LLM 옵저버빌리티 추적 보기" />
|
||||
</Frame>
|
||||
|
||||
또한 트레이스의 제어 및 데이터 흐름을 보여주는 실행 그래프 보기도 확인할 수 있습니다. 이 보기는 에이전트 규모가 커져도 그에 맞게 확장되어 LLM 호출, 도구 호출 및 에이전트 상호 작용 간의 핸드오프와 관계를 보여줍니다.
|
||||
|
||||

|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-2.png" alt="Datadog LLM Observability 에이전트 실행 흐름 보기" />
|
||||
</Frame>
|
||||
|
||||
## 참조
|
||||
|
||||
|
||||
@@ -730,9 +730,7 @@ Portkey 대시보드에서 [구성 페이지](https://app.portkey.ai/configs)에
|
||||
- 로그를 필터링하기 위한 관련 메타데이터 수집
|
||||
- 액세스 권한 적용
|
||||
|
||||
API 키 생성 방법:
|
||||
- [Portkey App](https://app.portkey.ai/)
|
||||
- [API Key Management API](/ko/api-reference/admin-api/control-plane/api-keys/create-api-key)
|
||||
[Portkey App](https://app.portkey.ai/)를 통해 API 키를 생성하세요
|
||||
|
||||
Python SDK를 사용한 예시:
|
||||
```python
|
||||
@@ -755,7 +753,7 @@ api_key = portkey.api_keys.create(
|
||||
)
|
||||
```
|
||||
|
||||
자세한 키 관리 방법은 [API 키 문서](/ko/api-reference/admin-api/control-plane/api-keys/create-api-key)를 참조하세요.
|
||||
자세한 키 관리 방법은 [Portkey 문서](https://portkey.ai/docs)를 참조하세요.
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="4단계: 배포 및 모니터링">
|
||||
|
||||
@@ -18,7 +18,7 @@ mode: "wide"
|
||||
파일을 Amazon S3 스토리지에 작성하고 업로드합니다.
|
||||
</Card>
|
||||
|
||||
<Card title="Bedrock Invoke Agent" icon="aws" href="/ko/tools/cloud-storage/bedrockinvokeagenttool">
|
||||
<Card title="Bedrock Invoke Agent" icon="aws" href="/ko/tools/integration/bedrockinvokeagenttool">
|
||||
AI 기반 작업을 위해 Amazon Bedrock 에이전트를 호출합니다.
|
||||
</Card>
|
||||
|
||||
|
||||
@@ -23,13 +23,15 @@ uv add qdrant-client
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai_tools import QdrantVectorSearchTool
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# Initialize the tool
|
||||
# QdrantConfig로 도구 초기화
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
)
|
||||
)
|
||||
|
||||
# Create an agent that uses the tool
|
||||
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
|
||||
def get_openai_embedding(text):
|
||||
response = client.embeddings.create(
|
||||
input=text,
|
||||
model="text-embedding-3-small"
|
||||
model="text-embedding-3-large"
|
||||
)
|
||||
return response.data[0].embedding
|
||||
|
||||
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
|
||||
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
||||
# Extract text from PDF
|
||||
text_chunks = extract_text_from_pdf(pdf_path)
|
||||
|
||||
|
||||
# Create Qdrant collection
|
||||
if qdrant.collection_exists(collection_name):
|
||||
qdrant.delete_collection(collection_name)
|
||||
qdrant.create_collection(
|
||||
collection_name=collection_name,
|
||||
vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
|
||||
vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
|
||||
)
|
||||
|
||||
# Store embeddings
|
||||
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
|
||||
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
||||
|
||||
# Initialize Qdrant search tool
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
)
|
||||
)
|
||||
|
||||
# Create CrewAI agents
|
||||
search_agent = Agent(
|
||||
role="Senior Semantic Search Agent",
|
||||
goal="Find and analyze documents based on semantic search",
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
information using semantic search in a Qdrant database.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -141,7 +147,7 @@ search_agent = Agent(
|
||||
answer_agent = Agent(
|
||||
role="Senior Answer Assistant",
|
||||
goal="Generate answers to questions based on the context provided",
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
answers to questions based on the context provided.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -180,21 +186,82 @@ print(result)
|
||||
## 도구 매개변수
|
||||
|
||||
### 필수 파라미터
|
||||
- `qdrant_url` (str): Qdrant 서버의 URL
|
||||
- `qdrant_api_key` (str): Qdrant 인증을 위한 API 키
|
||||
- `collection_name` (str): 검색할 Qdrant 컬렉션의 이름
|
||||
- `qdrant_config` (QdrantConfig): 모든 Qdrant 설정을 포함하는 구성 객체
|
||||
|
||||
### 선택적 매개변수
|
||||
### QdrantConfig 매개변수
|
||||
- `qdrant_url` (str): Qdrant 서버의 URL
|
||||
- `qdrant_api_key` (str, 선택 사항): Qdrant 인증을 위한 API 키
|
||||
- `collection_name` (str): 검색할 Qdrant 컬렉션의 이름
|
||||
- `limit` (int): 반환할 최대 결과 수 (기본값: 3)
|
||||
- `score_threshold` (float): 최소 유사도 점수 임계값 (기본값: 0.35)
|
||||
- `filter` (Any, 선택 사항): 고급 필터링을 위한 Qdrant Filter 인스턴스 (기본값: None)
|
||||
|
||||
### 선택적 도구 매개변수
|
||||
- `custom_embedding_fn` (Callable[[str], list[float]]): 텍스트 벡터화를 위한 사용자 지정 함수
|
||||
- `qdrant_package` (str): Qdrant의 기본 패키지 경로 (기본값: "qdrant_client")
|
||||
- `client` (Any): 사전 초기화된 Qdrant 클라이언트 (선택 사항)
|
||||
|
||||
## 고급 필터링
|
||||
|
||||
QdrantVectorSearchTool은 검색 결과를 세밀하게 조정할 수 있는 강력한 필터링 기능을 지원합니다:
|
||||
|
||||
### 동적 필터링
|
||||
검색 시 `filter_by` 및 `filter_value` 매개변수를 사용하여 즉석에서 결과를 필터링할 수 있습니다:
|
||||
|
||||
```python
|
||||
# 에이전트는 도구를 호출할 때 이러한 매개변수를 사용합니다
|
||||
# 도구 스키마는 filter_by 및 filter_value를 허용합니다
|
||||
# 예시: 카테고리 필터를 사용한 검색
|
||||
# 결과는 category == "기술"인 항목으로 필터링됩니다
|
||||
```
|
||||
|
||||
### QdrantConfig를 사용한 사전 설정 필터
|
||||
복잡한 필터링의 경우 구성에서 Qdrant Filter 인스턴스를 사용하세요:
|
||||
|
||||
```python
|
||||
from qdrant_client.http import models as qmodels
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# 특정 조건에 대한 필터 생성
|
||||
preset_filter = qmodels.Filter(
|
||||
must=[
|
||||
qmodels.FieldCondition(
|
||||
key="category",
|
||||
match=qmodels.MatchValue(value="research")
|
||||
),
|
||||
qmodels.FieldCondition(
|
||||
key="year",
|
||||
match=qmodels.MatchValue(value=2024)
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# 사전 설정 필터로 도구 초기화
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
filter=preset_filter # 모든 검색에 적용되는 사전 설정 필터
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### 필터 결합
|
||||
도구는 `QdrantConfig`의 사전 설정 필터와 `filter_by` 및 `filter_value`의 동적 필터를 자동으로 결합합니다:
|
||||
|
||||
```python
|
||||
# QdrantConfig에 category="research"에 대한 사전 설정 필터가 있고
|
||||
# 검색에서 filter_by="year", filter_value=2024를 사용하는 경우
|
||||
# 두 필터가 모두 결합됩니다 (AND 논리)
|
||||
```
|
||||
|
||||
## 검색 매개변수
|
||||
|
||||
이 도구는 스키마에서 다음과 같은 매개변수를 허용합니다:
|
||||
- `query` (str): 유사한 문서를 찾기 위한 검색 쿼리
|
||||
- `filter_by` (str, 선택 사항): 필터링할 메타데이터 필드
|
||||
- `filter_value` (str, 선택 사항): 필터 기준 값
|
||||
- `filter_value` (Any, 선택 사항): 필터 기준 값
|
||||
|
||||
## 반환 형식
|
||||
|
||||
@@ -214,7 +281,7 @@ print(result)
|
||||
|
||||
## 기본 임베딩
|
||||
|
||||
기본적으로, 이 도구는 벡터화를 위해 OpenAI의 `text-embedding-3-small` 모델을 사용합니다. 이를 위해서는 다음이 필요합니다:
|
||||
기본적으로, 이 도구는 벡터화를 위해 OpenAI의 `text-embedding-3-large` 모델을 사용합니다. 이를 위해서는 다음이 필요합니다:
|
||||
- 환경변수에 설정된 OpenAI API 키: `OPENAI_API_KEY`
|
||||
|
||||
## 커스텀 임베딩
|
||||
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
|
||||
# Tokenize and get model outputs
|
||||
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
||||
outputs = model(**inputs)
|
||||
|
||||
|
||||
# Use mean pooling to get text embedding
|
||||
embeddings = outputs.last_hidden_state.mean(dim=1)
|
||||
|
||||
|
||||
# Convert to list of floats and return
|
||||
return embeddings[0].tolist()
|
||||
|
||||
# Use custom embeddings with the tool
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection"
|
||||
),
|
||||
custom_embedding_fn=custom_embeddings # Pass your custom function
|
||||
)
|
||||
```
|
||||
@@ -270,4 +341,4 @@ tool = QdrantVectorSearchTool(
|
||||
export QDRANT_URL="your_qdrant_url" # If not provided in constructor
|
||||
export QDRANT_API_KEY="your_api_key" # If not provided in constructor
|
||||
export OPENAI_API_KEY="your_openai_key" # If using default embeddings
|
||||
```
|
||||
```
|
||||
|
||||
@@ -54,25 +54,25 @@ tool = CSVSearchTool()
|
||||
기본적으로 이 도구는 임베딩과 요약 모두에 OpenAI를 사용합니다. 모델을 사용자 지정하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = CSVSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -46,23 +46,25 @@ tool = DirectorySearchTool(directory='/path/to/directory')
|
||||
DirectorySearchTool은 기본적으로 OpenAI를 사용하여 임베딩 및 요약을 수행합니다. 이 설정의 커스터마이즈 옵션에는 모델 공급자 및 구성을 변경하는 것이 포함되어 있어, 고급 사용자를 위한 유연성을 향상시킵니다.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DirectorySearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # Options include ollama, google, anthropic, llama2, and more
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Additional configurations here
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -56,25 +56,25 @@ tool = DOCXSearchTool(docx='path/to/your/document.docx')
|
||||
기본적으로 이 도구는 임베딩과 요약 모두에 OpenAI를 사용합니다. 모델을 커스터마이즈하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DOCXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -48,27 +48,25 @@ tool = MDXSearchTool(mdx='path/to/your/document.mdx')
|
||||
이 도구는 기본적으로 임베딩과 요약을 위해 OpenAI를 사용합니다. 커스터마이징을 위해 아래와 같이 설정 딕셔너리를 사용할 수 있습니다.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = MDXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # 옵션에는 google, openai, anthropic, llama2 등이 있습니다.
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# 선택적 파라미터를 여기에 포함할 수 있습니다.
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # 또는 openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# 임베딩에 대한 선택적 제목을 여기에 추가할 수 있습니다.
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -45,28 +45,60 @@ tool = PDFSearchTool(pdf='path/to/your/document.pdf')
|
||||
|
||||
## 커스텀 모델 및 임베딩
|
||||
|
||||
기본적으로 이 도구는 임베딩과 요약 모두에 OpenAI를 사용합니다. 모델을 커스터마이즈하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다:
|
||||
기본적으로 이 도구는 임베딩과 요약 모두에 OpenAI를 사용합니다. 모델을 커스터마이즈하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다. 참고: 임베딩은 벡터DB에 저장되어야 하므로 vectordb 설정이 필요합니다.
|
||||
|
||||
```python Code
|
||||
from crewai_tools import PDFSearchTool
|
||||
from chromadb.config import Settings # Chroma 영속성 설정
|
||||
|
||||
tool = PDFSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# 필수: 임베딩 제공자와 설정
|
||||
"embedding_model": {
|
||||
# 사용 가능 공급자: "openai", "azure", "google-generativeai", "google-vertex",
|
||||
# "voyageai", "cohere", "huggingface", "jina", "sentence-transformer",
|
||||
# "text2vec", "ollama", "openclip", "instructor", "onnx", "roboflow", "watsonx", "custom"
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
# "model" 키는 내부적으로 "model_name"으로 매핑됩니다.
|
||||
"model": "text-embedding-3-small",
|
||||
# 선택: API 키 (미설정 시 환경변수 사용)
|
||||
# "api_key": "sk-...",
|
||||
|
||||
# 공급자별 예시
|
||||
# --- Google ---
|
||||
# (provider를 "google-generativeai"로 설정)
|
||||
# "model": "models/embedding-001",
|
||||
# "task_type": "retrieval_document",
|
||||
|
||||
# --- Cohere ---
|
||||
# (provider를 "cohere"로 설정)
|
||||
# "model": "embed-english-v3.0",
|
||||
|
||||
# --- Ollama(로컬) ---
|
||||
# (provider를 "ollama"로 설정)
|
||||
# "model": "nomic-embed-text",
|
||||
},
|
||||
},
|
||||
|
||||
# 필수: 벡터DB 설정
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# Chroma 설정 예시
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# Qdrant 설정 예시
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# 참고: 컬렉션 이름은 도구에서 관리합니다(기본값: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -57,25 +57,34 @@ tool = TXTSearchTool(txt='path/to/text/file.txt')
|
||||
모델을 커스터마이징하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = TXTSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# 필수: 임베딩 제공자 + 설정
|
||||
"embedding_model": {
|
||||
"provider": "openai", # 또는 google-generativeai, cohere, ollama 등
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...", # 환경변수 사용 시 생략 가능
|
||||
# 공급자별 예시: Google → model: "models/embedding-001", task_type: "retrieval_document"
|
||||
},
|
||||
},
|
||||
|
||||
# 필수: 벡터DB 설정
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# Chroma 설정(영속성 예시)
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
|
||||
# Qdrant 벡터 파라미터 예시:
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# 참고: 컬렉션 이름은 도구에서 관리합니다(기본값: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -54,25 +54,25 @@ tool = XMLSearchTool(xml='path/to/your/xmlfile.xml')
|
||||
기본적으로 이 도구는 임베딩과 요약 모두에 OpenAI를 사용합니다. 모델을 커스터마이징하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = XMLSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -11,7 +11,7 @@ mode: "wide"
|
||||
<Card
|
||||
title="Bedrock Invoke Agent Tool"
|
||||
icon="cloud"
|
||||
href="/en/tools/tool-integrations/bedrockinvokeagenttool"
|
||||
href="/ko/tools/integration/bedrockinvokeagenttool"
|
||||
color="#0891B2"
|
||||
>
|
||||
Invoke Amazon Bedrock Agents from CrewAI to orchestrate actions across AWS services.
|
||||
@@ -20,7 +20,7 @@ mode: "wide"
|
||||
<Card
|
||||
title="CrewAI Automation Tool"
|
||||
icon="bolt"
|
||||
href="/en/tools/tool-integrations/crewaiautomationtool"
|
||||
href="/ko/tools/integration/crewaiautomationtool"
|
||||
color="#7C3AED"
|
||||
>
|
||||
Automate deployment and operations by integrating CrewAI with external platforms and workflows.
|
||||
|
||||
@@ -704,7 +704,7 @@ class KnowledgeMonitorListener(BaseEventListener):
|
||||
knowledge_monitor = KnowledgeMonitorListener()
|
||||
```
|
||||
|
||||
Para mais informações sobre como usar eventos, consulte a documentação [Event Listeners](https://docs.crewai.com/concepts/event-listener).
|
||||
Para mais informações sobre como usar eventos, consulte a documentação [Event Listeners](/pt-BR/concepts/event-listener).
|
||||
|
||||
### Fontes de Knowledge Personalizadas
|
||||
|
||||
|
||||
@@ -725,7 +725,7 @@ O CrewAI suporta respostas em streaming de LLMs, permitindo que sua aplicação
|
||||
```
|
||||
|
||||
<Tip>
|
||||
[Clique aqui](https://docs.crewai.com/concepts/event-listener#event-listeners) para mais detalhes
|
||||
[Clique aqui](/pt-BR/concepts/event-listener#event-listeners) para mais detalhes
|
||||
</Tip>
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
@@ -36,7 +36,7 @@ Você também pode baixar templates diretamente do marketplace clicando em `Down
|
||||
<Card title="Ferramentas & Integrações" href="/pt-BR/enterprise/features/tools-and-integrations" icon="wrench">
|
||||
Conecte apps externos e gerencie ferramentas internas que seus agentes podem usar.
|
||||
</Card>
|
||||
<Card title="Repositório de Ferramentas" href="/pt-BR/enterprise/features/tool-repository" icon="toolbox">
|
||||
<Card title="Repositório de Ferramentas" href="/pt-BR/enterprise/guides/tool-repository" icon="toolbox">
|
||||
Publique e instale ferramentas para ampliar as capacidades dos seus crews.
|
||||
</Card>
|
||||
<Card title="Repositório de Agentes" href="/pt-BR/enterprise/features/agent-repositories" icon="people-group">
|
||||
|
||||
@@ -231,7 +231,7 @@ Ferramentas & Integrações é o hub central para conectar aplicações de terce
|
||||
## Relacionados
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="Repositório de Ferramentas" href="/pt-BR/enterprise/features/tool-repository" icon="toolbox">
|
||||
<Card title="Repositório de Ferramentas" href="/pt-BR/enterprise/guides/tool-repository" icon="toolbox">
|
||||
Publique e instale ferramentas para ampliar as capacidades dos seus crews.
|
||||
</Card>
|
||||
<Card title="Automação com Webhook" href="/pt-BR/enterprise/guides/webhook-automation" icon="bolt">
|
||||
|
||||
@@ -21,7 +21,7 @@ O repositório não é um sistema de controle de versões. Use Git para rastrear
|
||||
Antes de usar o Repositório de Ferramentas, certifique-se de que você possui:
|
||||
|
||||
- Uma conta [CrewAI AMP](https://app.crewai.com)
|
||||
- [CrewAI CLI](https://docs.crewai.com/concepts/cli#cli) instalada
|
||||
- [CrewAI CLI](/pt-BR/concepts/cli#cli) instalada
|
||||
- uv>=0.5.0 instalado. Veja [como atualizar](https://docs.astral.sh/uv/getting-started/installation/#upgrading-uv)
|
||||
- [Git](https://git-scm.com) instalado e configurado
|
||||
- Permissões de acesso para publicar ou instalar ferramentas em sua organização CrewAI AMP
|
||||
@@ -66,7 +66,7 @@ Por padrão, as ferramentas são publicadas como privadas. Para tornar uma ferra
|
||||
crewai tool publish --public
|
||||
```
|
||||
|
||||
Para mais detalhes sobre como construir ferramentas, acesse [Criando suas próprias ferramentas](https://docs.crewai.com/concepts/tools#creating-your-own-tools).
|
||||
Para mais detalhes sobre como construir ferramentas, acesse [Criando suas próprias ferramentas](/pt-BR/concepts/tools#creating-your-own-tools).
|
||||
|
||||
## Atualizando ferramentas
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ mode: "wide"
|
||||
|
||||
Para integrar a entrada humana na execução do agente, defina a flag `human_input` na definição da tarefa. Quando habilitada, o agente solicitará a entrada do usuário antes de entregar sua resposta final. Essa entrada pode fornecer contexto extra, esclarecer ambiguidades ou validar a saída do agente.
|
||||
|
||||
Para orientações detalhadas de implementação, veja nosso [guia Human-in-the-Loop](/pt-BR/how-to/human-in-the-loop).
|
||||
Para orientações detalhadas de implementação, veja nosso [guia Human-in-the-Loop](/pt-BR/enterprise/guides/human-in-the-loop).
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Quais opções avançadas de customização estão disponíveis para aprimorar e personalizar o comportamento e as capacidades dos agentes na CrewAI?">
|
||||
@@ -142,7 +142,7 @@ mode: "wide"
|
||||
<Accordion title="Como posso criar ferramentas personalizadas para meus agentes CrewAI?">
|
||||
Você pode criar ferramentas personalizadas herdando da classe `BaseTool` fornecida pela CrewAI ou usando o decorador de ferramenta. Herdar envolve definir uma nova classe que herda de `BaseTool`, especificando o nome, a descrição e o método `_run` para a lógica operacional. O decorador de ferramenta permite criar um objeto `Tool` diretamente com os atributos necessários e uma lógica funcional.
|
||||
|
||||
<Card href="https://docs.crewai.com/how-to/create-custom-tools" icon="code">CrewAI Tools Guide</Card>
|
||||
<Card href="/pt-BR/learn/create-custom-tools" icon="code">CrewAI Tools Guide</Card>
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Como controlar o número máximo de solicitações por minuto que toda a crew pode realizar?">
|
||||
|
||||
379
docs/pt-BR/learn/execution-hooks.mdx
Normal file
379
docs/pt-BR/learn/execution-hooks.mdx
Normal file
@@ -0,0 +1,379 @@
|
||||
---
|
||||
title: Visão Geral dos Hooks de Execução
|
||||
description: Entendendo e usando hooks de execução no CrewAI para controle fino sobre operações de agentes
|
||||
mode: "wide"
|
||||
---
|
||||
|
||||
Os Hooks de Execução fornecem controle fino sobre o comportamento em tempo de execução dos seus agentes CrewAI. Diferentemente dos hooks de kickoff que são executados antes e depois da execução da crew, os hooks de execução interceptam operações específicas durante a execução do agente, permitindo que você modifique comportamentos, implemente verificações de segurança e adicione monitoramento abrangente.
|
||||
|
||||
## Tipos de Hooks de Execução
|
||||
|
||||
O CrewAI fornece duas categorias principais de hooks de execução:
|
||||
|
||||
### 1. [Hooks de Chamada LLM](/pt-BR/learn/llm-hooks)
|
||||
|
||||
Controle e monitore interações com o modelo de linguagem:
|
||||
- **Antes da Chamada LLM**: Modifique prompts, valide entradas, implemente gates de aprovação
|
||||
- **Depois da Chamada LLM**: Transforme respostas, sanitize saídas, atualize histórico de conversação
|
||||
|
||||
**Casos de Uso:**
|
||||
- Limitação de iterações
|
||||
- Rastreamento de custos e monitoramento de uso de tokens
|
||||
- Sanitização de respostas e filtragem de conteúdo
|
||||
- Aprovação humana para chamadas LLM
|
||||
- Adição de diretrizes de segurança ou contexto
|
||||
- Logging de debug e inspeção de requisição/resposta
|
||||
|
||||
[Ver Documentação de Hooks LLM →](/pt-BR/learn/llm-hooks)
|
||||
|
||||
### 2. [Hooks de Chamada de Ferramenta](/pt-BR/learn/tool-hooks)
|
||||
|
||||
Controle e monitore execução de ferramentas:
|
||||
- **Antes da Chamada de Ferramenta**: Modifique entradas, valide parâmetros, bloqueie operações perigosas
|
||||
- **Depois da Chamada de Ferramenta**: Transforme resultados, sanitize saídas, registre detalhes de execução
|
||||
|
||||
**Casos de Uso:**
|
||||
- Guardrails de segurança para operações destrutivas
|
||||
- Aprovação humana para ações sensíveis
|
||||
- Validação e sanitização de entrada
|
||||
- Cache de resultados e limitação de taxa
|
||||
- Análise de uso de ferramentas
|
||||
- Logging de debug e monitoramento
|
||||
|
||||
[Ver Documentação de Hooks de Ferramenta →](/pt-BR/learn/tool-hooks)
|
||||
|
||||
## Métodos de Registro
|
||||
|
||||
### 1. Hooks Baseados em Decoradores (Recomendado)
|
||||
|
||||
A maneira mais limpa e pythônica de registrar hooks:
|
||||
|
||||
```python
|
||||
from crewai.hooks import before_llm_call, after_llm_call, before_tool_call, after_tool_call
|
||||
|
||||
@before_llm_call
|
||||
def limit_iterations(context):
|
||||
"""Previne loops infinitos limitando iterações."""
|
||||
if context.iterations > 10:
|
||||
return False # Bloquear execução
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def sanitize_response(context):
|
||||
"""Remove dados sensíveis das respostas do LLM."""
|
||||
if "API_KEY" in context.response:
|
||||
return context.response.replace("API_KEY", "[CENSURADO]")
|
||||
return None
|
||||
|
||||
@before_tool_call
|
||||
def block_dangerous_tools(context):
|
||||
"""Bloqueia operações destrutivas."""
|
||||
if context.tool_name == "delete_database":
|
||||
return False # Bloquear execução
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def log_tool_result(context):
|
||||
"""Registra execução de ferramenta."""
|
||||
print(f"Ferramenta {context.tool_name} concluída")
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. Hooks com Escopo de Crew
|
||||
|
||||
Aplica hooks apenas a instâncias específicas de crew:
|
||||
|
||||
```python
|
||||
from crewai import Crew, Process
|
||||
from crewai.project import CrewBase, crew
|
||||
from crewai.hooks import before_llm_call_crew, after_tool_call_crew
|
||||
|
||||
@CrewBase
|
||||
class MyProjCrew:
|
||||
@before_llm_call_crew
|
||||
def validate_inputs(self, context):
|
||||
# Aplica-se apenas a esta crew
|
||||
print(f"Chamada LLM em {self.__class__.__name__}")
|
||||
return None
|
||||
|
||||
@after_tool_call_crew
|
||||
def log_results(self, context):
|
||||
# Logging específico da crew
|
||||
print(f"Resultado da ferramenta: {context.tool_result[:50]}...")
|
||||
return None
|
||||
|
||||
@crew
|
||||
def crew(self) -> Crew:
|
||||
return Crew(
|
||||
agents=self.agents,
|
||||
tasks=self.tasks,
|
||||
process=Process.sequential
|
||||
)
|
||||
```
|
||||
|
||||
## Fluxo de Execução de Hooks
|
||||
|
||||
### Fluxo de Chamada LLM
|
||||
|
||||
```
|
||||
Agente precisa chamar LLM
|
||||
↓
|
||||
[Hooks Antes da Chamada LLM Executam]
|
||||
├→ Hook 1: Validar contagem de iterações
|
||||
├→ Hook 2: Adicionar contexto de segurança
|
||||
└→ Hook 3: Registrar requisição
|
||||
↓
|
||||
Se algum hook retornar False:
|
||||
├→ Bloquear chamada LLM
|
||||
└→ Lançar ValueError
|
||||
↓
|
||||
Se todos os hooks retornarem True/None:
|
||||
├→ Chamada LLM prossegue
|
||||
└→ Resposta gerada
|
||||
↓
|
||||
[Hooks Depois da Chamada LLM Executam]
|
||||
├→ Hook 1: Sanitizar resposta
|
||||
├→ Hook 2: Registrar resposta
|
||||
└→ Hook 3: Atualizar métricas
|
||||
↓
|
||||
Resposta final retornada
|
||||
```
|
||||
|
||||
### Fluxo de Chamada de Ferramenta
|
||||
|
||||
```
|
||||
Agente precisa executar ferramenta
|
||||
↓
|
||||
[Hooks Antes da Chamada de Ferramenta Executam]
|
||||
├→ Hook 1: Verificar se ferramenta é permitida
|
||||
├→ Hook 2: Validar entradas
|
||||
└→ Hook 3: Solicitar aprovação se necessário
|
||||
↓
|
||||
Se algum hook retornar False:
|
||||
├→ Bloquear execução da ferramenta
|
||||
└→ Retornar mensagem de erro
|
||||
↓
|
||||
Se todos os hooks retornarem True/None:
|
||||
├→ Execução da ferramenta prossegue
|
||||
└→ Resultado gerado
|
||||
↓
|
||||
[Hooks Depois da Chamada de Ferramenta Executam]
|
||||
├→ Hook 1: Sanitizar resultado
|
||||
├→ Hook 2: Fazer cache do resultado
|
||||
└→ Hook 3: Registrar métricas
|
||||
↓
|
||||
Resultado final retornado
|
||||
```
|
||||
|
||||
## Objetos de Contexto de Hook
|
||||
|
||||
### LLMCallHookContext
|
||||
|
||||
Fornece acesso ao estado de execução do LLM:
|
||||
|
||||
```python
|
||||
class LLMCallHookContext:
|
||||
executor: CrewAgentExecutor # Acesso completo ao executor
|
||||
messages: list # Lista de mensagens mutável
|
||||
agent: Agent # Agente atual
|
||||
task: Task # Tarefa atual
|
||||
crew: Crew # Instância da crew
|
||||
llm: BaseLLM # Instância do LLM
|
||||
iterations: int # Iteração atual
|
||||
response: str | None # Resposta do LLM (hooks posteriores)
|
||||
```
|
||||
|
||||
### ToolCallHookContext
|
||||
|
||||
Fornece acesso ao estado de execução da ferramenta:
|
||||
|
||||
```python
|
||||
class ToolCallHookContext:
|
||||
tool_name: str # Ferramenta sendo chamada
|
||||
tool_input: dict # Parâmetros de entrada mutáveis
|
||||
tool: CrewStructuredTool # Instância da ferramenta
|
||||
agent: Agent | None # Agente executando
|
||||
task: Task | None # Tarefa atual
|
||||
crew: Crew | None # Instância da crew
|
||||
tool_result: str | None # Resultado da ferramenta (hooks posteriores)
|
||||
```
|
||||
|
||||
## Padrões Comuns
|
||||
|
||||
### Segurança e Validação
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def safety_check(context):
|
||||
"""Bloqueia operações destrutivas."""
|
||||
dangerous = ['delete_file', 'drop_table', 'system_shutdown']
|
||||
if context.tool_name in dangerous:
|
||||
print(f"🛑 Bloqueado: {context.tool_name}")
|
||||
return False
|
||||
return None
|
||||
|
||||
@before_llm_call
|
||||
def iteration_limit(context):
|
||||
"""Previne loops infinitos."""
|
||||
if context.iterations > 15:
|
||||
print("⛔ Máximo de iterações excedido")
|
||||
return False
|
||||
return None
|
||||
```
|
||||
|
||||
### Humano no Loop
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def require_approval(context):
|
||||
"""Requer aprovação para operações sensíveis."""
|
||||
sensitive = ['send_email', 'make_payment', 'post_message']
|
||||
|
||||
if context.tool_name in sensitive:
|
||||
response = context.request_human_input(
|
||||
prompt=f"Aprovar {context.tool_name}?",
|
||||
default_message="Digite 'sim' para aprovar:"
|
||||
)
|
||||
|
||||
if response.lower() != 'sim':
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### Monitoramento e Análise
|
||||
|
||||
```python
|
||||
from collections import defaultdict
|
||||
import time
|
||||
|
||||
metrics = defaultdict(lambda: {'count': 0, 'total_time': 0})
|
||||
|
||||
@before_tool_call
|
||||
def start_timer(context):
|
||||
context.tool_input['_start'] = time.time()
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def track_metrics(context):
|
||||
start = context.tool_input.get('_start', time.time())
|
||||
duration = time.time() - start
|
||||
|
||||
metrics[context.tool_name]['count'] += 1
|
||||
metrics[context.tool_name]['total_time'] += duration
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
## Gerenciamento de Hooks
|
||||
|
||||
### Limpar Todos os Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import clear_all_global_hooks
|
||||
|
||||
# Limpa todos os hooks de uma vez
|
||||
result = clear_all_global_hooks()
|
||||
print(f"Limpou {result['total']} hooks")
|
||||
```
|
||||
|
||||
### Limpar Tipos Específicos de Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
clear_before_llm_call_hooks,
|
||||
clear_after_llm_call_hooks,
|
||||
clear_before_tool_call_hooks,
|
||||
clear_after_tool_call_hooks
|
||||
)
|
||||
|
||||
# Limpar tipos específicos
|
||||
llm_before_count = clear_before_llm_call_hooks()
|
||||
tool_after_count = clear_after_tool_call_hooks()
|
||||
```
|
||||
|
||||
## Melhores Práticas
|
||||
|
||||
### 1. Mantenha os Hooks Focados
|
||||
Cada hook deve ter uma responsabilidade única e clara.
|
||||
|
||||
### 2. Trate Erros Graciosamente
|
||||
```python
|
||||
@before_llm_call
|
||||
def safe_hook(context):
|
||||
try:
|
||||
if some_condition:
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"Erro no hook: {e}")
|
||||
return None # Permitir execução apesar do erro
|
||||
```
|
||||
|
||||
### 3. Modifique o Contexto In-Place
|
||||
```python
|
||||
# ✅ Correto - modificar in-place
|
||||
@before_llm_call
|
||||
def add_context(context):
|
||||
context.messages.append({"role": "system", "content": "Seja conciso"})
|
||||
|
||||
# ❌ Errado - substitui referência
|
||||
@before_llm_call
|
||||
def wrong_approach(context):
|
||||
context.messages = [{"role": "system", "content": "Seja conciso"}]
|
||||
```
|
||||
|
||||
### 4. Use Type Hints
|
||||
```python
|
||||
from crewai.hooks import LLMCallHookContext, ToolCallHookContext
|
||||
|
||||
def my_llm_hook(context: LLMCallHookContext) -> bool | None:
|
||||
return None
|
||||
|
||||
def my_tool_hook(context: ToolCallHookContext) -> str | None:
|
||||
return None
|
||||
```
|
||||
|
||||
### 5. Limpe em Testes
|
||||
```python
|
||||
import pytest
|
||||
from crewai.hooks import clear_all_global_hooks
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def clean_hooks():
|
||||
"""Reseta hooks depois de cada teste."""
|
||||
yield
|
||||
clear_all_global_hooks()
|
||||
```
|
||||
|
||||
## Quando Usar Qual Hook
|
||||
|
||||
### Use Hooks LLM Quando:
|
||||
- Implementar limites de iteração
|
||||
- Adicionar contexto ou diretrizes de segurança aos prompts
|
||||
- Rastrear uso de tokens e custos
|
||||
- Sanitizar ou transformar respostas
|
||||
- Implementar gates de aprovação para chamadas LLM
|
||||
- Fazer debug de interações de prompt/resposta
|
||||
|
||||
### Use Hooks de Ferramenta Quando:
|
||||
- Bloquear operações perigosas ou destrutivas
|
||||
- Validar entradas de ferramenta antes da execução
|
||||
- Implementar gates de aprovação para ações sensíveis
|
||||
- Fazer cache de resultados de ferramenta
|
||||
- Rastrear uso e performance de ferramentas
|
||||
- Sanitizar saídas de ferramenta
|
||||
- Limitar taxa de chamadas de ferramenta
|
||||
|
||||
### Use Ambos Quando:
|
||||
Construir sistemas abrangentes de observabilidade, segurança ou aprovação que precisam monitorar todas as operações do agente.
|
||||
|
||||
## Documentação Relacionada
|
||||
|
||||
- [Hooks de Chamada LLM →](/learn/llm-hooks) - Documentação detalhada de hooks LLM
|
||||
- [Hooks de Chamada de Ferramenta →](/learn/tool-hooks) - Documentação detalhada de hooks de ferramenta
|
||||
- [Hooks Antes e Depois do Kickoff →](/learn/before-and-after-kickoff-hooks) - Hooks do ciclo de vida da crew
|
||||
- [Humano no Loop →](/learn/human-in-the-loop) - Padrões de entrada humana
|
||||
|
||||
## Conclusão
|
||||
|
||||
Os Hooks de Execução fornecem controle poderoso sobre o comportamento em tempo de execução do agente. Use-os para implementar guardrails de segurança, fluxos de trabalho de aprovação, monitoramento abrangente e lógica de negócio personalizada. Combinados com tratamento adequado de erros, segurança de tipos e considerações de performance, os hooks permitem sistemas de agentes seguros, prontos para produção e observáveis.
|
||||
@@ -96,7 +96,7 @@ project_crew = Crew(
|
||||
```
|
||||
|
||||
<Tip>
|
||||
Para mais detalhes sobre a criação e personalização de um agente gerente, confira a [documentação do Custom Manager Agent](https://docs.crewai.com/how-to/custom-manager-agent#custom-manager-agent).
|
||||
Para mais detalhes sobre a criação e personalização de um agente gerente, confira a [documentação do Custom Manager Agent](/pt-BR/learn/custom-manager-agent).
|
||||
</Tip>
|
||||
|
||||
|
||||
|
||||
388
docs/pt-BR/learn/llm-hooks.mdx
Normal file
388
docs/pt-BR/learn/llm-hooks.mdx
Normal file
@@ -0,0 +1,388 @@
|
||||
---
|
||||
title: Hooks de Chamada LLM
|
||||
description: Aprenda a usar hooks de chamada LLM para interceptar, modificar e controlar interações com modelos de linguagem no CrewAI
|
||||
mode: "wide"
|
||||
---
|
||||
|
||||
Os Hooks de Chamada LLM fornecem controle fino sobre interações com modelos de linguagem durante a execução do agente. Esses hooks permitem interceptar chamadas LLM, modificar prompts, transformar respostas, implementar gates de aprovação e adicionar logging ou monitoramento personalizado.
|
||||
|
||||
## Visão Geral
|
||||
|
||||
Os hooks LLM são executados em dois pontos críticos:
|
||||
- **Antes da Chamada LLM**: Modificar mensagens, validar entradas ou bloquear execução
|
||||
- **Depois da Chamada LLM**: Transformar respostas, sanitizar saídas ou modificar histórico de conversação
|
||||
|
||||
## Tipos de Hook
|
||||
|
||||
### Hooks Antes da Chamada LLM
|
||||
|
||||
Executados antes de cada chamada LLM, esses hooks podem:
|
||||
- Inspecionar e modificar mensagens enviadas ao LLM
|
||||
- Bloquear execução LLM com base em condições
|
||||
- Implementar limitação de taxa ou gates de aprovação
|
||||
- Adicionar contexto ou mensagens do sistema
|
||||
- Registrar detalhes da requisição
|
||||
|
||||
**Assinatura:**
|
||||
```python
|
||||
def before_hook(context: LLMCallHookContext) -> bool | None:
|
||||
# Retorne False para bloquear execução
|
||||
# Retorne True ou None para permitir execução
|
||||
...
|
||||
```
|
||||
|
||||
### Hooks Depois da Chamada LLM
|
||||
|
||||
Executados depois de cada chamada LLM, esses hooks podem:
|
||||
- Modificar ou sanitizar respostas do LLM
|
||||
- Adicionar metadados ou formatação
|
||||
- Registrar detalhes da resposta
|
||||
- Atualizar histórico de conversação
|
||||
- Implementar filtragem de conteúdo
|
||||
|
||||
**Assinatura:**
|
||||
```python
|
||||
def after_hook(context: LLMCallHookContext) -> str | None:
|
||||
# Retorne string de resposta modificada
|
||||
# Retorne None para manter resposta original
|
||||
...
|
||||
```
|
||||
|
||||
## Contexto do Hook LLM
|
||||
|
||||
O objeto `LLMCallHookContext` fornece acesso abrangente ao estado de execução:
|
||||
|
||||
```python
|
||||
class LLMCallHookContext:
|
||||
executor: CrewAgentExecutor # Referência completa ao executor
|
||||
messages: list # Lista de mensagens mutável
|
||||
agent: Agent # Agente atual
|
||||
task: Task # Tarefa atual
|
||||
crew: Crew # Instância da crew
|
||||
llm: BaseLLM # Instância do LLM
|
||||
iterations: int # Contagem de iteração atual
|
||||
response: str | None # Resposta do LLM (apenas hooks posteriores)
|
||||
```
|
||||
|
||||
### Modificando Mensagens
|
||||
|
||||
**Importante:** Sempre modifique mensagens in-place:
|
||||
|
||||
```python
|
||||
# ✅ Correto - modificar in-place
|
||||
def add_context(context: LLMCallHookContext) -> None:
|
||||
context.messages.append({"role": "system", "content": "Seja conciso"})
|
||||
|
||||
# ❌ Errado - substitui referência da lista
|
||||
def wrong_approach(context: LLMCallHookContext) -> None:
|
||||
context.messages = [{"role": "system", "content": "Seja conciso"}]
|
||||
```
|
||||
|
||||
## Métodos de Registro
|
||||
|
||||
### 1. Registro Baseado em Decoradores (Recomendado)
|
||||
|
||||
Use decoradores para sintaxe mais limpa:
|
||||
|
||||
```python
|
||||
from crewai.hooks import before_llm_call, after_llm_call
|
||||
|
||||
@before_llm_call
|
||||
def validate_iteration_count(context):
|
||||
"""Valida a contagem de iterações."""
|
||||
if context.iterations > 10:
|
||||
print("⚠️ Máximo de iterações excedido")
|
||||
return False # Bloquear execução
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def sanitize_response(context):
|
||||
"""Remove dados sensíveis."""
|
||||
if context.response and "API_KEY" in context.response:
|
||||
return context.response.replace("API_KEY", "[CENSURADO]")
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. Hooks com Escopo de Crew
|
||||
|
||||
Registre hooks para uma instância específica de crew:
|
||||
|
||||
```python
|
||||
from crewai import Crew, Process
|
||||
from crewai.project import CrewBase, crew
|
||||
from crewai.hooks import before_llm_call_crew, after_llm_call_crew
|
||||
|
||||
@CrewBase
|
||||
class MyProjCrew:
|
||||
@before_llm_call_crew
|
||||
def validate_inputs(self, context):
|
||||
# Aplica-se apenas a esta crew
|
||||
if context.iterations == 0:
|
||||
print(f"Iniciando tarefa: {context.task.description}")
|
||||
return None
|
||||
|
||||
@after_llm_call_crew
|
||||
def log_responses(self, context):
|
||||
# Logging específico da crew
|
||||
print(f"Comprimento da resposta: {len(context.response)}")
|
||||
return None
|
||||
|
||||
@crew
|
||||
def crew(self) -> Crew:
|
||||
return Crew(
|
||||
agents=self.agents,
|
||||
tasks=self.tasks,
|
||||
process=Process.sequential,
|
||||
verbose=True
|
||||
)
|
||||
```
|
||||
|
||||
## Casos de Uso Comuns
|
||||
|
||||
### 1. Limitação de Iterações
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def limit_iterations(context: LLMCallHookContext) -> bool | None:
|
||||
"""Previne loops infinitos limitando iterações."""
|
||||
max_iterations = 15
|
||||
if context.iterations > max_iterations:
|
||||
print(f"⛔ Bloqueado: Excedeu {max_iterations} iterações")
|
||||
return False # Bloquear execução
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. Gate de Aprovação Humana
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def require_approval(context: LLMCallHookContext) -> bool | None:
|
||||
"""Requer aprovação após certas iterações."""
|
||||
if context.iterations > 5:
|
||||
response = context.request_human_input(
|
||||
prompt=f"Iteração {context.iterations}: Aprovar chamada LLM?",
|
||||
default_message="Pressione Enter para aprovar, ou digite 'não' para bloquear:"
|
||||
)
|
||||
if response.lower() == "não":
|
||||
print("🚫 Chamada LLM bloqueada pelo usuário")
|
||||
return False
|
||||
return None
|
||||
```
|
||||
|
||||
### 3. Adicionando Contexto do Sistema
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def add_guardrails(context: LLMCallHookContext) -> None:
|
||||
"""Adiciona diretrizes de segurança a cada chamada LLM."""
|
||||
context.messages.append({
|
||||
"role": "system",
|
||||
"content": "Garanta que as respostas sejam factuais e cite fontes quando possível."
|
||||
})
|
||||
return None
|
||||
```
|
||||
|
||||
### 4. Sanitização de Resposta
|
||||
|
||||
```python
|
||||
@after_llm_call
|
||||
def sanitize_sensitive_data(context: LLMCallHookContext) -> str | None:
|
||||
"""Remove padrões sensíveis."""
|
||||
if not context.response:
|
||||
return None
|
||||
|
||||
import re
|
||||
sanitized = context.response
|
||||
sanitized = re.sub(r'\b\d{3}\.\d{3}\.\d{3}-\d{2}\b', '[CPF-CENSURADO]', sanitized)
|
||||
sanitized = re.sub(r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b', '[CARTÃO-CENSURADO]', sanitized)
|
||||
|
||||
return sanitized
|
||||
```
|
||||
|
||||
### 5. Rastreamento de Custos
|
||||
|
||||
```python
|
||||
import tiktoken
|
||||
|
||||
@before_llm_call
|
||||
def track_token_usage(context: LLMCallHookContext) -> None:
|
||||
"""Rastreia tokens de entrada."""
|
||||
encoding = tiktoken.get_encoding("cl100k_base")
|
||||
total_tokens = sum(
|
||||
len(encoding.encode(msg.get("content", "")))
|
||||
for msg in context.messages
|
||||
)
|
||||
print(f"📊 Tokens de entrada: ~{total_tokens}")
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def track_response_tokens(context: LLMCallHookContext) -> None:
|
||||
"""Rastreia tokens de resposta."""
|
||||
if context.response:
|
||||
encoding = tiktoken.get_encoding("cl100k_base")
|
||||
tokens = len(encoding.encode(context.response))
|
||||
print(f"📊 Tokens de resposta: ~{tokens}")
|
||||
return None
|
||||
```
|
||||
|
||||
### 6. Logging de Debug
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def debug_request(context: LLMCallHookContext) -> None:
|
||||
"""Debug de requisição LLM."""
|
||||
print(f"""
|
||||
🔍 Debug de Chamada LLM:
|
||||
- Agente: {context.agent.role}
|
||||
- Tarefa: {context.task.description[:50]}...
|
||||
- Iteração: {context.iterations}
|
||||
- Contagem de Mensagens: {len(context.messages)}
|
||||
- Última Mensagem: {context.messages[-1] if context.messages else 'Nenhuma'}
|
||||
""")
|
||||
return None
|
||||
|
||||
@after_llm_call
|
||||
def debug_response(context: LLMCallHookContext) -> None:
|
||||
"""Debug de resposta LLM."""
|
||||
if context.response:
|
||||
print(f"✅ Preview da Resposta: {context.response[:100]}...")
|
||||
return None
|
||||
```
|
||||
|
||||
## Gerenciamento de Hooks
|
||||
|
||||
### Desregistrando Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
register_before_llm_call_hook,
|
||||
unregister_before_llm_call_hook,
|
||||
unregister_after_llm_call_hook
|
||||
)
|
||||
|
||||
# Desregistrar hook específico
|
||||
def my_hook(context):
|
||||
...
|
||||
|
||||
register_before_llm_call_hook(my_hook)
|
||||
# Mais tarde...
|
||||
unregister_before_llm_call_hook(my_hook) # Retorna True se encontrado
|
||||
```
|
||||
|
||||
### Limpando Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
clear_before_llm_call_hooks,
|
||||
clear_after_llm_call_hooks,
|
||||
clear_all_llm_call_hooks
|
||||
)
|
||||
|
||||
# Limpar tipo específico de hook
|
||||
count = clear_before_llm_call_hooks()
|
||||
print(f"Limpou {count} hooks antes")
|
||||
|
||||
# Limpar todos os hooks LLM
|
||||
before_count, after_count = clear_all_llm_call_hooks()
|
||||
print(f"Limpou {before_count} hooks antes e {after_count} hooks depois")
|
||||
```
|
||||
|
||||
## Padrões Avançados
|
||||
|
||||
### Execução Condicional de Hook
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def conditional_blocking(context: LLMCallHookContext) -> bool | None:
|
||||
"""Bloqueia apenas em condições específicas."""
|
||||
# Bloquear apenas para agentes específicos
|
||||
if context.agent.role == "researcher" and context.iterations > 10:
|
||||
return False
|
||||
|
||||
# Bloquear apenas para tarefas específicas
|
||||
if "sensível" in context.task.description.lower() and context.iterations > 5:
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### Modificações com Consciência de Contexto
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def adaptive_prompting(context: LLMCallHookContext) -> None:
|
||||
"""Adiciona contexto diferente baseado na iteração."""
|
||||
if context.iterations == 0:
|
||||
context.messages.append({
|
||||
"role": "system",
|
||||
"content": "Comece com uma visão geral de alto nível."
|
||||
})
|
||||
elif context.iterations > 3:
|
||||
context.messages.append({
|
||||
"role": "system",
|
||||
"content": "Foque em detalhes específicos e forneça exemplos."
|
||||
})
|
||||
return None
|
||||
```
|
||||
|
||||
## Melhores Práticas
|
||||
|
||||
1. **Mantenha Hooks Focados**: Cada hook deve ter uma responsabilidade única
|
||||
2. **Evite Computação Pesada**: Hooks executam em cada chamada LLM
|
||||
3. **Trate Erros Graciosamente**: Use try-except para prevenir falhas de hooks
|
||||
4. **Use Type Hints**: Aproveite `LLMCallHookContext` para melhor suporte IDE
|
||||
5. **Documente Comportamento do Hook**: Especialmente para condições de bloqueio
|
||||
6. **Teste Hooks Independentemente**: Teste unitário de hooks antes de usar em produção
|
||||
7. **Limpe Hooks em Testes**: Use `clear_all_llm_call_hooks()` entre execuções de teste
|
||||
8. **Modifique In-Place**: Sempre modifique `context.messages` in-place, nunca substitua
|
||||
|
||||
## Tratamento de Erros
|
||||
|
||||
```python
|
||||
@before_llm_call
|
||||
def safe_hook(context: LLMCallHookContext) -> bool | None:
|
||||
try:
|
||||
# Sua lógica de hook
|
||||
if some_condition:
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"⚠️ Erro no hook: {e}")
|
||||
# Decida: permitir ou bloquear em erro
|
||||
return None # Permitir execução apesar do erro
|
||||
```
|
||||
|
||||
## Segurança de Tipos
|
||||
|
||||
```python
|
||||
from crewai.hooks import LLMCallHookContext, BeforeLLMCallHookType, AfterLLMCallHookType, register_before_llm_call_hook, register_after_llm_call_hook
|
||||
|
||||
# Anotações de tipo explícitas
|
||||
def my_before_hook(context: LLMCallHookContext) -> bool | None:
|
||||
return None
|
||||
|
||||
def my_after_hook(context: LLMCallHookContext) -> str | None:
|
||||
return None
|
||||
|
||||
# Registro type-safe
|
||||
register_before_llm_call_hook(my_before_hook)
|
||||
register_after_llm_call_hook(my_after_hook)
|
||||
```
|
||||
|
||||
## Solução de Problemas
|
||||
|
||||
### Hook Não Está Executando
|
||||
- Verifique se o hook está registrado antes da execução da crew
|
||||
- Verifique se algum hook anterior retornou `False` (bloqueia hooks subsequentes)
|
||||
- Garanta que a assinatura do hook corresponda ao tipo esperado
|
||||
|
||||
### Modificações de Mensagem Não Persistem
|
||||
- Use modificações in-place: `context.messages.append()`
|
||||
- Não substitua a lista: `context.messages = []`
|
||||
|
||||
### Modificações de Resposta Não Funcionam
|
||||
- Retorne a string modificada dos hooks posteriores
|
||||
- Retornar `None` mantém a resposta original
|
||||
|
||||
## Conclusão
|
||||
|
||||
Os Hooks de Chamada LLM fornecem capacidades poderosas para controlar e monitorar interações com modelos de linguagem no CrewAI. Use-os para implementar guardrails de segurança, gates de aprovação, logging, rastreamento de custos e sanitização de respostas. Combinados com tratamento adequado de erros e segurança de tipos, os hooks permitem sistemas de agentes robustos e prontos para produção.
|
||||
|
||||
498
docs/pt-BR/learn/tool-hooks.mdx
Normal file
498
docs/pt-BR/learn/tool-hooks.mdx
Normal file
@@ -0,0 +1,498 @@
|
||||
---
|
||||
title: Hooks de Chamada de Ferramenta
|
||||
description: Aprenda a usar hooks de chamada de ferramenta para interceptar, modificar e controlar execução de ferramentas no CrewAI
|
||||
mode: "wide"
|
||||
---
|
||||
|
||||
Os Hooks de Chamada de Ferramenta fornecem controle fino sobre a execução de ferramentas durante operações do agente. Esses hooks permitem interceptar chamadas de ferramenta, modificar entradas, transformar saídas, implementar verificações de segurança e adicionar logging ou monitoramento abrangente.
|
||||
|
||||
## Visão Geral
|
||||
|
||||
Os hooks de ferramenta são executados em dois pontos críticos:
|
||||
- **Antes da Chamada de Ferramenta**: Modificar entradas, validar parâmetros ou bloquear execução
|
||||
- **Depois da Chamada de Ferramenta**: Transformar resultados, sanitizar saídas ou registrar detalhes de execução
|
||||
|
||||
## Tipos de Hook
|
||||
|
||||
### Hooks Antes da Chamada de Ferramenta
|
||||
|
||||
Executados antes de cada execução de ferramenta, esses hooks podem:
|
||||
- Inspecionar e modificar entradas de ferramenta
|
||||
- Bloquear execução de ferramenta com base em condições
|
||||
- Implementar gates de aprovação para operações perigosas
|
||||
- Validar parâmetros
|
||||
- Registrar invocações de ferramenta
|
||||
|
||||
**Assinatura:**
|
||||
```python
|
||||
def before_hook(context: ToolCallHookContext) -> bool | None:
|
||||
# Retorne False para bloquear execução
|
||||
# Retorne True ou None para permitir execução
|
||||
...
|
||||
```
|
||||
|
||||
### Hooks Depois da Chamada de Ferramenta
|
||||
|
||||
Executados depois de cada execução de ferramenta, esses hooks podem:
|
||||
- Modificar ou sanitizar resultados de ferramenta
|
||||
- Adicionar metadados ou formatação
|
||||
- Registrar resultados de execução
|
||||
- Implementar validação de resultado
|
||||
- Transformar formatos de saída
|
||||
|
||||
**Assinatura:**
|
||||
```python
|
||||
def after_hook(context: ToolCallHookContext) -> str | None:
|
||||
# Retorne string de resultado modificado
|
||||
# Retorne None para manter resultado original
|
||||
...
|
||||
```
|
||||
|
||||
## Contexto do Hook de Ferramenta
|
||||
|
||||
O objeto `ToolCallHookContext` fornece acesso abrangente ao estado de execução da ferramenta:
|
||||
|
||||
```python
|
||||
class ToolCallHookContext:
|
||||
tool_name: str # Nome da ferramenta sendo chamada
|
||||
tool_input: dict[str, Any] # Parâmetros de entrada mutáveis da ferramenta
|
||||
tool: CrewStructuredTool # Referência da instância da ferramenta
|
||||
agent: Agent | BaseAgent | None # Agente executando a ferramenta
|
||||
task: Task | None # Tarefa atual
|
||||
crew: Crew | None # Instância da crew
|
||||
tool_result: str | None # Resultado da ferramenta (apenas hooks posteriores)
|
||||
```
|
||||
|
||||
### Modificando Entradas de Ferramenta
|
||||
|
||||
**Importante:** Sempre modifique entradas de ferramenta in-place:
|
||||
|
||||
```python
|
||||
# ✅ Correto - modificar in-place
|
||||
def sanitize_input(context: ToolCallHookContext) -> None:
|
||||
context.tool_input['query'] = context.tool_input['query'].lower()
|
||||
|
||||
# ❌ Errado - substitui referência do dict
|
||||
def wrong_approach(context: ToolCallHookContext) -> None:
|
||||
context.tool_input = {'query': 'nova consulta'}
|
||||
```
|
||||
|
||||
## Métodos de Registro
|
||||
|
||||
### 1. Registro Baseado em Decoradores (Recomendado)
|
||||
|
||||
Use decoradores para sintaxe mais limpa:
|
||||
|
||||
```python
|
||||
from crewai.hooks import before_tool_call, after_tool_call
|
||||
|
||||
@before_tool_call
|
||||
def block_dangerous_tools(context):
|
||||
"""Bloqueia ferramentas perigosas."""
|
||||
dangerous_tools = ['delete_database', 'drop_table', 'rm_rf']
|
||||
if context.tool_name in dangerous_tools:
|
||||
print(f"⛔ Ferramenta perigosa bloqueada: {context.tool_name}")
|
||||
return False # Bloquear execução
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def sanitize_results(context):
|
||||
"""Sanitiza resultados."""
|
||||
if context.tool_result and "password" in context.tool_result.lower():
|
||||
return context.tool_result.replace("password", "[CENSURADO]")
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. Hooks com Escopo de Crew
|
||||
|
||||
Registre hooks para uma instância específica de crew:
|
||||
|
||||
```python
|
||||
from crewai import Crew, Process
|
||||
from crewai.project import CrewBase, crew
|
||||
from crewai.hooks import before_tool_call_crew, after_tool_call_crew
|
||||
|
||||
@CrewBase
|
||||
class MyProjCrew:
|
||||
@before_tool_call_crew
|
||||
def validate_tool_inputs(self, context):
|
||||
# Aplica-se apenas a esta crew
|
||||
if context.tool_name == "web_search":
|
||||
if not context.tool_input.get('query'):
|
||||
print("❌ Consulta de busca inválida")
|
||||
return False
|
||||
return None
|
||||
|
||||
@after_tool_call_crew
|
||||
def log_tool_results(self, context):
|
||||
# Logging de ferramenta específico da crew
|
||||
print(f"✅ {context.tool_name} concluída")
|
||||
return None
|
||||
|
||||
@crew
|
||||
def crew(self) -> Crew:
|
||||
return Crew(
|
||||
agents=self.agents,
|
||||
tasks=self.tasks,
|
||||
process=Process.sequential,
|
||||
verbose=True
|
||||
)
|
||||
```
|
||||
|
||||
## Casos de Uso Comuns
|
||||
|
||||
### 1. Guardrails de Segurança
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def safety_check(context: ToolCallHookContext) -> bool | None:
|
||||
"""Bloqueia ferramentas que podem causar danos."""
|
||||
destructive_tools = [
|
||||
'delete_file',
|
||||
'drop_table',
|
||||
'remove_user',
|
||||
'system_shutdown'
|
||||
]
|
||||
|
||||
if context.tool_name in destructive_tools:
|
||||
print(f"🛑 Ferramenta destrutiva bloqueada: {context.tool_name}")
|
||||
return False
|
||||
|
||||
# Avisar em operações sensíveis
|
||||
sensitive_tools = ['send_email', 'post_to_social_media', 'charge_payment']
|
||||
if context.tool_name in sensitive_tools:
|
||||
print(f"⚠️ Executando ferramenta sensível: {context.tool_name}")
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 2. Gate de Aprovação Humana
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def require_approval_for_actions(context: ToolCallHookContext) -> bool | None:
|
||||
"""Requer aprovação para ações específicas."""
|
||||
approval_required = [
|
||||
'send_email',
|
||||
'make_purchase',
|
||||
'delete_file',
|
||||
'post_message'
|
||||
]
|
||||
|
||||
if context.tool_name in approval_required:
|
||||
response = context.request_human_input(
|
||||
prompt=f"Aprovar {context.tool_name}?",
|
||||
default_message=f"Entrada: {context.tool_input}\nDigite 'sim' para aprovar:"
|
||||
)
|
||||
|
||||
if response.lower() != 'sim':
|
||||
print(f"❌ Execução de ferramenta negada: {context.tool_name}")
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 3. Validação e Sanitização de Entrada
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def validate_and_sanitize_inputs(context: ToolCallHookContext) -> bool | None:
|
||||
"""Valida e sanitiza entradas."""
|
||||
# Validar consultas de busca
|
||||
if context.tool_name == 'web_search':
|
||||
query = context.tool_input.get('query', '')
|
||||
if len(query) < 3:
|
||||
print("❌ Consulta de busca muito curta")
|
||||
return False
|
||||
|
||||
# Sanitizar consulta
|
||||
context.tool_input['query'] = query.strip().lower()
|
||||
|
||||
# Validar caminhos de arquivo
|
||||
if context.tool_name == 'read_file':
|
||||
path = context.tool_input.get('path', '')
|
||||
if '..' in path or path.startswith('/'):
|
||||
print("❌ Caminho de arquivo inválido")
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 4. Sanitização de Resultado
|
||||
|
||||
```python
|
||||
@after_tool_call
|
||||
def sanitize_sensitive_data(context: ToolCallHookContext) -> str | None:
|
||||
"""Sanitiza dados sensíveis."""
|
||||
if not context.tool_result:
|
||||
return None
|
||||
|
||||
import re
|
||||
result = context.tool_result
|
||||
|
||||
# Remover chaves de API
|
||||
result = re.sub(
|
||||
r'(api[_-]?key|token)["\']?\s*[:=]\s*["\']?[\w-]+',
|
||||
r'\1: [CENSURADO]',
|
||||
result,
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
# Remover endereços de email
|
||||
result = re.sub(
|
||||
r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
|
||||
'[EMAIL-CENSURADO]',
|
||||
result
|
||||
)
|
||||
|
||||
# Remover números de cartão de crédito
|
||||
result = re.sub(
|
||||
r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b',
|
||||
'[CARTÃO-CENSURADO]',
|
||||
result
|
||||
)
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
### 5. Análise de Uso de Ferramenta
|
||||
|
||||
```python
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
tool_stats = defaultdict(lambda: {'count': 0, 'total_time': 0, 'failures': 0})
|
||||
|
||||
@before_tool_call
|
||||
def start_timer(context: ToolCallHookContext) -> None:
|
||||
context.tool_input['_start_time'] = time.time()
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def track_tool_usage(context: ToolCallHookContext) -> None:
|
||||
start_time = context.tool_input.get('_start_time', time.time())
|
||||
duration = time.time() - start_time
|
||||
|
||||
tool_stats[context.tool_name]['count'] += 1
|
||||
tool_stats[context.tool_name]['total_time'] += duration
|
||||
|
||||
if not context.tool_result or 'error' in context.tool_result.lower():
|
||||
tool_stats[context.tool_name]['failures'] += 1
|
||||
|
||||
print(f"""
|
||||
📊 Estatísticas da Ferramenta {context.tool_name}:
|
||||
- Execuções: {tool_stats[context.tool_name]['count']}
|
||||
- Tempo Médio: {tool_stats[context.tool_name]['total_time'] / tool_stats[context.tool_name]['count']:.2f}s
|
||||
- Falhas: {tool_stats[context.tool_name]['failures']}
|
||||
""")
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### 6. Limitação de Taxa
|
||||
|
||||
```python
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
tool_call_history = defaultdict(list)
|
||||
|
||||
@before_tool_call
|
||||
def rate_limit_tools(context: ToolCallHookContext) -> bool | None:
|
||||
"""Limita taxa de chamadas de ferramenta."""
|
||||
tool_name = context.tool_name
|
||||
now = datetime.now()
|
||||
|
||||
# Limpar entradas antigas (mais antigas que 1 minuto)
|
||||
tool_call_history[tool_name] = [
|
||||
call_time for call_time in tool_call_history[tool_name]
|
||||
if now - call_time < timedelta(minutes=1)
|
||||
]
|
||||
|
||||
# Verificar limite de taxa (máximo 10 chamadas por minuto)
|
||||
if len(tool_call_history[tool_name]) >= 10:
|
||||
print(f"🚫 Limite de taxa excedido para {tool_name}")
|
||||
return False
|
||||
|
||||
# Registrar esta chamada
|
||||
tool_call_history[tool_name].append(now)
|
||||
return None
|
||||
```
|
||||
|
||||
### 7. Logging de Debug
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def debug_tool_call(context: ToolCallHookContext) -> None:
|
||||
"""Debug de chamada de ferramenta."""
|
||||
print(f"""
|
||||
🔍 Debug de Chamada de Ferramenta:
|
||||
- Ferramenta: {context.tool_name}
|
||||
- Agente: {context.agent.role if context.agent else 'Desconhecido'}
|
||||
- Tarefa: {context.task.description[:50] if context.task else 'Desconhecida'}...
|
||||
- Entrada: {context.tool_input}
|
||||
""")
|
||||
return None
|
||||
|
||||
@after_tool_call
|
||||
def debug_tool_result(context: ToolCallHookContext) -> None:
|
||||
"""Debug de resultado de ferramenta."""
|
||||
if context.tool_result:
|
||||
result_preview = context.tool_result[:200]
|
||||
print(f"✅ Preview do Resultado: {result_preview}...")
|
||||
else:
|
||||
print("⚠️ Nenhum resultado retornado")
|
||||
return None
|
||||
```
|
||||
|
||||
## Gerenciamento de Hooks
|
||||
|
||||
### Desregistrando Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
register_before_tool_call_hook, unregister_before_tool_call_hook,
|
||||
unregister_after_tool_call_hook
|
||||
)
|
||||
|
||||
# Desregistrar hook específico
|
||||
def my_hook(context):
|
||||
...
|
||||
|
||||
register_before_tool_call_hook(my_hook)
|
||||
# Mais tarde...
|
||||
success = unregister_before_tool_call_hook(my_hook)
|
||||
print(f"Desregistrado: {success}")
|
||||
```
|
||||
|
||||
### Limpando Hooks
|
||||
|
||||
```python
|
||||
from crewai.hooks import (
|
||||
clear_before_tool_call_hooks,
|
||||
clear_after_tool_call_hooks,
|
||||
clear_all_tool_call_hooks
|
||||
)
|
||||
|
||||
# Limpar tipo específico de hook
|
||||
count = clear_before_tool_call_hooks()
|
||||
print(f"Limpou {count} hooks antes")
|
||||
|
||||
# Limpar todos os hooks de ferramenta
|
||||
before_count, after_count = clear_all_tool_call_hooks()
|
||||
print(f"Limpou {before_count} hooks antes e {after_count} hooks depois")
|
||||
```
|
||||
|
||||
## Padrões Avançados
|
||||
|
||||
### Execução Condicional de Hook
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def conditional_blocking(context: ToolCallHookContext) -> bool | None:
|
||||
"""Bloqueia apenas em condições específicas."""
|
||||
# Bloquear apenas para agentes específicos
|
||||
if context.agent and context.agent.role == "junior_agent":
|
||||
if context.tool_name in ['delete_file', 'send_email']:
|
||||
print(f"❌ Agentes júnior não podem usar {context.tool_name}")
|
||||
return False
|
||||
|
||||
# Bloquear apenas durante tarefas específicas
|
||||
if context.task and "sensível" in context.task.description.lower():
|
||||
if context.tool_name == 'web_search':
|
||||
print("❌ Busca na web bloqueada para tarefas sensíveis")
|
||||
return False
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
### Modificação de Entrada com Consciência de Contexto
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def enhance_tool_inputs(context: ToolCallHookContext) -> None:
|
||||
"""Adiciona contexto baseado no papel do agente."""
|
||||
# Adicionar contexto baseado no papel do agente
|
||||
if context.agent and context.agent.role == "researcher":
|
||||
if context.tool_name == 'web_search':
|
||||
# Adicionar restrições de domínio para pesquisadores
|
||||
context.tool_input['domains'] = ['edu', 'gov', 'org']
|
||||
|
||||
# Adicionar contexto baseado na tarefa
|
||||
if context.task and "urgente" in context.task.description.lower():
|
||||
if context.tool_name == 'send_email':
|
||||
context.tool_input['priority'] = 'high'
|
||||
|
||||
return None
|
||||
```
|
||||
|
||||
## Melhores Práticas
|
||||
|
||||
1. **Mantenha Hooks Focados**: Cada hook deve ter uma responsabilidade única
|
||||
2. **Evite Computação Pesada**: Hooks executam em cada chamada de ferramenta
|
||||
3. **Trate Erros de Forma Adequada**: Use try-except para evitar falhas nos hooks
|
||||
4. **Use Type Hints**: Aproveite `ToolCallHookContext` para melhor suporte da IDE
|
||||
5. **Documente Condições de Bloqueio**: Deixe claro quando/por que ferramentas são bloqueadas
|
||||
6. **Teste Hooks Independentemente**: Faça testes unitários dos hooks antes de usá-los em produção
|
||||
7. **Limpe Hooks em Testes**: Use `clear_all_tool_call_hooks()` entre execuções de teste
|
||||
8. **Modifique In-Place**: Sempre modifique `context.tool_input` in-place, nunca substitua
|
||||
9. **Registre Decisões Importantes**: Especialmente ao bloquear execução de ferramenta
|
||||
10. **Considere Performance**: Armazene em cache validações custosas quando possível
|
||||
|
||||
## Tratamento de Erros
|
||||
|
||||
```python
|
||||
@before_tool_call
|
||||
def safe_validation(context: ToolCallHookContext) -> bool | None:
|
||||
try:
|
||||
# Sua lógica de validação
|
||||
if not validate_input(context.tool_input):
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"⚠️ Erro no hook: {e}")
|
||||
# Decida: permitir ou bloquear em erro
|
||||
return None # Permitir execução apesar do erro
|
||||
```
|
||||
|
||||
## Segurança de Tipos
|
||||
|
||||
```python
|
||||
from crewai.hooks import ToolCallHookContext, BeforeToolCallHookType, AfterToolCallHookType, register_before_tool_call_hook, register_after_tool_call_hook
|
||||
|
||||
# Anotações de tipo explícitas
|
||||
def my_before_hook(context: ToolCallHookContext) -> bool | None:
|
||||
return None
|
||||
|
||||
def my_after_hook(context: ToolCallHookContext) -> str | None:
|
||||
return None
|
||||
|
||||
# Registro type-safe
|
||||
register_before_tool_call_hook(my_before_hook)
|
||||
register_after_tool_call_hook(my_after_hook)
|
||||
```
|
||||
|
||||
## Solução de Problemas
|
||||
|
||||
### Hook Não Está Executando
|
||||
- Verifique se o hook está registrado antes da execução da crew
|
||||
- Verifique se um hook anterior retornou `False` (bloqueia a execução e os hooks subsequentes)
|
||||
- Garanta que a assinatura do hook corresponda ao tipo esperado
|
||||
|
||||
### Modificações de Entrada Não Funcionam
|
||||
- Use modificações in-place: `context.tool_input['key'] = value`
|
||||
- Não substitua o dict: `context.tool_input = {}`
|
||||
|
||||
### Modificações de Resultado Não Funcionam
|
||||
- Retorne a string modificada dos hooks posteriores
|
||||
- Retornar `None` mantém o resultado original
|
||||
- Garanta que a ferramenta realmente retornou um resultado
|
||||
|
||||
### Ferramenta Bloqueada Inesperadamente
|
||||
- Verifique todos os hooks de antes da chamada (before) em busca de condições de bloqueio
|
||||
- Verifique a ordem de execução dos hooks
|
||||
- Adicione logging de debug para identificar qual hook está bloqueando
|
||||
|
||||
## Conclusão
|
||||
|
||||
Os Hooks de Chamada de Ferramenta fornecem capacidades poderosas para controlar e monitorar a execução de ferramentas no CrewAI. Use-os para implementar guardrails de segurança, gates de aprovação, validação de entrada, sanitização de resultado, logging e análise. Combinados com tratamento adequado de erros e segurança de tipos, os hooks permitem construir sistemas de agentes seguros e prontos para produção, com observabilidade abrangente.
|
||||
|
||||
@@ -93,11 +93,14 @@ Depois de executar o aplicativo, você pode visualizar os traços na [Datadog LL
|
||||
|
||||
Ao clicar em um rastreamento, você verá os detalhes do rastreamento, incluindo o total de tokens usados, o número de chamadas LLM, os modelos usados e o custo estimado. Clicar em um span específico restringirá esses detalhes a ele e mostrará a entrada, a saída e os metadados relacionados.
|
||||
|
||||

|
||||
|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-1.png" alt="Visualização do rastreamento de observabilidade do Datadog LLM" />
|
||||
</Frame>
|
||||
Além disso, você pode ver a visualização em grafo da execução do rastreamento, que mostra o fluxo de controle e de dados do rastreamento e escala para agentes maiores, mostrando transferências (handoffs) e relacionamentos entre chamadas LLM, chamadas de ferramentas e interações de agentes.
|
||||
|
||||

|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-2.png" alt="Visualização do fluxo de execução do agente de observabilidade do Datadog LLM" />
|
||||
</Frame>
|
||||
|
||||
## Referências
|
||||
|
||||
|
||||
@@ -733,9 +733,7 @@ Aqui está um exemplo básico para rotear requisições ao OpenAI, usando especi
|
||||
- Coletam metadados relevantes para filtragem de logs
|
||||
- Impõem permissões de acesso
|
||||
|
||||
Crie chaves de API através de:
|
||||
- [Portkey App](https://app.portkey.ai/)
|
||||
- [API Key Management API](/pt-BR/api-reference/admin-api/control-plane/api-keys/create-api-key)
|
||||
Crie chaves de API através do [Portkey App](https://app.portkey.ai/)
|
||||
|
||||
Exemplo usando Python SDK:
|
||||
```python
|
||||
@@ -758,7 +756,7 @@ Aqui está um exemplo básico para rotear requisições ao OpenAI, usando especi
|
||||
)
|
||||
```
|
||||
|
||||
Para instruções detalhadas de gerenciamento de chaves, veja nossa [documentação de API Keys](/pt-BR/api-reference/admin-api/control-plane/api-keys/create-api-key).
|
||||
Para instruções detalhadas de gerenciamento de chaves, veja a [documentação Portkey](https://portkey.ai/docs).
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Etapa 4: Implante & Monitore">
|
||||
|
||||
@@ -18,7 +18,7 @@ Essas ferramentas permitem que seus agentes interajam com serviços em nuvem, ac
|
||||
Escreva e faça upload de arquivos para o armazenamento Amazon S3.
|
||||
</Card>
|
||||
|
||||
<Card title="Bedrock Invoke Agent" icon="aws" href="/pt-BR/tools/cloud-storage/bedrockinvokeagenttool">
|
||||
<Card title="Bedrock Invoke Agent" icon="aws" href="/pt-BR/tools/integration/bedrockinvokeagenttool">
|
||||
Acione agentes Amazon Bedrock para tarefas orientadas por IA.
|
||||
</Card>
|
||||
|
||||
|
||||
@@ -23,13 +23,15 @@ Veja um exemplo mínimo de como utilizar a ferramenta:
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai_tools import QdrantVectorSearchTool
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# Inicialize a ferramenta
|
||||
# Inicialize a ferramenta com QdrantConfig
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
)
|
||||
)
|
||||
|
||||
# Crie um agente que utiliza a ferramenta
|
||||
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
|
||||
def get_openai_embedding(text):
|
||||
response = client.embeddings.create(
|
||||
input=text,
|
||||
model="text-embedding-3-small"
|
||||
model="text-embedding-3-large"
|
||||
)
|
||||
return response.data[0].embedding
|
||||
|
||||
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
|
||||
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
||||
# Extrair texto do PDF
|
||||
text_chunks = extract_text_from_pdf(pdf_path)
|
||||
|
||||
|
||||
# Criar coleção no Qdrant
|
||||
if qdrant.collection_exists(collection_name):
|
||||
qdrant.delete_collection(collection_name)
|
||||
qdrant.create_collection(
|
||||
collection_name=collection_name,
|
||||
vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
|
||||
vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
|
||||
)
|
||||
|
||||
# Armazenar embeddings
|
||||
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
|
||||
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
||||
|
||||
# Inicializar ferramenta de busca Qdrant
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
)
|
||||
)
|
||||
|
||||
# Criar agentes CrewAI
|
||||
search_agent = Agent(
|
||||
role="Senior Semantic Search Agent",
|
||||
goal="Find and analyze documents based on semantic search",
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
information using semantic search in a Qdrant database.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -141,7 +147,7 @@ search_agent = Agent(
|
||||
answer_agent = Agent(
|
||||
role="Senior Answer Assistant",
|
||||
goal="Generate answers to questions based on the context provided",
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
answers to questions based on the context provided.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -180,21 +186,82 @@ print(result)
|
||||
## Parâmetros da Ferramenta
|
||||
|
||||
### Parâmetros Obrigatórios
|
||||
- `qdrant_url` (str): URL do seu servidor Qdrant
|
||||
- `qdrant_api_key` (str): Chave de API para autenticação com o Qdrant
|
||||
- `collection_name` (str): Nome da coleção Qdrant a ser pesquisada
|
||||
- `qdrant_config` (QdrantConfig): Objeto de configuração contendo todas as configurações do Qdrant
|
||||
|
||||
### Parâmetros Opcionais
|
||||
### Parâmetros do QdrantConfig
|
||||
- `qdrant_url` (str): URL do seu servidor Qdrant
|
||||
- `qdrant_api_key` (str, opcional): Chave de API para autenticação com o Qdrant
|
||||
- `collection_name` (str): Nome da coleção Qdrant a ser pesquisada
|
||||
- `limit` (int): Número máximo de resultados a serem retornados (padrão: 3)
|
||||
- `score_threshold` (float): Limite mínimo de similaridade (padrão: 0.35)
|
||||
- `filter` (Any, opcional): Instância de Filter do Qdrant para filtragem avançada (padrão: None)
|
||||
|
||||
### Parâmetros Opcionais da Ferramenta
|
||||
- `custom_embedding_fn` (Callable[[str], list[float]]): Função personalizada para vetorização de textos
|
||||
- `qdrant_package` (str): Caminho base do pacote Qdrant (padrão: "qdrant_client")
|
||||
- `client` (Any): Cliente Qdrant pré-inicializado (opcional)
|
||||
|
||||
## Filtragem Avançada
|
||||
|
||||
A ferramenta QdrantVectorSearchTool oferece recursos poderosos de filtragem para refinar os resultados da busca:
|
||||
|
||||
### Filtragem Dinâmica
|
||||
Use os parâmetros `filter_by` e `filter_value` na sua busca para filtrar resultados dinamicamente:
|
||||
|
||||
```python
|
||||
# O agente usará esses parâmetros ao chamar a ferramenta
|
||||
# O schema da ferramenta aceita filter_by e filter_value
|
||||
# Exemplo: busca com filtro de categoria
|
||||
# Os resultados serão filtrados onde categoria == "tecnologia"
|
||||
```
|
||||
|
||||
### Filtros Pré-definidos com QdrantConfig
|
||||
Para filtragens complexas, use instâncias de Filter do Qdrant na sua configuração:
|
||||
|
||||
```python
|
||||
from qdrant_client.http import models as qmodels
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# Criar um filtro para condições específicas
|
||||
preset_filter = qmodels.Filter(
|
||||
must=[
|
||||
qmodels.FieldCondition(
|
||||
key="categoria",
|
||||
match=qmodels.MatchValue(value="pesquisa")
|
||||
),
|
||||
qmodels.FieldCondition(
|
||||
key="ano",
|
||||
match=qmodels.MatchValue(value=2024)
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# Inicializar ferramenta com filtro pré-definido
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
filter=preset_filter # Filtro pré-definido aplicado a todas as buscas
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### Combinando Filtros
|
||||
A ferramenta combina automaticamente os filtros pré-definidos do `QdrantConfig` com os filtros dinâmicos de `filter_by` e `filter_value`:
|
||||
|
||||
```python
|
||||
# Se QdrantConfig tem um filtro pré-definido para categoria="pesquisa"
|
||||
# E a busca usa filter_by="ano", filter_value=2024
|
||||
# Ambos os filtros serão combinados (lógica AND)
|
||||
```
|
||||
|
||||
## Parâmetros de Busca
|
||||
|
||||
A ferramenta aceita estes parâmetros em seu schema:
|
||||
- `query` (str): Consulta de busca para encontrar documentos similares
|
||||
- `filter_by` (str, opcional): Campo de metadado para filtrar
|
||||
- `filter_value` (str, opcional): Valor para filtrar
|
||||
- `filter_value` (Any, opcional): Valor para filtrar
|
||||
|
||||
## Formato de Retorno
|
||||
|
||||
@@ -214,7 +281,7 @@ A ferramenta retorna resultados no formato JSON:
|
||||
|
||||
## Embedding Padrão
|
||||
|
||||
Por padrão, a ferramenta utiliza o modelo `text-embedding-3-small` da OpenAI para vetorização. Isso requer:
|
||||
Por padrão, a ferramenta utiliza o modelo `text-embedding-3-large` da OpenAI para vetorização. Isso requer:
|
||||
- Chave de API da OpenAI definida na variável de ambiente: `OPENAI_API_KEY`
|
||||
|
||||
## Embeddings Personalizados
|
||||
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
|
||||
# Tokenizar e obter saídas do modelo
|
||||
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
||||
outputs = model(**inputs)
|
||||
|
||||
|
||||
# Usar mean pooling para obter o embedding do texto
|
||||
embeddings = outputs.last_hidden_state.mean(dim=1)
|
||||
|
||||
|
||||
# Converter para lista de floats e retornar
|
||||
return embeddings[0].tolist()
|
||||
|
||||
# Usar embeddings personalizados com a ferramenta
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection"
|
||||
),
|
||||
custom_embedding_fn=custom_embeddings # Passe sua função personalizada
|
||||
)
|
||||
```
|
||||
@@ -270,4 +341,4 @@ Variáveis de ambiente obrigatórias:
|
||||
export QDRANT_URL="your_qdrant_url" # Se não for informado no construtor
|
||||
export QDRANT_API_KEY="your_api_key" # Se não for informado no construtor
|
||||
export OPENAI_API_KEY="your_openai_key" # Se estiver usando embeddings padrão
|
||||
```
|
||||
```
|
||||
|
||||
@@ -46,23 +46,25 @@ tool = DirectorySearchTool(directory='/path/to/directory')
|
||||
O DirectorySearchTool utiliza OpenAI para embeddings e sumarização por padrão. As opções de personalização dessas configurações incluem a alteração do provedor de modelo e configurações, ampliando a flexibilidade para usuários avançados.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DirectorySearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # As opções incluem ollama, google, anthropic, llama2 e mais
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Configurações adicionais aqui
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -56,25 +56,25 @@ Os seguintes parâmetros podem ser usados para customizar o comportamento da `DO
|
||||
Por padrão, a ferramenta utiliza o OpenAI tanto para embeddings quanto para sumarização. Para customizar o modelo, você pode usar um dicionário de configuração como no exemplo:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DOCXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # ou google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -48,27 +48,25 @@ tool = MDXSearchTool(mdx='path/to/your/document.mdx')
|
||||
A ferramenta utiliza, por padrão, o OpenAI para embeddings e sumarização. Para personalizar, utilize um dicionário de configuração conforme exemplo abaixo:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = MDXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # As opções incluem google, openai, anthropic, llama2, etc.
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Parâmetros opcionais podem ser incluídos aqui.
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# Um título opcional para os embeddings pode ser adicionado aqui.
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -45,28 +45,60 @@ tool = PDFSearchTool(pdf='path/to/your/document.pdf')
|
||||
|
||||
## Modelo e embeddings personalizados
|
||||
|
||||
Por padrão, a ferramenta utiliza OpenAI tanto para embeddings quanto para sumarização. Para personalizar o modelo, você pode usar um dicionário de configuração como no exemplo abaixo:
|
||||
Por padrão, a ferramenta utiliza OpenAI para embeddings e sumarização. Para personalizar, use um dicionário de configuração conforme abaixo. Observação: um banco vetorial (vectordb) é necessário, pois os embeddings gerados precisam ser armazenados e consultados.
|
||||
|
||||
```python Code
|
||||
from crewai_tools import PDFSearchTool
|
||||
from chromadb.config import Settings # Persistência no Chroma
|
||||
|
||||
tool = PDFSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # ou google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# Obrigatório: provedor de embeddings + configuração
|
||||
"embedding_model": {
|
||||
# Provedores suportados: "openai", "azure", "google-generativeai", "google-vertex",
|
||||
# "voyageai", "cohere", "huggingface", "jina", "sentence-transformer",
|
||||
# "text2vec", "ollama", "openclip", "instructor", "onnx", "roboflow", "watsonx", "custom"
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
# "model" é mapeado internamente para "model_name".
|
||||
"model": "text-embedding-3-small",
|
||||
# Opcional: chave da API (se ausente, usa variáveis de ambiente do provedor)
|
||||
# "api_key": "sk-...",
|
||||
|
||||
# Exemplos específicos por provedor
|
||||
# --- Google ---
|
||||
# (defina provider="google-generativeai")
|
||||
# "model": "models/embedding-001",
|
||||
# "task_type": "retrieval_document",
|
||||
|
||||
# --- Cohere ---
|
||||
# (defina provider="cohere")
|
||||
# "model": "embed-english-v3.0",
|
||||
|
||||
# --- Ollama (local) ---
|
||||
# (defina provider="ollama")
|
||||
# "model": "nomic-embed-text",
|
||||
},
|
||||
},
|
||||
|
||||
# Obrigatório: configuração do banco vetorial
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# Exemplo Chroma:
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# Exemplo Qdrant:
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# Observação: o nome da coleção é controlado pela ferramenta (padrão: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -57,25 +57,39 @@ Por padrão, a ferramenta utiliza o OpenAI tanto para embeddings quanto para sum
|
||||
Para personalizar o modelo, você pode usar um dicionário de configuração como o exemplo a seguir:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = TXTSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # ou google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# Obrigatório: provedor de embeddings + configuração
|
||||
"embedding_model": {
|
||||
"provider": "openai", # ou google-generativeai, cohere, ollama, ...
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...", # opcional se variável de ambiente estiver definida
|
||||
# Exemplos por provedor:
|
||||
# Google → model: "models/embedding-001", task_type: "retrieval_document"
|
||||
},
|
||||
},
|
||||
|
||||
# Obrigatório: configuração do banco vetorial
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# Configurações do Chroma (persistência opcional)
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# Exemplo de parâmetros de vetor do Qdrant:
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# Observação: o nome da coleção é controlado pela ferramenta (padrão: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -54,25 +54,25 @@ Este parâmetro é opcional durante a inicialização da ferramenta, mas deve se
|
||||
Por padrão, a ferramenta utiliza a OpenAI tanto para embeddings quanto para sumarização. Para personalizar o modelo, você pode usar um dicionário de configuração conforme o exemplo a seguir:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = XMLSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # ou google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -11,7 +11,7 @@ mode: "wide"
|
||||
<Card
|
||||
title="Bedrock Invoke Agent Tool"
|
||||
icon="cloud"
|
||||
href="/en/tools/tool-integrations/bedrockinvokeagenttool"
|
||||
href="/pt-BR/tools/integration/bedrockinvokeagenttool"
|
||||
color="#0891B2"
|
||||
>
|
||||
Invoke Amazon Bedrock Agents from CrewAI to orchestrate actions across AWS services.
|
||||
@@ -20,7 +20,7 @@ mode: "wide"
|
||||
<Card
|
||||
title="CrewAI Automation Tool"
|
||||
icon="bolt"
|
||||
href="/en/tools/tool-integrations/crewaiautomationtool"
|
||||
href="/pt-BR/tools/integration/crewaiautomationtool"
|
||||
color="#7C3AED"
|
||||
>
|
||||
Automate deployment and operations by integrating CrewAI with external platforms and workflows.
|
||||
|
||||
@@ -12,7 +12,7 @@ dependencies = [
|
||||
"pytube>=15.0.0",
|
||||
"requests>=2.32.5",
|
||||
"docker>=7.1.0",
|
||||
"crewai==1.2.1",
|
||||
"crewai==1.5.0",
|
||||
"lancedb>=0.5.4",
|
||||
"tiktoken>=0.8.0",
|
||||
"beautifulsoup4>=4.13.4",
|
||||
|
||||
@@ -287,4 +287,4 @@ __all__ = [
|
||||
"ZapierActionTools",
|
||||
]
|
||||
|
||||
__version__ = "1.2.1"
|
||||
__version__ = "1.5.0"
|
||||
|
||||
@@ -229,6 +229,7 @@ class CrewAIRagAdapter(Adapter):
|
||||
continue
|
||||
else:
|
||||
metadata: dict[str, Any] = base_metadata.copy()
|
||||
source_content = SourceContent(source_ref)
|
||||
|
||||
if data_type in [
|
||||
DataType.PDF_FILE,
|
||||
@@ -239,13 +240,12 @@ class CrewAIRagAdapter(Adapter):
|
||||
DataType.XML,
|
||||
DataType.MDX,
|
||||
]:
|
||||
if not os.path.isfile(source_ref):
|
||||
if not source_content.is_url() and not source_content.path_exists():
|
||||
raise FileNotFoundError(f"File does not exist: {source_ref}")
|
||||
|
||||
loader = data_type.get_loader()
|
||||
chunker = data_type.get_chunker()
|
||||
|
||||
source_content = SourceContent(source_ref)
|
||||
loader_result: LoaderResult = loader.load(source_content)
|
||||
|
||||
chunks = chunker.chunk(loader_result.content)
|
||||
|
||||
@@ -22,22 +22,23 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel):
|
||||
|
||||
|
||||
class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
"""Tool for crawling websites using Firecrawl. To run this tool, you need to have a Firecrawl API key.
|
||||
"""Tool for crawling websites using Firecrawl v2 API. To run this tool, you need to have a Firecrawl API key.
|
||||
|
||||
Args:
|
||||
api_key (str): Your Firecrawl API key.
|
||||
config (dict): Optional. It contains Firecrawl API parameters.
|
||||
config (dict): Optional. It contains Firecrawl v2 API parameters.
|
||||
|
||||
Default configuration options:
|
||||
max_depth (int): Maximum depth to crawl. Default: 2
|
||||
Default configuration options (Firecrawl v2 API):
|
||||
max_discovery_depth (int): Maximum depth for discovering pages. Default: 2
|
||||
ignore_sitemap (bool): Whether to ignore sitemap. Default: True
|
||||
limit (int): Maximum number of pages to crawl. Default: 100
|
||||
allow_backward_links (bool): Allow crawling backward links. Default: False
|
||||
limit (int): Maximum number of pages to crawl. Default: 10
|
||||
allow_external_links (bool): Allow crawling external links. Default: False
|
||||
scrape_options (ScrapeOptions): Options for scraping content
|
||||
- formats (list[str]): Content formats to return. Default: ["markdown", "screenshot", "links"]
|
||||
allow_subdomains (bool): Allow crawling subdomains. Default: False
|
||||
delay (int): Delay between requests in milliseconds. Default: None
|
||||
scrape_options (dict): Options for scraping content
|
||||
- formats (list[str]): Content formats to return. Default: ["markdown"]
|
||||
- only_main_content (bool): Only return main content. Default: True
|
||||
- timeout (int): Timeout in milliseconds. Default: 30000
|
||||
- timeout (int): Timeout in milliseconds. Default: 10000
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(
|
||||
@@ -49,14 +50,15 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
api_key: str | None = None
|
||||
config: dict[str, Any] | None = Field(
|
||||
default_factory=lambda: {
|
||||
"maxDepth": 2,
|
||||
"ignoreSitemap": True,
|
||||
"max_discovery_depth": 2,
|
||||
"ignore_sitemap": True,
|
||||
"limit": 10,
|
||||
"allowBackwardLinks": False,
|
||||
"allowExternalLinks": False,
|
||||
"scrapeOptions": {
|
||||
"formats": ["markdown", "screenshot", "links"],
|
||||
"onlyMainContent": True,
|
||||
"allow_external_links": False,
|
||||
"allow_subdomains": False,
|
||||
"delay": None,
|
||||
"scrape_options": {
|
||||
"formats": ["markdown"],
|
||||
"only_main_content": True,
|
||||
"timeout": 10000,
|
||||
},
|
||||
}
|
||||
@@ -107,7 +109,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
if not self._firecrawl:
|
||||
raise RuntimeError("FirecrawlApp not properly initialized")
|
||||
|
||||
return self._firecrawl.crawl_url(url, poll_interval=2, params=self.config)
|
||||
return self._firecrawl.crawl(url=url, poll_interval=2, **self.config)
|
||||
|
||||
|
||||
try:
|
||||
|
||||
@@ -22,20 +22,27 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel):
|
||||
|
||||
|
||||
class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||
"""Tool for scraping webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.
|
||||
"""Tool for scraping webpages using Firecrawl v2 API. To run this tool, you need to have a Firecrawl API key.
|
||||
|
||||
Args:
|
||||
api_key (str): Your Firecrawl API key.
|
||||
config (dict): Optional. It contains Firecrawl API parameters.
|
||||
config (dict): Optional. It contains Firecrawl v2 API parameters.
|
||||
|
||||
Default configuration options:
|
||||
Default configuration options (Firecrawl v2 API):
|
||||
formats (list[str]): Content formats to return. Default: ["markdown"]
|
||||
onlyMainContent (bool): Only return main content. Default: True
|
||||
includeTags (list[str]): Tags to include. Default: []
|
||||
excludeTags (list[str]): Tags to exclude. Default: []
|
||||
headers (dict): Headers to include. Default: {}
|
||||
waitFor (int): Time to wait for page to load in ms. Default: 0
|
||||
json_options (dict): Options for JSON extraction. Default: None
|
||||
only_main_content (bool): Only return main content excluding headers, navs, footers, etc. Default: True
|
||||
include_tags (list[str]): Tags to include in the output. Default: []
|
||||
exclude_tags (list[str]): Tags to exclude from the output. Default: []
|
||||
max_age (int): Returns cached version if younger than this age in milliseconds. Default: 172800000 (2 days)
|
||||
headers (dict): Headers to send with the request (e.g., cookies, user-agent). Default: {}
|
||||
wait_for (int): Delay in milliseconds before fetching content. Default: 0
|
||||
mobile (bool): Emulate scraping from a mobile device. Default: False
|
||||
skip_tls_verification (bool): Skip TLS certificate verification. Default: True
|
||||
timeout (int): Request timeout in milliseconds. Default: None
|
||||
remove_base64_images (bool): Remove base64 images from output. Default: True
|
||||
block_ads (bool): Enable ad-blocking and cookie popup blocking. Default: True
|
||||
proxy (str): Proxy type ("basic", "stealth", "auto"). Default: "auto"
|
||||
store_in_cache (bool): Store page in Firecrawl index and cache. Default: True
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(
|
||||
@@ -48,11 +55,18 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||
config: dict[str, Any] = Field(
|
||||
default_factory=lambda: {
|
||||
"formats": ["markdown"],
|
||||
"onlyMainContent": True,
|
||||
"includeTags": [],
|
||||
"excludeTags": [],
|
||||
"only_main_content": True,
|
||||
"include_tags": [],
|
||||
"exclude_tags": [],
|
||||
"max_age": 172800000, # 2 days cache
|
||||
"headers": {},
|
||||
"waitFor": 0,
|
||||
"wait_for": 0,
|
||||
"mobile": False,
|
||||
"skip_tls_verification": True,
|
||||
"remove_base64_images": True,
|
||||
"block_ads": True,
|
||||
"proxy": "auto",
|
||||
"store_in_cache": True,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -95,7 +109,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||
if not self._firecrawl:
|
||||
raise RuntimeError("FirecrawlApp not properly initialized")
|
||||
|
||||
return self._firecrawl.scrape_url(url, params=self.config)
|
||||
return self._firecrawl.scrape(url=url, **self.config)
|
||||
|
||||
|
||||
try:
|
||||
|
||||
@@ -23,19 +23,24 @@ class FirecrawlSearchToolSchema(BaseModel):
|
||||
|
||||
|
||||
class FirecrawlSearchTool(BaseTool):
|
||||
"""Tool for searching webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.
|
||||
"""Tool for searching webpages using Firecrawl v2 API. To run this tool, you need to have a Firecrawl API key.
|
||||
|
||||
Args:
|
||||
api_key (str): Your Firecrawl API key.
|
||||
config (dict): Optional. It contains Firecrawl API parameters.
|
||||
config (dict): Optional. It contains Firecrawl v2 API parameters.
|
||||
|
||||
Default configuration options:
|
||||
limit (int): Maximum number of pages to crawl. Default: 5
|
||||
tbs (str): Time before search. Default: None
|
||||
lang (str): Language. Default: "en"
|
||||
country (str): Country. Default: "us"
|
||||
location (str): Location. Default: None
|
||||
timeout (int): Timeout in milliseconds. Default: 60000
|
||||
Default configuration options (Firecrawl v2 API):
|
||||
limit (int): Maximum number of search results to return. Default: 5
|
||||
tbs (str): Time-based search filter (e.g., "qdr:d" for past day). Default: None
|
||||
location (str): Location for search results. Default: None
|
||||
timeout (int): Request timeout in milliseconds. Default: None
|
||||
scrape_options (dict): Options for scraping the search results. Default: {"formats": ["markdown"]}
|
||||
- formats (list[str]): Content formats to return. Default: ["markdown"]
|
||||
- only_main_content (bool): Only return main content. Default: True
|
||||
- include_tags (list[str]): Tags to include. Default: []
|
||||
- exclude_tags (list[str]): Tags to exclude. Default: []
|
||||
- wait_for (int): Delay before fetching content in ms. Default: 0
|
||||
- timeout (int): Request timeout in milliseconds. Default: None
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(
|
||||
@@ -49,10 +54,15 @@ class FirecrawlSearchTool(BaseTool):
|
||||
default_factory=lambda: {
|
||||
"limit": 5,
|
||||
"tbs": None,
|
||||
"lang": "en",
|
||||
"country": "us",
|
||||
"location": None,
|
||||
"timeout": 60000,
|
||||
"timeout": None,
|
||||
"scrape_options": {
|
||||
"formats": ["markdown"],
|
||||
"only_main_content": True,
|
||||
"include_tags": [],
|
||||
"exclude_tags": [],
|
||||
"wait_for": 0,
|
||||
},
|
||||
}
|
||||
)
|
||||
_firecrawl: FirecrawlApp | None = PrivateAttr(None)
|
||||
@@ -106,7 +116,7 @@ class FirecrawlSearchTool(BaseTool):
|
||||
|
||||
return self._firecrawl.search(
|
||||
query=query,
|
||||
params=self.config,
|
||||
**self.config,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
@@ -12,9 +12,17 @@ from pydantic.types import ImportString
|
||||
|
||||
|
||||
class QdrantToolSchema(BaseModel):
|
||||
query: str = Field(..., description="Query to search in Qdrant DB.")
|
||||
filter_by: str | None = None
|
||||
filter_value: str | None = None
|
||||
query: str = Field(
|
||||
..., description="Query to search in Qdrant DB - always required."
|
||||
)
|
||||
filter_by: str | None = Field(
|
||||
default=None,
|
||||
description="Parameter to filter the search by. When filtering, needs to be used in conjunction with filter_value.",
|
||||
)
|
||||
filter_value: Any | None = Field(
|
||||
default=None,
|
||||
description="Value to filter the search by. When filtering, needs to be used in conjunction with filter_by.",
|
||||
)
|
||||
|
||||
|
||||
class QdrantConfig(BaseModel):
|
||||
@@ -25,7 +33,9 @@ class QdrantConfig(BaseModel):
|
||||
collection_name: str
|
||||
limit: int = 3
|
||||
score_threshold: float = 0.35
|
||||
filter_conditions: list[tuple[str, Any]] = Field(default_factory=list)
|
||||
filter: Any | None = Field(
|
||||
default=None, description="Qdrant Filter instance for advanced filtering."
|
||||
)
|
||||
|
||||
|
||||
class QdrantVectorSearchTool(BaseTool):
|
||||
@@ -76,23 +86,26 @@ class QdrantVectorSearchTool(BaseTool):
|
||||
filter_value: Any | None = None,
|
||||
) -> str:
|
||||
"""Perform vector similarity search."""
|
||||
filter_ = self.qdrant_package.http.models.Filter
|
||||
field_condition = self.qdrant_package.http.models.FieldCondition
|
||||
match_value = self.qdrant_package.http.models.MatchValue
|
||||
conditions = self.qdrant_config.filter_conditions.copy()
|
||||
if filter_by and filter_value is not None:
|
||||
conditions.append((filter_by, filter_value))
|
||||
|
||||
search_filter = (
|
||||
filter_(
|
||||
must=[
|
||||
field_condition(key=k, match=match_value(value=v))
|
||||
for k, v in conditions
|
||||
]
|
||||
)
|
||||
if conditions
|
||||
else None
|
||||
self.qdrant_config.filter.model_copy()
|
||||
if self.qdrant_config.filter is not None
|
||||
else self.qdrant_package.http.models.Filter(must=[])
|
||||
)
|
||||
if filter_by and filter_value is not None:
|
||||
if not hasattr(search_filter, "must") or not isinstance(
|
||||
search_filter.must, list
|
||||
):
|
||||
search_filter.must = []
|
||||
search_filter.must.append(
|
||||
self.qdrant_package.http.models.FieldCondition(
|
||||
key=filter_by,
|
||||
match=self.qdrant_package.http.models.MatchValue(
|
||||
value=filter_value
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
query_vector = (
|
||||
self.custom_embedding_fn(query)
|
||||
if self.custom_embedding_fn
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,289 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"url": "https://firecrawl.dev", "includeTags": [], "excludeTags": [],
|
||||
"onlyMainContent": true, "waitFor": 0, "skipTlsVerification": true, "removeBase64Images":
|
||||
true, "fastMode": false, "blockAds": true, "storeInCache": true, "maxAge": 172800000,
|
||||
"formats": ["markdown"], "headers": {}, "mobile": false, "proxy": "auto", "origin":
|
||||
"python-sdk@4.5.0"}'
|
||||
headers:
|
||||
Accept:
|
||||
- '*/*'
|
||||
Accept-Encoding:
|
||||
- gzip, deflate, zstd
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Length:
|
||||
- '350'
|
||||
Content-Type:
|
||||
- application/json
|
||||
User-Agent:
|
||||
- python-requests/2.32.5
|
||||
method: POST
|
||||
uri: https://api.firecrawl.dev/v2/scrape
|
||||
response:
|
||||
body:
|
||||
string: "{\"success\":true,\"data\":{\"markdown\":\"We just raised our Series
|
||||
A and shipped Firecrawl /v2 \U0001F389. [Read the blog.](https://www.firecrawl.dev/blog/firecrawl-v2-series-a-announcement)\\n\\n[2
|
||||
Months Free \u2014 Annually](https://www.firecrawl.dev/pricing)\\n\\n# Turn
|
||||
websites into LLM-ready data\\n\\nPower your AI apps with clean web data\\n\\nfrom
|
||||
any website. [It's also open source.](https://github.com/firecrawl/firecrawl)\\n\\nScrape\\n\\nSearch\\nNew\\n\\nMap\\n\\nCrawl\\n\\nScrape\\n\\nLogo\\n\\nNavigation\\n\\nButton\\n\\nH1
|
||||
Title\\n\\nDescription\\n\\nCTA Button\\n\\n\\\\[ .JSON \\\\]\\n\\n```json\\n1[\\\\\\n2
|
||||
\ {\\\\\\n3 \\\"url\\\": \\\"https://example.com\\\",\\\\\\n4 \\\"markdown\\\":
|
||||
\\\"# Getting Started...\\\",\\\\\\n5 \\\"json\\\": { \\\"title\\\": \\\"Guide\\\",
|
||||
\\\"docs\\\": \\\"...\\\" },\\\\\\n6 \\\"screenshot\\\": \\\"https://example.com/hero.png\\\"\\\\\\n7
|
||||
\ }\\\\\\n8]\\n```\\n\\nScrape Completed\\n\\nTrusted by5000+\\n\\ncompaniesof
|
||||
all sizes\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\\\[01/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nMain Features\\n\\n//\\n\\nDeveloper First\\n\\n//\\n\\n##
|
||||
Startscraping today\\n\\nEnhance your apps with industry leading web scraping
|
||||
and crawling capabilities.\\n\\nScrape\\n\\nGet llm-ready data from websites.
|
||||
Markdown, JSON, screenshot, etc.\\n\\nSearch\\n\\nNew\\n\\nSearch the web
|
||||
and get full content from results.\\n\\nCrawl\\n\\nCrawl all the pages on
|
||||
a website and get data for each page.\\n\\nPython\\n\\nNode.js\\n\\nCurl\\n\\nCopy
|
||||
code\\n\\n```python\\n1# pip install firecrawl-py\\n2from firecrawl import
|
||||
Firecrawl\\n3\\n4app = Firecrawl(api_key=\\\"fc-YOUR_API_KEY\\\")\\n5\\n6#
|
||||
Scrape a website:\\n7app.scrape('firecrawl.dev')\\n8\\n9\\n10\\n```\\n\\n\\\\[
|
||||
.MD \\\\]\\n\\n```markdown\\n1# Firecrawl\\n2\\n3Firecrawl is a powerful web
|
||||
scraping\\n4library that makes it easy to extract\\n5data from websites.\\n6\\n7##
|
||||
Installation\\n8\\n9To install Firecrawl, run:\\n10\\n11\\n```\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nIntegrations\\n\\n###
|
||||
Use well-known tools\\n\\nAlready fully integrated with the greatest existing
|
||||
tools and workflows.\\n\\n[See all integrations](https://www.firecrawl.dev/app)\\n\\n\\n\\nmendableai/firecrawl\\n\\nPublic\\n\\nStar\\n\\n65.3K\\n\\n\\\\[python-SDK\\\\]
|
||||
improvs/async\\n\\n#1337\\n\\n\xB7\\n\\nApr 18, 2025\\n\\n\xB7\\n\\n\\n\\nrafaelsideguide\\n\\nfeat(extract):
|
||||
cost limit\\n\\n#1473\\n\\n\xB7\\n\\nApr 17, 2025\\n\\n\xB7\\n\\n\\n\\nmogery\\n\\nfeat(scrape):
|
||||
get job result from GCS, avoid Redis\\n\\n#1461\\n\\n\xB7\\n\\nApr 15, 2025\\n\\n\xB7\\n\\n\\n\\nmogery\\n\\nExtract
|
||||
v2/rerank improvs\\n\\n#1437\\n\\n\xB7\\n\\nApr 11, 2025\\n\\n\xB7\\n\\n\\n\\nrafaelsideguide\\n\\n\\n\\n\\n\\n+90\\n\\nOpen
|
||||
Source\\n\\n### Code you can trust\\n\\nDeveloped transparently and collaboratively.
|
||||
Join our community of contributors.\\n\\n[Check out our repo](https://github.com/firecrawl/firecrawl)\\n\\n\\\\[02/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nCore\\n\\n//\\n\\nBuilt to outperform\\n\\n//\\n\\n##
|
||||
Core principles, provenperformance\\n\\nBuilt from the ground up to outperform
|
||||
traditional scrapers.\\n\\nNo proxy headaches\\n\\nReliable.Covers 96% of
|
||||
the web,\\n\\nincluding JS-heavy and protected pages. No proxies, no puppets,
|
||||
just clean data.\\n\\nFirecrawl\\n\\n96%\\n\\n\\n\\nPuppeteer\\n\\n79%\\n\\ncURL\\n\\n75%\\n\\nSpeed
|
||||
that feels invisible\\n\\nBlazingly fast.Delivers results in less than 1 second,
|
||||
fast for real-time agents\\n\\nand dynamic apps.\\n\\nURL\\n\\nCrawl\\n\\nScrape\\n\\nfirecrawl.dev/docs\\n\\n50ms\\n\\n51ms\\n\\nfirecrawl.dev/templates\\n\\n52ms\\n\\n50ms\\n\\nfirecrawl.dev/changelog\\n\\n49ms\\n\\n52ms\\n\\nfirecrawl.dev/about\\n\\n52ms\\n\\n50ms\\n\\nfirecrawl.dev/changelog\\n\\n50ms\\n\\n52ms\\n\\nfirecrawl.dev/playground\\n\\n51ms\\n\\n49ms\\n\\n\\\\[
|
||||
CTA \\\\]\\n\\n\\\\[ CRAWL \\\\]\\n\\n\\\\[ SCRAPE \\\\]\\n\\n\\\\[ CTA \\\\]\\n\\n//\\n\\nGet
|
||||
started\\n\\n//\\n\\nReady to build?\\n\\nStart getting Web Data for free
|
||||
and scale seamlessly as your project expands. No credit card needed.\\n\\n[Start
|
||||
for free](https://www.firecrawl.dev/signin) [See our plans](https://www.firecrawl.dev/pricing)\\n\\n\\\\[03/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nFeatures\\n\\n//\\n\\nZero configuration\\n\\n//\\n\\n##
|
||||
We handle the hard stuff\\n\\nRotating proxies, orchestration, rate limits,
|
||||
js-blocked content and more.\\n\\nDocs to data\\n\\nMedia parsing.Firecrawl
|
||||
can parse and output content from web hosted pdfs, docx, and more.\\n\\nhttps://example.com/docs/report.pdf\\n\\nhttps://example.com/files/brief.docx\\n\\nhttps://example.com/docs/guide.html\\n\\ndocx\\n\\nParsing...\\n\\nKnows
|
||||
the moment\\n\\nSmart wait.Firecrawl intelligently waits for content to load,
|
||||
making scraping faster and more reliable.\\n\\nhttps://example-spa.com\\n\\nRequest
|
||||
Sent\\n\\nScrapes the real thing\\n\\nCached, when you need it.Selective caching,
|
||||
you choose your caching patterns, growing web index.\\n\\n\\n\\nUser\\n\\nFirecrawl\\n\\nCache\\n\\nInvisible
|
||||
access\\n\\nStealth mode.Crawls the web without\\n\\nbeing blocked, mimics
|
||||
real users to access protected or dynamic content.\\n\\nInteractive scraping\\n\\nActions.Click,
|
||||
scroll, write, wait, press and more before extracting content.\\n\\nhttps://example.com\\n\\nNavigate\\n\\nClick\\n\\nType\\n\\nWait\\n\\nScroll\\n\\nPress\\n\\nScreenshot\\n\\nScrape\\n\\n\\\\[04/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nPricing\\n\\n//\\n\\nTransparent\\n\\n//\\n\\n## Flexible
|
||||
pricing\\n\\nExplore transparent pricing built for real-world scraping. Start
|
||||
for free, then scale as you grow.\\n\\n\U0001F1FA\U0001F1F8USD\\n\\nFree Plan\\n\\nA
|
||||
lightweight way to try scraping.\\n\\nNo cost, no card, no hassle.\\n\\n500
|
||||
credits\\n\\n$0123456789\\n\\none-time\\n\\nGet started\\n\\nScrape 500 pages\\n\\n2
|
||||
concurrent requests\\n\\nLow rate limits\\n\\nHobby\\n\\nGreat for side projects
|
||||
and small tools.\\n\\nFast, simple, no overkill.\\n\\n3,000 credits\\n\\n$01234567890123456789\\n\\n/monthly\\n\\nBilled
|
||||
yearly\\n\\n2 months free\\n\\nSubscribe\\n\\nScrape 3,000 pages\\n\\n5 concurrent
|
||||
requests\\n\\nBasic support\\n\\n$9 per extra 1k credits\\n\\nStandard\\n\\nMost
|
||||
popular\\n\\nPerfect for scaling with less effort.\\n\\nSimple, solid, dependable.\\n\\n100,000
|
||||
credits\\n\\n$01234567890123456789\\n\\n/monthly\\n\\nBilled yearly\\n\\n2
|
||||
months free\\n\\nSubscribe\\n\\nScrape 100,000 pages\\n\\n50 concurrent requests\\n\\nStandard
|
||||
support\\n\\n$47 per extra 35k credits\\n\\nGrowth\\n\\nBuilt for high volume
|
||||
and speed.\\n\\nFirecrawl at full force.\\n\\n500,000 credits\\n\\n$012345678901234567890123456789\\n\\n/monthly\\n\\nBilled
|
||||
yearly\\n\\n2 months free\\n\\nSubscribe\\n\\nScrape 500,000 pages\\n\\n100
|
||||
concurrent requests\\n\\nPriority support\\n\\n$177 per extra 175k credits\\n\\nExtra
|
||||
credits are available via auto-recharge packs. [Enable](https://www.firecrawl.dev/signin/signup)\\n\\nEnterprise\\n\\nPower
|
||||
at your pace\\n\\nUnlimited credits. Custom RPMs.\\n\\n[Contact sales](https://fk4bvu0n5qp.typeform.com/to/Ej6oydlg)
|
||||
[More details](https://www.firecrawl.dev/enterprise)\\n\\nBulk discounts\\n\\nTop
|
||||
priority support\\n\\nCustom concurrency limits\\n\\nImproved stealth proxies\\n\\nSLAs\\n\\nAdvanced
|
||||
security & controls\\n\\n\\\\[05/ 07 \\\\]\\n\\n\xB7\\n\\nTestimonials\\n\\n//\\n\\nCommunity\\n\\n//\\n\\n##
|
||||
People love building withFirecrawl\\n\\nDiscover why developers choose
|
||||
Firecrawl every day.\\n\\n[Morgan
|
||||
Linton@morganlinton\\\"If you're coding with AI, and haven't discovered @firecrawl\\\\_dev
|
||||
yet, prepare to have your mind blown \U0001F92F\\\"](https://x.com/morganlinton/status/1839454165703204955)
|
||||
[Chris
|
||||
DeWeese@chrisdeweese\\\\_\\\"Started using @firecrawl\\\\_dev for a project,
|
||||
I wish I used this sooner.\\\"](https://x.com/chrisdeweese_/status/1853587120406876601)
|
||||
[Alex
|
||||
Reibman@AlexReibman\\\"Moved our internal agent's web scraping tool from Apify
|
||||
to Firecrawl because it benchmarked 50x faster with AgentOps.\\\"](https://x.com/AlexReibman/status/1780299595484131836)
|
||||
[Tom
|
||||
- Morpho@TomReppelin\\\"I found gold today. Thank you @firecrawl\\\\_dev\\\"](https://x.com/TomReppelin/status/1844382491014201613)\\n\\n[Morgan
|
||||
Linton@morganlinton\\\"If you're coding with AI, and haven't discovered @firecrawl\\\\_dev
|
||||
yet, prepare to have your mind blown \U0001F92F\\\"](https://x.com/morganlinton/status/1839454165703204955)
|
||||
[Chris
|
||||
DeWeese@chrisdeweese\\\\_\\\"Started using @firecrawl\\\\_dev for a project,
|
||||
I wish I used this sooner.\\\"](https://x.com/chrisdeweese_/status/1853587120406876601)
|
||||
[Alex
|
||||
Reibman@AlexReibman\\\"Moved our internal agent's web scraping tool from Apify
|
||||
to Firecrawl because it benchmarked 50x faster with AgentOps.\\\"](https://x.com/AlexReibman/status/1780299595484131836)
|
||||
[Tom
|
||||
- Morpho@TomReppelin\\\"I found gold today. Thank you @firecrawl\\\\_dev\\\"](https://x.com/TomReppelin/status/1844382491014201613)\\n\\n[Bardia@thepericulum\\\"The
|
||||
Firecrawl team ships. I wanted types for their node SDK, and less than an
|
||||
hour later, I got them.\\\"](https://x.com/thepericulum/status/1781397799487078874)
|
||||
[Matt
|
||||
Busigin@mbusigin\\\"Firecrawl is dope. Congrats guys \U0001F44F\\\"](https://x.com/mbusigin/status/1836065372010656069)
|
||||
[Sumanth@Sumanth\\\\_077\\\"Web
|
||||
scraping will never be the same!\\\\\\\\\\n\\\\\\\\\\nFirecrawl is an open-source
|
||||
framework that takes a URL, crawls it, and conver...\\\"](https://x.com/Sumanth_077/status/1940049003074478511)
|
||||
[Steven
|
||||
Tey@steventey\\\"Open-source Clay alternative just dropped\\\\\\\\\\n\\\\\\\\\\nUpload
|
||||
a CSV of emails and...\\\"](https://x.com/steventey/status/1932945651761098889)\\n\\n[Bardia@thepericulum\\\"The
|
||||
Firecrawl team ships. I wanted types for their node SDK, and less than an
|
||||
hour later, I got them.\\\"](https://x.com/thepericulum/status/1781397799487078874)
|
||||
[Matt
|
||||
Busigin@mbusigin\\\"Firecrawl is dope. Congrats guys \U0001F44F\\\"](https://x.com/mbusigin/status/1836065372010656069)
|
||||
[Sumanth@Sumanth\\\\_077\\\"Web
|
||||
scraping will never be the same!\\\\\\\\\\n\\\\\\\\\\nFirecrawl is an open-source
|
||||
framework that takes a URL, crawls it, and conver...\\\"](https://x.com/Sumanth_077/status/1940049003074478511)
|
||||
[Steven
|
||||
Tey@steventey\\\"Open-source Clay alternative just dropped\\\\\\\\\\n\\\\\\\\\\nUpload
|
||||
a CSV of emails and...\\\"](https://x.com/steventey/status/1932945651761098889)\\n\\n\\\\[06/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nUse Cases\\n\\n//\\n\\nUse cases\\n\\n//\\n\\n## Transform
|
||||
\ web data into AI-powered solutions\\n\\nDiscover how Firecrawl customers
|
||||
are getting the most out of our API.\\n\\n[View all use cases](https://docs.firecrawl.dev/use-cases/overview)\\n\\nChat
|
||||
with context\\n\\nSmarter AI chats\\n\\nPower your AI assistants with real-time,
|
||||
accurate web content.\\n\\n[View docs](https://docs.firecrawl.dev/introduction)\\n\\n\\n\\nAI Assistant\\n\\nwithFirecrawl\\n\\nReal-time\xB7Updated
|
||||
2 min ago\\n\\nAsk anything...\\n\\nKnow your leads\\n\\nLead enrichment\\n\\nEnhance
|
||||
your sales data with\\n\\nweb information.\\n\\n[Check out Extract](https://www.firecrawl.dev/extract)\\n\\nExtracting
|
||||
leads from directory...\\n\\nTech startups\\n\\nWith contact info\\n\\nDecision
|
||||
makers\\n\\nFunding stage\\n\\nReady to engage\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nKnow your leads\\n\\nMCPs\\n\\nAdd
|
||||
powerful scraping to your\\n\\ncode editors.\\n\\n[Get started](https://docs.firecrawl.dev/mcp-server)\\n\\n\\n\\nClaude Code\\n\\n\\n\\nCursor\\n\\n\\n\\nWindsurf\\n\\n\u273B\\n\\nWelcome
|
||||
to Claude Code!\\n\\n/help for help, /status for your current setup\\n\\n>Try
|
||||
\\\"how do I log an error?\\\"\\n\\nBuild with context\\n\\nAI platforms\\n\\nLet
|
||||
your customers build AI apps\\n\\nwith web data.\\n\\n[Check out Map](https://docs.firecrawl.dev/features/map)\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nExtracting
|
||||
text...\\n\\nNo insight missed\\n\\nDeep research\\n\\nExtract comprehensive
|
||||
information for\\n\\nin-depth research.\\n\\n[Build your own with Search](https://docs.firecrawl.dev/features/search)\\n\\nDeep
|
||||
research in progress...\\n\\nAcademic papers\\n\\n0 found\\n\\nNews articles\\n\\n0
|
||||
found\\n\\nExpert opinions\\n\\n0 found\\n\\nResearch reports\\n\\n0 found\\n\\nIndustry
|
||||
data\\n\\n0 found\\n\\nAsk anything...\\n\\n\\\\[ CTA \\\\]\\n\\n\\\\[ CRAWL
|
||||
\\\\]\\n\\n\\\\[ SCRAPE \\\\]\\n\\n\\\\[ CTA \\\\]\\n\\n//\\n\\nGet started\\n\\n//\\n\\nReady
|
||||
to build?\\n\\nStart getting Web Data for free and scale seamlessly as your
|
||||
project expands. No credit card needed.\\n\\n[Start for free](https://www.firecrawl.dev/signin)
|
||||
[See our plans](https://www.firecrawl.dev/pricing)\\n\\n\\\\[07/ 07 \\\\]\\n\\n\xB7\\n\\nFAQ\\n\\n//\\n\\nFAQ\\n\\n//\\n\\n##
|
||||
Frequently askedquestions\\n\\nEverything you need to know about Firecrawl.\\n\\nGeneral\\n\\nWhat
|
||||
is Firecrawl?\\n\\nWhat sites work?\\n\\nWho can benefit from using Firecrawl?\\n\\nIs
|
||||
Firecrawl open-source?\\n\\nWhat is the difference between Firecrawl and other
|
||||
web scrapers?\\n\\nWhat is the difference between the open-source version
|
||||
and the hosted version?\\n\\nScraping & Crawling\\n\\nHow does Firecrawl handle
|
||||
dynamic content on websites?\\n\\nWhy is it not crawling all the pages?\\n\\nCan
|
||||
Firecrawl crawl websites without a sitemap?\\n\\nWhat formats can Firecrawl
|
||||
convert web data into?\\n\\nHow does Firecrawl ensure the cleanliness of the
|
||||
data?\\n\\nIs Firecrawl suitable for large-scale data scraping projects?\\n\\nDoes
|
||||
it respect robots.txt?\\n\\nWhat measures does Firecrawl take to handle web
|
||||
scraping challenges like rate limits and caching?\\n\\nDoes Firecrawl handle
|
||||
captcha or authentication?\\n\\nAPI Related\\n\\nWhere can I find my API key?\\n\\nBilling\\n\\nIs
|
||||
Firecrawl free?\\n\\nIs there a pay-per-use plan instead of monthly?\\n\\nDo
|
||||
credits roll over to the next month?\\n\\nHow many credits do scraping and
|
||||
crawling cost?\\n\\nDo you charge for failed requests?\\n\\nWhat payment methods
|
||||
do you accept?\\n\\nFOOTER\\n\\nThe easiest way to extract\\n\\ndata from
|
||||
the web\\n\\nBacked by\\n\\nY Combinator\\n\\n[Linkedin](https://www.linkedin.com/company/firecrawl)
|
||||
[Github](https://github.com/firecrawl/firecrawl)\\n\\nSOC II \xB7 Type 2\\n\\nAICPA\\n\\nSOC
|
||||
2\\n\\n[X (Twitter)](https://x.com/firecrawl_dev) [Discord](https://discord.gg/gSmWdAkdwd)\\n\\nProducts\\n\\n[Playground](https://www.firecrawl.dev/playground)
|
||||
[Extract](https://www.firecrawl.dev/extract) [Pricing](https://www.firecrawl.dev/pricing)
|
||||
[Templates](https://www.firecrawl.dev/templates) [Changelog](https://www.firecrawl.dev/changelog)\\n\\nUse
|
||||
Cases\\n\\n[AI Platforms](https://docs.firecrawl.dev/use-cases/ai-platforms)
|
||||
[Lead Enrichment](https://docs.firecrawl.dev/use-cases/lead-enrichment) [SEO
|
||||
Platforms](https://docs.firecrawl.dev/use-cases/seo-platforms) [Deep Research](https://docs.firecrawl.dev/use-cases/deep-research)\\n\\nDocumentation\\n\\n[Getting
|
||||
started](https://docs.firecrawl.dev/introduction) [API Reference](https://docs.firecrawl.dev/api-reference/introduction)
|
||||
[Integrations](https://www.firecrawl.dev/app) [Examples](https://docs.firecrawl.dev/use-cases/overview)
|
||||
[SDKs](https://docs.firecrawl.dev/sdks/overview)\\n\\nCompany\\n\\n[Blog](https://www.firecrawl.dev/blog)
|
||||
[Careers](https://www.firecrawl.dev/careers) [Creator & OSS program](https://www.firecrawl.dev/creator-oss-program)
|
||||
[Student program](https://www.firecrawl.dev/student-program)\\n\\n\xA9 2025
|
||||
Firecrawl\\n\\n[Terms of Service](https://www.firecrawl.dev/terms-of-service)
|
||||
[Privacy Policy](https://www.firecrawl.dev/privacy-policy) [Report Abuse](mailto:help@firecrawl.com?subject=Issue:)\\n\\n[All
|
||||
systems normal](https://status.firecrawl.dev/)\\n\\nStripeM-Inner\",\"metadata\":{\"twitter:title\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"publisher\":\"Firecrawl\",\"ogUrl\":\"https://www.firecrawl.dev\",\"robots\":\"follow,
|
||||
index\",\"title\":\"Firecrawl - The Web Data API for AI\",\"ogDescription\":\"The
|
||||
web crawling, scraping, and search API for AI. Built for scale. Firecrawl
|
||||
delivers the entire internet to AI agents and builders. Clean, structured,
|
||||
and ready to reason with.\",\"ogImage\":\"https://www.firecrawl.dev/og.png\",\"viewport\":\"width=device-width,
|
||||
initial-scale=1, maximum-scale=1, user-scalable=no\",\"og:url\":\"https://www.firecrawl.dev\",\"og:site_name\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"og:type\":\"website\",\"twitter:image\":\"https://www.firecrawl.dev/og.png\",\"author\":\"Firecrawl\",\"og:title\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"favicon\":\"https://www.firecrawl.dev/favicon.png\",\"description\":\"The
|
||||
web crawling, scraping, and search API for AI. Built for scale. Firecrawl
|
||||
delivers the entire internet to AI agents and builders. Clean, structured,
|
||||
and ready to reason with.\",\"referrer\":\"origin-when-cross-origin\",\"twitter:site\":\"@Vercel\",\"ogSiteName\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"og:image\":\"https://www.firecrawl.dev/og.png\",\"twitter:card\":\"summary_large_image\",\"twitter:creator\":\"@Vercel\",\"twitter:description\":\"The
|
||||
web crawling, scraping, and search API for AI. Built for scale. Firecrawl
|
||||
delivers the entire internet to AI agents and builders. Clean, structured,
|
||||
and ready to reason with.\",\"language\":\"en\",\"keywords\":\"Firecrawl,Markdown,Data,Mendable,Langchain\",\"creator\":\"Firecrawl\",\"ogTitle\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"og:description\":\"The web crawling, scraping,
|
||||
and search API for AI. Built for scale. Firecrawl delivers the entire internet
|
||||
to AI agents and builders. Clean, structured, and ready to reason with.\",\"scrapeId\":\"e78d8060-d581-4e5e-b25a-90cfdad48530\",\"sourceURL\":\"https://firecrawl.dev\",\"url\":\"https://www.firecrawl.dev/\",\"statusCode\":200,\"contentType\":\"text/html;
|
||||
charset=utf-8\",\"proxyUsed\":\"basic\",\"cacheState\":\"hit\",\"cachedAt\":\"2025-10-29T13:09:07.713Z\",\"creditsUsed\":1}}}"
|
||||
headers:
|
||||
Access-Control-Allow-Origin:
|
||||
- '*'
|
||||
Alt-Svc:
|
||||
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
|
||||
Content-Length:
|
||||
- '24693'
|
||||
Content-Type:
|
||||
- application/json; charset=utf-8
|
||||
Date:
|
||||
- Wed, 29 Oct 2025 14:34:03 GMT
|
||||
ETag:
|
||||
- W/"6075-Q1W6uMv95JKEZARbtaiPYYMojlU"
|
||||
Via:
|
||||
- 1.1 google
|
||||
X-Powered-By:
|
||||
- Express
|
||||
X-Response-Time:
|
||||
- 4719.998ms
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -0,0 +1,937 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"query": "firecrawl", "limit": 5, "scrapeOptions": {"includeTags": [],
|
||||
"excludeTags": [], "onlyMainContent": true, "waitFor": 0, "skipTlsVerification":
|
||||
true, "removeBase64Images": true, "fastMode": false, "blockAds": true, "storeInCache":
|
||||
true, "maxAge": 14400000, "formats": ["markdown"], "mobile": false}, "origin":
|
||||
"python-sdk@4.5.0"}'
|
||||
headers:
|
||||
Accept:
|
||||
- '*/*'
|
||||
Accept-Encoding:
|
||||
- gzip, deflate, zstd
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Length:
|
||||
- '338'
|
||||
Content-Type:
|
||||
- application/json
|
||||
User-Agent:
|
||||
- python-requests/2.32.5
|
||||
method: POST
|
||||
uri: https://api.firecrawl.dev/v2/search
|
||||
response:
|
||||
body:
|
||||
string: "{\"success\":true,\"data\":{\"web\":[{\"url\":\"https://www.firecrawl.dev/\",\"title\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"description\":\"The web crawling, scraping,
|
||||
and search API for AI. Built for scale. Firecrawl delivers the entire internet
|
||||
to AI agents and builders.\",\"position\":1,\"markdown\":\"We just raised
|
||||
our Series A and shipped Firecrawl /v2 \U0001F389. [Read the blog.](https://www.firecrawl.dev/blog/firecrawl-v2-series-a-announcement)\\n\\n[2
|
||||
Months Free \u2014 Annually](https://www.firecrawl.dev/pricing)\\n\\n# Turn
|
||||
websites into LLM-ready data\\n\\nPower your AI apps with clean web data\\n\\nfrom
|
||||
any website. [It's also open source.](https://github.com/firecrawl/firecrawl)\\n\\nScrape\\n\\nSearch\\nNew\\n\\nMap\\n\\nCrawl\\n\\nScrape\\n\\nLogo\\n\\nNavigation\\n\\nButton\\n\\nH1
|
||||
Title\\n\\nDescription\\n\\nCTA Button\\n\\n\\\\[ .JSON \\\\]\\n\\n```json\\n1[\\\\\\n2
|
||||
\ {\\\\\\n3 \\\"url\\\": \\\"https://example.com\\\",\\\\\\n4 \\\"markdown\\\":
|
||||
\\\"# Getting Started...\\\",\\\\\\n5 \\\"json\\\": { \\\"title\\\": \\\"Guide\\\",
|
||||
\\\"docs\\\": \\\"...\\\" },\\\\\\n6 \\\"screenshot\\\": \\\"https://example.com/hero.png\\\"\\\\\\n7
|
||||
\ }\\\\\\n8]\\n```\\n\\nScrape Completed\\n\\nTrusted by5000+\\n\\ncompaniesof
|
||||
all sizes\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\\\[01/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nMain Features\\n\\n//\\n\\nDeveloper First\\n\\n//\\n\\n##
|
||||
Startscraping today\\n\\nEnhance your apps with industry leading web scraping
|
||||
and crawling capabilities.\\n\\nScrape\\n\\nGet llm-ready data from websites.
|
||||
Markdown, JSON, screenshot, etc.\\n\\nSearch\\n\\nNew\\n\\nSearch the web
|
||||
and get full content from results.\\n\\nCrawl\\n\\nCrawl all the pages on
|
||||
a website and get data for each page.\\n\\nPython\\n\\nNode.js\\n\\nCurl\\n\\nCopy
|
||||
code\\n\\n```python\\n1# pip install firecrawl-py\\n2from firecrawl import
|
||||
Firecrawl\\n3\\n4app = Firecrawl(api_key=\\\"fc-YOUR_API_KEY\\\")\\n5\\n6#
|
||||
Scrape a website:\\n7app.scrape('firecrawl.dev')\\n8\\n9\\n10\\n```\\n\\n\\\\[
|
||||
.MD \\\\]\\n\\n```markdown\\n1# Firecrawl\\n2\\n3Firecrawl is a powerful web
|
||||
scraping\\n4library that makes it easy to extract\\n5data from websites.\\n6\\n7##
|
||||
Installation\\n8\\n9To install Firecrawl, run:\\n10\\n11\\n```\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nIntegrations\\n\\n###
|
||||
Use well-known tools\\n\\nAlready fully integrated with the greatest existing
|
||||
tools and workflows.\\n\\n[See all integrations](https://www.firecrawl.dev/app)\\n\\n\\n\\nmendableai/firecrawl\\n\\nPublic\\n\\nStar\\n\\n65.3K\\n\\n\\\\[python-SDK\\\\]
|
||||
improvs/async\\n\\n#1337\\n\\n\xB7\\n\\nApr 18, 2025\\n\\n\xB7\\n\\n\\n\\nrafaelsideguide\\n\\nfeat(extract):
|
||||
cost limit\\n\\n#1473\\n\\n\xB7\\n\\nApr 17, 2025\\n\\n\xB7\\n\\n\\n\\nmogery\\n\\nfeat(scrape):
|
||||
get job result from GCS, avoid Redis\\n\\n#1461\\n\\n\xB7\\n\\nApr 15, 2025\\n\\n\xB7\\n\\n\\n\\nmogery\\n\\nExtract
|
||||
v2/rerank improvs\\n\\n#1437\\n\\n\xB7\\n\\nApr 11, 2025\\n\\n\xB7\\n\\n\\n\\nrafaelsideguide\\n\\n\\n\\n\\n\\n+90\\n\\nOpen
|
||||
Source\\n\\n### Code you can trust\\n\\nDeveloped transparently and collaboratively.
|
||||
Join our community of contributors.\\n\\n[Check out our repo](https://github.com/firecrawl/firecrawl)\\n\\n\\\\[02/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nCore\\n\\n//\\n\\nBuilt to outperform\\n\\n//\\n\\n##
|
||||
Core principles, provenperformance\\n\\nBuilt from the ground up to outperform
|
||||
traditional scrapers.\\n\\nNo proxy headaches\\n\\nReliable.Covers 96% of
|
||||
the web,\\n\\nincluding JS-heavy and protected pages. No proxies, no puppets,
|
||||
just clean data.\\n\\nFirecrawl\\n\\n96%\\n\\n\\n\\nPuppeteer\\n\\n79%\\n\\ncURL\\n\\n75%\\n\\nSpeed
|
||||
that feels invisible\\n\\nBlazingly fast.Delivers results in less than 1 second,
|
||||
fast for real-time agents\\n\\nand dynamic apps.\\n\\nURL\\n\\nCrawl\\n\\nScrape\\n\\nfirecrawl.dev/docs\\n\\n50ms\\n\\n51ms\\n\\nfirecrawl.dev/templates\\n\\n52ms\\n\\n50ms\\n\\nfirecrawl.dev/changelog\\n\\n49ms\\n\\n52ms\\n\\nfirecrawl.dev/about\\n\\n52ms\\n\\n50ms\\n\\nfirecrawl.dev/changelog\\n\\n50ms\\n\\n52ms\\n\\nfirecrawl.dev/playground\\n\\n51ms\\n\\n49ms\\n\\n\\\\[
|
||||
CTA \\\\]\\n\\n\\\\[ CRAWL \\\\]\\n\\n\\\\[ SCRAPE \\\\]\\n\\n\\\\[ CTA \\\\]\\n\\n//\\n\\nGet
|
||||
started\\n\\n//\\n\\nReady to build?\\n\\nStart getting Web Data for free
|
||||
and scale seamlessly as your project expands. No credit card needed.\\n\\n[Start
|
||||
for free](https://www.firecrawl.dev/signin) [See our plans](https://www.firecrawl.dev/pricing)\\n\\n\\\\[03/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nFeatures\\n\\n//\\n\\nZero configuration\\n\\n//\\n\\n##
|
||||
We handle the hard stuff\\n\\nRotating proxies, orchestration, rate limits,
|
||||
js-blocked content and more.\\n\\nDocs to data\\n\\nMedia parsing.Firecrawl
|
||||
can parse and output content from web hosted pdfs, docx, and more.\\n\\nhttps://example.com/docs/report.pdf\\n\\nhttps://example.com/files/brief.docx\\n\\nhttps://example.com/docs/guide.html\\n\\ndocx\\n\\nParsing...\\n\\nKnows
|
||||
the moment\\n\\nSmart wait.Firecrawl intelligently waits for content to load,
|
||||
making scraping faster and more reliable.\\n\\nhttps://example-spa.com\\n\\nRequest
|
||||
Sent\\n\\nScrapes the real thing\\n\\nCached, when you need it.Selective caching,
|
||||
you choose your caching patterns, growing web index.\\n\\n\\n\\nUser\\n\\nFirecrawl\\n\\nCache\\n\\nInvisible
|
||||
access\\n\\nStealth mode.Crawls the web without\\n\\nbeing blocked, mimics
|
||||
real users to access protected or dynamic content.\\n\\nInteractive scraping\\n\\nActions.Click,
|
||||
scroll, write, wait, press and more before extracting content.\\n\\nhttps://example.com\\n\\nNavigate\\n\\nClick\\n\\nType\\n\\nWait\\n\\nScroll\\n\\nPress\\n\\nScreenshot\\n\\nScrape\\n\\n\\\\[04/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nPricing\\n\\n//\\n\\nTransparent\\n\\n//\\n\\n## Flexible
|
||||
pricing\\n\\nExplore transparent pricing built for real-world scraping. Start
|
||||
for free, then scale as you grow.\\n\\n\U0001F1FA\U0001F1F8USD\\n\\nFree Plan\\n\\nA
|
||||
lightweight way to try scraping.\\n\\nNo cost, no card, no hassle.\\n\\n500
|
||||
credits\\n\\n$0123456789\\n\\none-time\\n\\nGet started\\n\\nScrape 500 pages\\n\\n2
|
||||
concurrent requests\\n\\nLow rate limits\\n\\nHobby\\n\\nGreat for side projects
|
||||
and small tools.\\n\\nFast, simple, no overkill.\\n\\n3,000 credits\\n\\n$01234567890123456789\\n\\n/monthly\\n\\nBilled
|
||||
yearly\\n\\n2 months free\\n\\nSubscribe\\n\\nScrape 3,000 pages\\n\\n5 concurrent
|
||||
requests\\n\\nBasic support\\n\\n$9 per extra 1k credits\\n\\nStandard\\n\\nMost
|
||||
popular\\n\\nPerfect for scaling with less effort.\\n\\nSimple, solid, dependable.\\n\\n100,000
|
||||
credits\\n\\n$01234567890123456789\\n\\n/monthly\\n\\nBilled yearly\\n\\n2
|
||||
months free\\n\\nSubscribe\\n\\nScrape 100,000 pages\\n\\n50 concurrent requests\\n\\nStandard
|
||||
support\\n\\n$47 per extra 35k credits\\n\\nGrowth\\n\\nBuilt for high volume
|
||||
and speed.\\n\\nFirecrawl at full force.\\n\\n500,000 credits\\n\\n$012345678901234567890123456789\\n\\n/monthly\\n\\nBilled
|
||||
yearly\\n\\n2 months free\\n\\nSubscribe\\n\\nScrape 500,000 pages\\n\\n100
|
||||
concurrent requests\\n\\nPriority support\\n\\n$177 per extra 175k credits\\n\\nExtra
|
||||
credits are available via auto-recharge packs. [Enable](https://www.firecrawl.dev/signin/signup)\\n\\nEnterprise\\n\\nPower
|
||||
at your pace\\n\\nUnlimited credits. Custom RPMs.\\n\\n[Contact sales](https://fk4bvu0n5qp.typeform.com/to/Ej6oydlg)
|
||||
[More details](https://www.firecrawl.dev/enterprise)\\n\\nBulk discounts\\n\\nTop
|
||||
priority support\\n\\nCustom concurrency limits\\n\\nImproved stealth proxies\\n\\nSLAs\\n\\nAdvanced
|
||||
security & controls\\n\\n\\\\[05/ 07 \\\\]\\n\\n\xB7\\n\\nTestimonials\\n\\n//\\n\\nCommunity\\n\\n//\\n\\n##
|
||||
People love building withFirecrawl\\n\\nDiscover why developers choose
|
||||
Firecrawl every day.\\n\\n[Morgan
|
||||
Linton@morganlinton\\\"If you're coding with AI, and haven't discovered @firecrawl\\\\_dev
|
||||
yet, prepare to have your mind blown \U0001F92F\\\"](https://x.com/morganlinton/status/1839454165703204955)
|
||||
[Chris
|
||||
DeWeese@chrisdeweese\\\\_\\\"Started using @firecrawl\\\\_dev for a project,
|
||||
I wish I used this sooner.\\\"](https://x.com/chrisdeweese_/status/1853587120406876601)
|
||||
[Alex
|
||||
Reibman@AlexReibman\\\"Moved our internal agent's web scraping tool from Apify
|
||||
to Firecrawl because it benchmarked 50x faster with AgentOps.\\\"](https://x.com/AlexReibman/status/1780299595484131836)
|
||||
[Tom
|
||||
- Morpho@TomReppelin\\\"I found gold today. Thank you @firecrawl\\\\_dev\\\"](https://x.com/TomReppelin/status/1844382491014201613)\\n\\n[Morgan
|
||||
Linton@morganlinton\\\"If you're coding with AI, and haven't discovered @firecrawl\\\\_dev
|
||||
yet, prepare to have your mind blown \U0001F92F\\\"](https://x.com/morganlinton/status/1839454165703204955)
|
||||
[Chris
|
||||
DeWeese@chrisdeweese\\\\_\\\"Started using @firecrawl\\\\_dev for a project,
|
||||
I wish I used this sooner.\\\"](https://x.com/chrisdeweese_/status/1853587120406876601)
|
||||
[Alex
|
||||
Reibman@AlexReibman\\\"Moved our internal agent's web scraping tool from Apify
|
||||
to Firecrawl because it benchmarked 50x faster with AgentOps.\\\"](https://x.com/AlexReibman/status/1780299595484131836)
|
||||
[Tom
|
||||
- Morpho@TomReppelin\\\"I found gold today. Thank you @firecrawl\\\\_dev\\\"](https://x.com/TomReppelin/status/1844382491014201613)\\n\\n[Bardia@thepericulum\\\"The
|
||||
Firecrawl team ships. I wanted types for their node SDK, and less than an
|
||||
hour later, I got them.\\\"](https://x.com/thepericulum/status/1781397799487078874)
|
||||
[Matt
|
||||
Busigin@mbusigin\\\"Firecrawl is dope. Congrats guys \U0001F44F\\\"](https://x.com/mbusigin/status/1836065372010656069)
|
||||
[Sumanth@Sumanth\\\\_077\\\"Web
|
||||
scraping will never be the same!\\\\\\\\\\n\\\\\\\\\\nFirecrawl is an open-source
|
||||
framework that takes a URL, crawls it, and conver...\\\"](https://x.com/Sumanth_077/status/1940049003074478511)
|
||||
[Steven
|
||||
Tey@steventey\\\"Open-source Clay alternative just dropped\\\\\\\\\\n\\\\\\\\\\nUpload
|
||||
a CSV of emails and...\\\"](https://x.com/steventey/status/1932945651761098889)\\n\\n[Bardia@thepericulum\\\"The
|
||||
Firecrawl team ships. I wanted types for their node SDK, and less than an
|
||||
hour later, I got them.\\\"](https://x.com/thepericulum/status/1781397799487078874)
|
||||
[Matt
|
||||
Busigin@mbusigin\\\"Firecrawl is dope. Congrats guys \U0001F44F\\\"](https://x.com/mbusigin/status/1836065372010656069)
|
||||
[Sumanth@Sumanth\\\\_077\\\"Web
|
||||
scraping will never be the same!\\\\\\\\\\n\\\\\\\\\\nFirecrawl is an open-source
|
||||
framework that takes a URL, crawls it, and conver...\\\"](https://x.com/Sumanth_077/status/1940049003074478511)
|
||||
[Steven
|
||||
Tey@steventey\\\"Open-source Clay alternative just dropped\\\\\\\\\\n\\\\\\\\\\nUpload
|
||||
a CSV of emails and...\\\"](https://x.com/steventey/status/1932945651761098889)\\n\\n\\\\[06/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nUse Cases\\n\\n//\\n\\nUse cases\\n\\n//\\n\\n## Transform
|
||||
\ web data into AI-powered solutions\\n\\nDiscover how Firecrawl customers
|
||||
are getting the most out of our API.\\n\\n[View all use cases](https://docs.firecrawl.dev/use-cases/overview)\\n\\nChat
|
||||
with context\\n\\nSmarter AI chats\\n\\nPower your AI assistants with real-time,
|
||||
accurate web content.\\n\\n[View docs](https://docs.firecrawl.dev/introduction)\\n\\n\\n\\nAI Assistant\\n\\nwithFirecrawl\\n\\nReal-time\xB7Updated
|
||||
2 min ago\\n\\nAsk anything...\\n\\nKnow your leads\\n\\nLead enrichment\\n\\nEnhance
|
||||
your sales data with\\n\\nweb information.\\n\\n[Check out Extract](https://www.firecrawl.dev/extract)\\n\\nExtracting
|
||||
leads from directory...\\n\\nTech startups\\n\\nWith contact info\\n\\nDecision
|
||||
makers\\n\\nFunding stage\\n\\nReady to engage\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nKnow your leads\\n\\nMCPs\\n\\nAdd
|
||||
powerful scraping to your\\n\\ncode editors.\\n\\n[Get started](https://docs.firecrawl.dev/mcp-server)\\n\\n\\n\\nClaude Code\\n\\n\\n\\nCursor\\n\\n\\n\\nWindsurf\\n\\n\u273B\\n\\nWelcome
|
||||
to Claude Code!\\n\\n/help for help, /status for your current setup\\n\\n>Try
|
||||
\\\"how do I log an error?\\\"\\n\\nBuild with context\\n\\nAI platforms\\n\\nLet
|
||||
your customers build AI apps\\n\\nwith web data.\\n\\n[Check out Map](https://docs.firecrawl.dev/features/map)\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nExtracting
|
||||
text...\\n\\nNo insight missed\\n\\nDeep research\\n\\nExtract comprehensive
|
||||
information for\\n\\nin-depth research.\\n\\n[Build your own with Search](https://docs.firecrawl.dev/features/search)\\n\\nDeep
|
||||
research in progress...\\n\\nAcademic papers\\n\\n0 found\\n\\nNews articles\\n\\n0
|
||||
found\\n\\nExpert opinions\\n\\n0 found\\n\\nResearch reports\\n\\n0 found\\n\\nIndustry
|
||||
data\\n\\n0 found\\n\\nAsk anything...\\n\\n\\\\[ CTA \\\\]\\n\\n\\\\[ CRAWL
|
||||
\\\\]\\n\\n\\\\[ SCRAPE \\\\]\\n\\n\\\\[ CTA \\\\]\\n\\n//\\n\\nGet started\\n\\n//\\n\\nReady
|
||||
to build?\\n\\nStart getting Web Data for free and scale seamlessly as your
|
||||
project expands. No credit card needed.\\n\\n[Start for free](https://www.firecrawl.dev/signin)
|
||||
[See our plans](https://www.firecrawl.dev/pricing)\\n\\n\\\\[07/ 07 \\\\]\\n\\n\xB7\\n\\nFAQ\\n\\n//\\n\\nFAQ\\n\\n//\\n\\n##
|
||||
Frequently askedquestions\\n\\nEverything you need to know about Firecrawl.\\n\\nGeneral\\n\\nWhat
|
||||
is Firecrawl?\\n\\nWhat sites work?\\n\\nWho can benefit from using Firecrawl?\\n\\nIs
|
||||
Firecrawl open-source?\\n\\nWhat is the difference between Firecrawl and other
|
||||
web scrapers?\\n\\nWhat is the difference between the open-source version
|
||||
and the hosted version?\\n\\nScraping & Crawling\\n\\nHow does Firecrawl handle
|
||||
dynamic content on websites?\\n\\nWhy is it not crawling all the pages?\\n\\nCan
|
||||
Firecrawl crawl websites without a sitemap?\\n\\nWhat formats can Firecrawl
|
||||
convert web data into?\\n\\nHow does Firecrawl ensure the cleanliness of the
|
||||
data?\\n\\nIs Firecrawl suitable for large-scale data scraping projects?\\n\\nDoes
|
||||
it respect robots.txt?\\n\\nWhat measures does Firecrawl take to handle web
|
||||
scraping challenges like rate limits and caching?\\n\\nDoes Firecrawl handle
|
||||
captcha or authentication?\\n\\nAPI Related\\n\\nWhere can I find my API key?\\n\\nBilling\\n\\nIs
|
||||
Firecrawl free?\\n\\nIs there a pay-per-use plan instead of monthly?\\n\\nDo
|
||||
credits roll over to the next month?\\n\\nHow many credits do scraping and
|
||||
crawling cost?\\n\\nDo you charge for failed requests?\\n\\nWhat payment methods
|
||||
do you accept?\\n\\nFOOTER\\n\\nThe easiest way to extract\\n\\ndata from
|
||||
the web\\n\\nBacked by\\n\\nY Combinator\\n\\n[Linkedin](https://www.linkedin.com/company/firecrawl)
|
||||
[Github](https://github.com/firecrawl/firecrawl)\\n\\nSOC II \xB7 Type 2\\n\\nAICPA\\n\\nSOC
|
||||
2\\n\\n[X (Twitter)](https://x.com/firecrawl_dev) [Discord](https://discord.gg/gSmWdAkdwd)\\n\\nProducts\\n\\n[Playground](https://www.firecrawl.dev/playground)
|
||||
[Extract](https://www.firecrawl.dev/extract) [Pricing](https://www.firecrawl.dev/pricing)
|
||||
[Templates](https://www.firecrawl.dev/templates) [Changelog](https://www.firecrawl.dev/changelog)\\n\\nUse
|
||||
Cases\\n\\n[AI Platforms](https://docs.firecrawl.dev/use-cases/ai-platforms)
|
||||
[Lead Enrichment](https://docs.firecrawl.dev/use-cases/lead-enrichment) [SEO
|
||||
Platforms](https://docs.firecrawl.dev/use-cases/seo-platforms) [Deep Research](https://docs.firecrawl.dev/use-cases/deep-research)\\n\\nDocumentation\\n\\n[Getting
|
||||
started](https://docs.firecrawl.dev/introduction) [API Reference](https://docs.firecrawl.dev/api-reference/introduction)
|
||||
[Integrations](https://www.firecrawl.dev/app) [Examples](https://docs.firecrawl.dev/use-cases/overview)
|
||||
[SDKs](https://docs.firecrawl.dev/sdks/overview)\\n\\nCompany\\n\\n[Blog](https://www.firecrawl.dev/blog)
|
||||
[Careers](https://www.firecrawl.dev/careers) [Creator & OSS program](https://www.firecrawl.dev/creator-oss-program)
|
||||
[Student program](https://www.firecrawl.dev/student-program)\\n\\n\xA9 2025
|
||||
Firecrawl\\n\\n[Terms of Service](https://www.firecrawl.dev/terms-of-service)
|
||||
[Privacy Policy](https://www.firecrawl.dev/privacy-policy) [Report Abuse](mailto:help@firecrawl.com?subject=Issue:)\\n\\n[All
|
||||
systems normal](https://status.firecrawl.dev/)\\n\\nStripeM-Inner\",\"metadata\":{\"favicon\":\"https://www.firecrawl.dev/favicon.png\",\"ogUrl\":\"https://www.firecrawl.dev\",\"ogImage\":\"https://www.firecrawl.dev/og.png\",\"referrer\":\"origin-when-cross-origin\",\"ogDescription\":\"The
|
||||
web crawling, scraping, and search API for AI. Built for scale. Firecrawl
|
||||
delivers the entire internet to AI agents and builders. Clean, structured,
|
||||
and ready to reason with.\",\"robots\":\"follow, index\",\"twitter:card\":\"summary_large_image\",\"og:site_name\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"twitter:title\":\"Firecrawl - The Web Data API
|
||||
for AI\",\"og:image\":\"https://www.firecrawl.dev/og.png\",\"title\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"og:description\":\"The web crawling, scraping,
|
||||
and search API for AI. Built for scale. Firecrawl delivers the entire internet
|
||||
to AI agents and builders. Clean, structured, and ready to reason with.\",\"twitter:image\":\"https://www.firecrawl.dev/og.png\",\"viewport\":\"width=device-width,
|
||||
initial-scale=1, maximum-scale=1, user-scalable=no\",\"ogSiteName\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"keywords\":\"Firecrawl,Markdown,Data,Mendable,Langchain\",\"author\":\"Firecrawl\",\"og:title\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"twitter:description\":\"The web crawling, scraping,
|
||||
and search API for AI. Built for scale. Firecrawl delivers the entire internet
|
||||
to AI agents and builders. Clean, structured, and ready to reason with.\",\"description\":\"The
|
||||
web crawling, scraping, and search API for AI. Built for scale. Firecrawl
|
||||
delivers the entire internet to AI agents and builders. Clean, structured,
|
||||
and ready to reason with.\",\"twitter:site\":\"@Vercel\",\"og:url\":\"https://www.firecrawl.dev\",\"og:type\":\"website\",\"ogTitle\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"language\":\"en\",\"creator\":\"Firecrawl\",\"publisher\":\"Firecrawl\",\"twitter:creator\":\"@Vercel\",\"scrapeId\":\"57b0586f-36e8-4923-aaa2-88ff58c03999\",\"sourceURL\":\"https://www.firecrawl.dev/\",\"url\":\"https://www.firecrawl.dev/\",\"statusCode\":200,\"contentType\":\"text/html;
|
||||
charset=utf-8\",\"proxyUsed\":\"basic\",\"cacheState\":\"hit\",\"cachedAt\":\"2025-10-29T13:09:07.713Z\"}},{\"url\":\"https://github.com/firecrawl/firecrawl\",\"title\":\"firecrawl/firecrawl:
|
||||
The Web Data API for AI - Turn entire ... - GitHub\",\"description\":\"Firecrawl
|
||||
is an API service that takes a URL, crawls it, and converts it into clean
|
||||
markdown or structured data. We crawl all accessible subpages and give you
|
||||
...\",\"position\":2,\"category\":\"github\",\"markdown\":\"[Skip to content](https://github.com/firecrawl/firecrawl#start-of-content)\\n\\nYou
|
||||
signed in with another tab or window. [Reload](https://github.com/firecrawl/firecrawl)
|
||||
to refresh your session.You signed out in another tab or window. [Reload](https://github.com/firecrawl/firecrawl)
|
||||
to refresh your session.You switched accounts on another tab or window. [Reload](https://github.com/firecrawl/firecrawl)
|
||||
to refresh your session.Dismiss alert\\n\\n{{ message }}\\n\\n[firecrawl](https://github.com/firecrawl)/
|
||||
**[firecrawl](https://github.com/firecrawl/firecrawl)** Public\\n\\n- Couldn't
|
||||
load subscription status.\\nRetry\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n### Uh
|
||||
oh!\\n\\n\\n\\n\\n\\n\\n\\nThere was an error while loading. [Please reload
|
||||
this page](https://github.com/firecrawl/firecrawl).\\n\\n- [Fork\\\\\\\\\\n5.1k](https://github.com/login?return_to=%2Ffirecrawl%2Ffirecrawl)\\n-
|
||||
[Star\\\\\\\\\\n65.2k](https://github.com/login?return_to=%2Ffirecrawl%2Ffirecrawl)\\n\\n\\n\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data\\n\\n\\n[firecrawl.dev](https://firecrawl.dev/ \\\"https://firecrawl.dev\\\")\\n\\n###
|
||||
License\\n\\n[AGPL-3.0 license](https://github.com/firecrawl/firecrawl/blob/main/LICENSE)\\n\\n[65.2k\\\\\\\\\\nstars](https://github.com/firecrawl/firecrawl/stargazers)
|
||||
[5.1k\\\\\\\\\\nforks](https://github.com/firecrawl/firecrawl/forks) [Branches](https://github.com/firecrawl/firecrawl/branches)
|
||||
[Tags](https://github.com/firecrawl/firecrawl/tags) [Activity](https://github.com/firecrawl/firecrawl/activity)\\n\\n[Star](https://github.com/login?return_to=%2Ffirecrawl%2Ffirecrawl)\\n\\nCouldn't
|
||||
load subscription status.\\nRetry\\n\\n### Uh oh!\\n\\nThere was an error
|
||||
while loading. [Please reload this page](https://github.com/firecrawl/firecrawl).\\n\\n#
|
||||
firecrawl/firecrawl\\n\\nmain\\n\\n[**887** Branches](https://github.com/firecrawl/firecrawl/branches)
|
||||
[**28** Tags](https://github.com/firecrawl/firecrawl/tags)\\n\\n[Go to Branches
|
||||
page](https://github.com/firecrawl/firecrawl/branches)[Go to Tags page](https://github.com/firecrawl/firecrawl/tags)\\n\\nGo
|
||||
to file\\n\\nCode\\n\\nOpen more actions menu\\n\\n## Folders and files\\n\\n|
|
||||
Name | Name | Last commit message | Last commit date |\\n| --- | --- | ---
|
||||
| --- |\\n| ## Latest commit<br>[](https://github.com/amplitudesxd)[amplitudesxd](https://github.com/firecrawl/firecrawl/commits?author=amplitudesxd)<br>[chore:
|
||||
update last scrape rpc (](https://github.com/firecrawl/firecrawl/commit/37de2877fab4bae2de297e37bad3c9bcd49a64bc)
|
||||
[#2339](https://github.com/firecrawl/firecrawl/pull/2339) [)](https://github.com/firecrawl/firecrawl/commit/37de2877fab4bae2de297e37bad3c9bcd49a64bc)<br>success<br>20
|
||||
hours agoOct 27, 2025<br>[37de287](https://github.com/firecrawl/firecrawl/commit/37de2877fab4bae2de297e37bad3c9bcd49a64bc)\_\xB7\_20
|
||||
hours agoOct 27, 2025<br>## History<br>[4,487 Commits](https://github.com/firecrawl/firecrawl/commits/main/)
|
||||
<br>Open commit details<br>[View commit history for this file.](https://github.com/firecrawl/firecrawl/commits/main/)
|
||||
|\\n| [.github](https://github.com/firecrawl/firecrawl/tree/main/.github \\\".github\\\")
|
||||
| [.github](https://github.com/firecrawl/firecrawl/tree/main/.github \\\".github\\\")
|
||||
| [fix(ci): temp disabled prod env tests](https://github.com/firecrawl/firecrawl/commit/42fc149c1ab738da0e15e772817774aa35273f8e
|
||||
\\\"fix(ci): temp disabled prod env tests\\\") | 5 days agoOct 23, 2025 |\\n|
|
||||
[apps](https://github.com/firecrawl/firecrawl/tree/main/apps \\\"apps\\\")
|
||||
| [apps](https://github.com/firecrawl/firecrawl/tree/main/apps \\\"apps\\\")
|
||||
| [chore: update last scrape rpc (](https://github.com/firecrawl/firecrawl/commit/37de2877fab4bae2de297e37bad3c9bcd49a64bc
|
||||
\\\"chore: update last scrape rpc (#2339)\\\") [#2339](https://github.com/firecrawl/firecrawl/pull/2339)
|
||||
[)](https://github.com/firecrawl/firecrawl/commit/37de2877fab4bae2de297e37bad3c9bcd49a64bc
|
||||
\\\"chore: update last scrape rpc (#2339)\\\") | 20 hours agoOct 27, 2025
|
||||
|\\n| [examples](https://github.com/firecrawl/firecrawl/tree/main/examples
|
||||
\\\"examples\\\") | [examples](https://github.com/firecrawl/firecrawl/tree/main/examples
|
||||
\\\"examples\\\") | [Merge pull request](https://github.com/firecrawl/firecrawl/commit/7ad57003b4ad8b230ba8252129e52bafa62dfae9
|
||||
\\\"Merge pull request #2172 from MAVRICK-1/firecrawl-gemini-screenshot-editor
|
||||
\ feat: Add Firecrawl + Gemini 2.5 Flash Image CLI Editor\\\") [#2172](https://github.com/firecrawl/firecrawl/pull/2172)
|
||||
[from MAVRICK-1/firecrawl-gemini-screenshot-e\u2026](https://github.com/firecrawl/firecrawl/commit/7ad57003b4ad8b230ba8252129e52bafa62dfae9
|
||||
\\\"Merge pull request #2172 from MAVRICK-1/firecrawl-gemini-screenshot-editor
|
||||
\ feat: Add Firecrawl + Gemini 2.5 Flash Image CLI Editor\\\") | last monthSep
|
||||
23, 2025 |\\n| [img](https://github.com/firecrawl/firecrawl/tree/main/img
|
||||
\\\"img\\\") | [img](https://github.com/firecrawl/firecrawl/tree/main/img
|
||||
\\\"img\\\") | [updated readme](https://github.com/firecrawl/firecrawl/commit/4f904e774831dc598681d3e998d0e5e15abcec27
|
||||
\\\"updated readme\\\") | 2 months agoAug 18, 2025 |\\n| [.gitattributes](https://github.com/firecrawl/firecrawl/blob/main/.gitattributes
|
||||
\\\".gitattributes\\\") | [.gitattributes](https://github.com/firecrawl/firecrawl/blob/main/.gitattributes
|
||||
\\\".gitattributes\\\") | [Initial commit](https://github.com/firecrawl/firecrawl/commit/a6c2a878119321a196f720cce4195e086f1c6b46
|
||||
\\\"Initial commit\\\") | last yearApr 15, 2024 |\\n| [.gitignore](https://github.com/firecrawl/firecrawl/blob/main/.gitignore
|
||||
\\\".gitignore\\\") | [.gitignore](https://github.com/firecrawl/firecrawl/blob/main/.gitignore
|
||||
\\\".gitignore\\\") | [Nick: init](https://github.com/firecrawl/firecrawl/commit/ab3fa4838458c8303a67dd30fdd75a16b89cc20b
|
||||
\\\"Nick: init\\\") | 3 weeks agoOct 10, 2025 |\\n| [.gitmodules](https://github.com/firecrawl/firecrawl/blob/main/.gitmodules
|
||||
\\\".gitmodules\\\") | [.gitmodules](https://github.com/firecrawl/firecrawl/blob/main/.gitmodules
|
||||
\\\".gitmodules\\\") | [mendableai -> firecrawl](https://github.com/firecrawl/firecrawl/commit/2f3bc4e7a7b1a67a29c06df629f79402ee1aad1b
|
||||
\\\"mendableai -> firecrawl\\\") | 2 months agoAug 18, 2025 |\\n| [CLAUDE.md](https://github.com/firecrawl/firecrawl/blob/main/CLAUDE.md
|
||||
\\\"CLAUDE.md\\\") | [CLAUDE.md](https://github.com/firecrawl/firecrawl/blob/main/CLAUDE.md
|
||||
\\\"CLAUDE.md\\\") | [add claude file](https://github.com/firecrawl/firecrawl/commit/3f0873c788823258a7d9f55d1c8772aed4e1a8de
|
||||
\\\"add claude file\\\") | 2 months agoAug 6, 2025 |\\n| [CONTRIBUTING.md](https://github.com/firecrawl/firecrawl/blob/main/CONTRIBUTING.md
|
||||
\\\"CONTRIBUTING.md\\\") | [CONTRIBUTING.md](https://github.com/firecrawl/firecrawl/blob/main/CONTRIBUTING.md
|
||||
\\\"CONTRIBUTING.md\\\") | [Add Rust to CONTRIBUTING (](https://github.com/firecrawl/firecrawl/commit/f396cb20b54c3c2d7e64882642c5df6310a01002
|
||||
\\\"Add Rust to CONTRIBUTING (#2180)\\\") [#2180](https://github.com/firecrawl/firecrawl/pull/2180)
|
||||
[)](https://github.com/firecrawl/firecrawl/commit/f396cb20b54c3c2d7e64882642c5df6310a01002
|
||||
\\\"Add Rust to CONTRIBUTING (#2180)\\\") | last monthSep 18, 2025 |\\n| [LICENSE](https://github.com/firecrawl/firecrawl/blob/main/LICENSE
|
||||
\\\"LICENSE\\\") | [LICENSE](https://github.com/firecrawl/firecrawl/blob/main/LICENSE
|
||||
\\\"LICENSE\\\") | [Update SDKs to MIT license](https://github.com/firecrawl/firecrawl/commit/afb49e21e7cff595ebad9ce0b7aba13b88f39cf8
|
||||
\\\"Update SDKs to MIT license\\\") | last yearJul 8, 2024 |\\n| [README.md](https://github.com/firecrawl/firecrawl/blob/main/README.md
|
||||
\\\"README.md\\\") | [README.md](https://github.com/firecrawl/firecrawl/blob/main/README.md
|
||||
\\\"README.md\\\") | [Update README.md](https://github.com/firecrawl/firecrawl/commit/a21430e97818d95099bb365be711d9227bd75590
|
||||
\\\"Update README.md\\\") | 3 weeks agoOct 6, 2025 |\\n| [SELF\\\\_HOST.md](https://github.com/firecrawl/firecrawl/blob/main/SELF_HOST.md
|
||||
\\\"SELF_HOST.md\\\") | [SELF\\\\_HOST.md](https://github.com/firecrawl/firecrawl/blob/main/SELF_HOST.md
|
||||
\\\"SELF_HOST.md\\\") | [Allow self-hosted webhook delivery to private IP
|
||||
addresses (](https://github.com/firecrawl/firecrawl/commit/5756b834884d481382ce1f5674836a56b7fee33d
|
||||
\\\"Allow self-hosted webhook delivery to private IP addresses (#2232)\\\")
|
||||
[#2232](https://github.com/firecrawl/firecrawl/pull/2232) [)](https://github.com/firecrawl/firecrawl/commit/5756b834884d481382ce1f5674836a56b7fee33d
|
||||
\\\"Allow self-hosted webhook delivery to private IP addresses (#2232)\\\")
|
||||
| 27 days agoOct 1, 2025 |\\n| [docker-compose.yaml](https://github.com/firecrawl/firecrawl/blob/main/docker-compose.yaml
|
||||
\\\"docker-compose.yaml\\\") | [docker-compose.yaml](https://github.com/firecrawl/firecrawl/blob/main/docker-compose.yaml
|
||||
\\\"docker-compose.yaml\\\") | [Fix a self-hosted docker-compose.yaml bug
|
||||
caused by a recent firecraw\u2026](https://github.com/firecrawl/firecrawl/commit/7d4100b274889977fa1ba26344532d9d8747494c
|
||||
\\\"Fix a self-hosted docker-compose.yaml bug caused by a recent firecrawl
|
||||
change (#2252) Add EXTRACT_WORKER_PORT to docker-compose environment\\\")
|
||||
| 3 weeks agoOct 4, 2025 |\\n| View all files |\\n\\n## Repository files navigation\\n\\n###
|
||||
[](https://raw.githubusercontent.com/firecrawl/firecrawl/main/img/firecrawl_logo.png)\\n\\n[Permalink:
|
||||
](https://github.com/firecrawl/firecrawl#----)\\n\\n[](https://github.com/firecrawl/firecrawl/blob/main/LICENSE)[](https://pepy.tech/project/firecrawl-py)[](https://github.com/firecrawl/firecrawl/graphs/contributors)[](https://firecrawl.dev/)\\n\\n[](https://twitter.com/firecrawl_dev)[](https://www.linkedin.com/company/104100957)[](https://discord.com/invite/gSmWdAkdwd)\\n\\n#
|
||||
\U0001F525 Firecrawl\\n\\n[Permalink: \U0001F525 Firecrawl](https://github.com/firecrawl/firecrawl#-firecrawl)\\n\\nEmpower
|
||||
your AI apps with clean data from any website. Featuring advanced scraping,
|
||||
crawling, and data extraction capabilities.\\n\\n_This repository is in development,
|
||||
and we\u2019re still integrating custom modules into the mono repo. It's not
|
||||
fully ready for self-hosted deployment yet, but you can run it locally._\\n\\n##
|
||||
What is Firecrawl?\\n\\n[Permalink: What is Firecrawl?](https://github.com/firecrawl/firecrawl#what-is-firecrawl)\\n\\n[Firecrawl](https://firecrawl.dev/?ref=github)
|
||||
is an API service that takes a URL, crawls it, and converts it into clean
|
||||
markdown or structured data. We crawl all accessible subpages and give you
|
||||
clean data for each. No sitemap required. Check out our [documentation](https://docs.firecrawl.dev/).\\n\\nLooking
|
||||
for our MCP? Check out the [repo here](https://github.com/firecrawl/firecrawl-mcp-server).\\n\\n_Pst.
|
||||
hey, you, join our stargazers :)_\\n\\n[](https://github.com/firecrawl/firecrawl)\\n\\n##
|
||||
How to use it?\\n\\n[Permalink: How to use it?](https://github.com/firecrawl/firecrawl#how-to-use-it)\\n\\nWe
|
||||
provide an easy to use API with our hosted version. You can find the playground
|
||||
and documentation [here](https://firecrawl.dev/playground). You can also self
|
||||
host the backend if you'd like.\\n\\nCheck out the following resources to
|
||||
get started:\\n\\n- [x] **API**: [Documentation](https://docs.firecrawl.dev/api-reference/introduction)\\n-
|
||||
[x] **SDKs**: [Python](https://docs.firecrawl.dev/sdks/python), [Node](https://docs.firecrawl.dev/sdks/node)\\n-
|
||||
[x] **LLM Frameworks**: [Langchain (python)](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/),
|
||||
[Langchain (js)](https://js.langchain.com/docs/integrations/document_loaders/web_loaders/firecrawl),
|
||||
[Llama Index](https://docs.llamaindex.ai/en/latest/examples/data_connectors/WebPageDemo/#using-firecrawl-reader),
|
||||
[Crew.ai](https://docs.crewai.com/), [Composio](https://composio.dev/tools/firecrawl/all),
|
||||
[PraisonAI](https://docs.praison.ai/firecrawl/), [Superinterface](https://superinterface.ai/docs/assistants/functions/firecrawl),
|
||||
[Vectorize](https://docs.vectorize.io/integrations/source-connectors/firecrawl)\\n-
|
||||
[x] **Low-code Frameworks**: [Dify](https://dify.ai/blog/dify-ai-blog-integrated-with-firecrawl),
|
||||
[Langflow](https://docs.langflow.org/), [Flowise AI](https://docs.flowiseai.com/integrations/langchain/document-loaders/firecrawl),
|
||||
[Cargo](https://docs.getcargo.io/integration/firecrawl), [Pipedream](https://pipedream.com/apps/firecrawl/)\\n-
|
||||
[x] **Community SDKs**: [Go](https://docs.firecrawl.dev/sdks/go), [Rust](https://docs.firecrawl.dev/sdks/rust)\\n-
|
||||
[x] **Others**: [Zapier](https://zapier.com/apps/firecrawl/integrations),
|
||||
[Pabbly Connect](https://www.pabbly.com/connect/integrations/firecrawl/)\\n-
|
||||
[ ] Want an SDK or Integration? Let us know by opening an issue.\\n\\nTo
|
||||
run locally, refer to guide [here](https://github.com/firecrawl/firecrawl/blob/main/CONTRIBUTING.md).\\n\\n###
|
||||
API Key\\n\\n[Permalink: API Key](https://github.com/firecrawl/firecrawl#api-key)\\n\\nTo
|
||||
use the API, you need to sign up on [Firecrawl](https://firecrawl.dev/) and
|
||||
get an API key.\\n\\n### Features\\n\\n[Permalink: Features](https://github.com/firecrawl/firecrawl#features)\\n\\n-
|
||||
[**Scrape**](https://github.com/firecrawl/firecrawl#scraping): scrapes a URL
|
||||
and get its content in LLM-ready format (markdown, structured data via [LLM
|
||||
Extract](https://github.com/firecrawl/firecrawl#llm-extraction-beta), screenshot,
|
||||
html)\\n- [**Crawl**](https://github.com/firecrawl/firecrawl#crawling): scrapes
|
||||
all the URLs of a web page and return content in LLM-ready format\\n- [**Map**](https://github.com/firecrawl/firecrawl#map):
|
||||
input a website and get all the website urls - extremely fast\\n- [**Search**](https://github.com/firecrawl/firecrawl#search):
|
||||
search the web and get full content from results\\n- [**Extract**](https://github.com/firecrawl/firecrawl#extract):
|
||||
get structured data from single page, multiple pages or entire websites with
|
||||
AI.\\n\\n### Powerful Capabilities\\n\\n[Permalink: Powerful Capabilities](https://github.com/firecrawl/firecrawl#powerful-capabilities)\\n\\n-
|
||||
**LLM-ready formats**: markdown, structured data, screenshot, HTML, links,
|
||||
metadata\\n- **The hard stuff**: proxies, anti-bot mechanisms, dynamic content
|
||||
(js-rendered), output parsing, orchestration\\n- **Customizability**: exclude
|
||||
tags, crawl behind auth walls with custom headers, max crawl depth, etc...\\n-
|
||||
**Media parsing**: pdfs, docx, images\\n- **Reliability first**: designed
|
||||
to get the data you need - no matter how hard it is\\n- **Actions**: click,
|
||||
scroll, input, wait and more before extracting data\\n- **Batching**: scrape
|
||||
thousands of URLs at the same time with a new async endpoint\\n- **Change
|
||||
Tracking**: monitor and detect changes in website content over time\\n\\nYou
|
||||
can find all of Firecrawl's capabilities and how to use them in our [documentation](https://docs.firecrawl.dev/)\\n\\n###
|
||||
Crawling\\n\\n[Permalink: Crawling](https://github.com/firecrawl/firecrawl#crawling)\\n\\nUsed
|
||||
to crawl a URL and all accessible subpages. This submits a crawl job and returns
|
||||
a job ID to check the status of the crawl.\\n\\n```\\ncurl -X POST https://api.firecrawl.dev/v2/crawl
|
||||
\\\\\\n -H 'Content-Type: application/json' \\\\\\n -H 'Authorization:
|
||||
Bearer fc-YOUR_API_KEY' \\\\\\n -d '{\\n \\\"url\\\": \\\"https://docs.firecrawl.dev\\\",\\n
|
||||
\ \\\"limit\\\": 10,\\n \\\"scrapeOptions\\\": {\\n \\\"formats\\\":
|
||||
[\\\"markdown\\\", \\\"html\\\"]\\n }\\n }'\\n```\\n\\nReturns a crawl
|
||||
job id and the url to check the status of the crawl.\\n\\n```\\n{\\n \\\"success\\\":
|
||||
true,\\n \\\"id\\\": \\\"123-456-789\\\",\\n \\\"url\\\": \\\"https://api.firecrawl.dev/v2/crawl/123-456-789\\\"\\n}\\n```\\n\\n###
|
||||
Check Crawl Job\\n\\n[Permalink: Check Crawl Job](https://github.com/firecrawl/firecrawl#check-crawl-job)\\n\\nUsed
|
||||
to check the status of a crawl job and get its result.\\n\\n```\\ncurl -X
|
||||
GET https://api.firecrawl.dev/v2/crawl/123-456-789 \\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\n -H 'Authorization: Bearer YOUR_API_KEY'\\n```\\n\\n```\\n{\\n
|
||||
\ \\\"status\\\": \\\"completed\\\",\\n \\\"total\\\": 36,\\n \\\"creditsUsed\\\":
|
||||
36,\\n \\\"expiresAt\\\": \\\"2024-00-00T00:00:00.000Z\\\",\\n \\\"data\\\":
|
||||
[\\\\\\n {\\\\\\n \\\"markdown\\\": \\\"[Firecrawl Docs home page!...\\\",\\\\\\n
|
||||
\ \\\"html\\\": \\\"<!DOCTYPE html><html lang=\\\\\\\"en\\\\\\\" class=\\\\\\\"js-focus-visible
|
||||
lg:[--scroll-mt:9.5rem]\\\\\\\" data-js-focus-visible=\\\\\\\"\\\\\\\">...\\\",\\\\\\n
|
||||
\ \\\"metadata\\\": {\\\\\\n \\\"title\\\": \\\"Build a 'Chat with
|
||||
website' using Groq Llama 3 | Firecrawl\\\",\\\\\\n \\\"language\\\":
|
||||
\\\"en\\\",\\\\\\n \\\"sourceURL\\\": \\\"https://docs.firecrawl.dev/learn/rag-llama3\\\",\\\\\\n
|
||||
\ \\\"description\\\": \\\"Learn how to use Firecrawl, Groq Llama 3,
|
||||
and Langchain to build a 'Chat with your website' bot.\\\",\\\\\\n \\\"ogLocaleAlternate\\\":
|
||||
[],\\\\\\n \\\"statusCode\\\": 200\\\\\\n }\\\\\\n }\\\\\\n
|
||||
\ ]\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n### Scraping\\\\\\n\\\\\\n[Permalink: Scraping](https://github.com/firecrawl/firecrawl#scraping)\\\\\\n\\\\\\nUsed
|
||||
to scrape a URL and get its content in the specified formats.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/scrape \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"url\\\": \\\"https://docs.firecrawl.dev\\\",\\\\\\n
|
||||
\ \\\"formats\\\" : [\\\"markdown\\\", \\\"html\\\"]\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\nResponse:\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"data\\\": {\\\\\\n \\\"markdown\\\":
|
||||
\\\"Launch Week I is here! [See our Day 2 Release \U0001F680](https://www.firecrawl.dev/blog/launch-week-i-day-2-doubled-rate-limits)[\U0001F4A5
|
||||
Get 2 months free...\\\",\\\\\\n \\\"html\\\": \\\"<!DOCTYPE html><html
|
||||
lang=\\\\\\\"en\\\\\\\" class=\\\\\\\"light\\\\\\\" style=\\\\\\\"color-scheme:
|
||||
light;\\\\\\\"><body class=\\\\\\\"__variable_36bd41 __variable_d7dc5d font-inter
|
||||
...\\\",\\\\\\n \\\"metadata\\\": {\\\\\\n \\\"title\\\": \\\"Home
|
||||
- Firecrawl\\\",\\\\\\n \\\"description\\\": \\\"Firecrawl crawls and
|
||||
converts any website into clean markdown.\\\",\\\\\\n \\\"language\\\":
|
||||
\\\"en\\\",\\\\\\n \\\"keywords\\\": \\\"Firecrawl,Markdown,Data,Mendable,Langchain\\\",\\\\\\n
|
||||
\ \\\"robots\\\": \\\"follow, index\\\",\\\\\\n \\\"ogTitle\\\":
|
||||
\\\"Firecrawl\\\",\\\\\\n \\\"ogDescription\\\": \\\"Turn any website
|
||||
into LLM-ready data.\\\",\\\\\\n \\\"ogUrl\\\": \\\"https://www.firecrawl.dev/\\\",\\\\\\n
|
||||
\ \\\"ogImage\\\": \\\"https://www.firecrawl.dev/og.png?123\\\",\\\\\\n
|
||||
\ \\\"ogLocaleAlternate\\\": [],\\\\\\n \\\"ogSiteName\\\": \\\"Firecrawl\\\",\\\\\\n
|
||||
\ \\\"sourceURL\\\": \\\"https://firecrawl.dev\\\",\\\\\\n \\\"statusCode\\\":
|
||||
200\\\\\\n }\\\\\\n }\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n### Map\\\\\\n\\\\\\n[Permalink:
|
||||
Map](https://github.com/firecrawl/firecrawl#map)\\\\\\n\\\\\\nUsed to map
|
||||
a URL and get urls of the website. This returns most links present on the
|
||||
website.\\\\\\n\\\\\\n```\\\\\\ncurl -X POST https://api.firecrawl.dev/v2/map
|
||||
\\\\\\\\\\n -H 'Content-Type: application/json' \\\\\\\\\\n -H 'Authorization:
|
||||
Bearer YOUR_API_KEY' \\\\\\\\\\n -d '{\\\\\\n \\\"url\\\": \\\"https://firecrawl.dev\\\"\\\\\\n
|
||||
\ }'\\\\\\n```\\\\\\n\\\\\\nResponse:\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n \\\"success\\\":
|
||||
true,\\\\\\n \\\"links\\\": [\\\\\\n { \\\"url\\\": \\\"https://firecrawl.dev\\\",
|
||||
\\\"title\\\": \\\"Firecrawl\\\", \\\"description\\\": \\\"Firecrawl is a
|
||||
tool that allows you to crawl a website and get the data you need.\\\" },\\\\\\n
|
||||
\ { \\\"url\\\": \\\"https://www.firecrawl.dev/pricing\\\", \\\"title\\\":
|
||||
\\\"Firecrawl Pricing\\\", \\\"description\\\": \\\"Firecrawl Pricing\\\"
|
||||
},\\\\\\n { \\\"url\\\": \\\"https://www.firecrawl.dev/blog\\\", \\\"title\\\":
|
||||
\\\"Firecrawl Blog\\\", \\\"description\\\": \\\"Firecrawl Blog\\\" },\\\\\\n
|
||||
\ { \\\"url\\\": \\\"https://www.firecrawl.dev/playground\\\", \\\"title\\\":
|
||||
\\\"Firecrawl Playground\\\", \\\"description\\\": \\\"Firecrawl Playground\\\"
|
||||
},\\\\\\n { \\\"url\\\": \\\"https://www.firecrawl.dev/smart-crawl\\\",
|
||||
\\\"title\\\": \\\"Firecrawl Smart Crawl\\\", \\\"description\\\": \\\"Firecrawl
|
||||
Smart Crawl\\\" }\\\\\\n ]\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n#### Map with search\\\\\\n\\\\\\n[Permalink:
|
||||
Map with search](https://github.com/firecrawl/firecrawl#map-with-search)\\\\\\n\\\\\\nMap
|
||||
with `search` param allows you to search for specific urls inside a website.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/map \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"url\\\": \\\"https://firecrawl.dev\\\",\\\\\\n \\\"search\\\":
|
||||
\\\"docs\\\"\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\nResponse will be an ordered
|
||||
list from the most relevant to the least relevant.\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"links\\\": [\\\\\\n { \\\"url\\\":
|
||||
\\\"https://docs.firecrawl.dev\\\", \\\"title\\\": \\\"Firecrawl Docs\\\",
|
||||
\\\"description\\\": \\\"Firecrawl Docs\\\" },\\\\\\n { \\\"url\\\": \\\"https://docs.firecrawl.dev/sdks/python\\\",
|
||||
\\\"title\\\": \\\"Firecrawl Python SDK\\\", \\\"description\\\": \\\"Firecrawl
|
||||
Python SDK\\\" },\\\\\\n { \\\"url\\\": \\\"https://docs.firecrawl.dev/learn/rag-llama3\\\",
|
||||
\\\"title\\\": \\\"Firecrawl RAG Llama 3\\\", \\\"description\\\": \\\"Firecrawl
|
||||
RAG Llama 3\\\" }\\\\\\n ]\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n### Search\\\\\\n\\\\\\n[Permalink:
|
||||
Search](https://github.com/firecrawl/firecrawl#search)\\\\\\n\\\\\\nSearch
|
||||
the web and get full content from results\\\\\\n\\\\\\nFirecrawl\u2019s search
|
||||
API allows you to perform web searches and optionally scrape the search results
|
||||
in one operation.\\\\\\n\\\\\\n- Choose specific output formats (markdown,
|
||||
HTML, links, screenshots)\\\\\\n- Search the web with customizable parameters
|
||||
(language, country, etc.)\\\\\\n- Optionally retrieve content from search
|
||||
results in various formats\\\\\\n- Control the number of results and set timeouts\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/search \\\\\\\\\\n -H \\\"Content-Type:
|
||||
application/json\\\" \\\\\\\\\\n -H \\\"Authorization: Bearer fc-YOUR_API_KEY\\\"
|
||||
\\\\\\\\\\n -d '{\\\\\\n \\\"query\\\": \\\"what is firecrawl?\\\",\\\\\\n
|
||||
\ \\\"limit\\\": 5\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n#### Response\\\\\\n\\\\\\n[Permalink:
|
||||
Response](https://github.com/firecrawl/firecrawl#response)\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"data\\\": [\\\\\\n {\\\\\\n \\\"url\\\":
|
||||
\\\"https://firecrawl.dev\\\",\\\\\\n \\\"title\\\": \\\"Firecrawl |
|
||||
Home Page\\\",\\\\\\n \\\"description\\\": \\\"Turn websites into LLM-ready
|
||||
data with Firecrawl\\\"\\\\\\n },\\\\\\n {\\\\\\n \\\"url\\\":
|
||||
\\\"https://docs.firecrawl.dev\\\",\\\\\\n \\\"title\\\": \\\"Documentation
|
||||
| Firecrawl\\\",\\\\\\n \\\"description\\\": \\\"Learn how to use Firecrawl
|
||||
in your own applications\\\"\\\\\\n }\\\\\\n ]\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n####
|
||||
With content scraping\\\\\\n\\\\\\n[Permalink: With content scraping](https://github.com/firecrawl/firecrawl#with-content-scraping)\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/search \\\\\\\\\\n -H \\\"Content-Type:
|
||||
application/json\\\" \\\\\\\\\\n -H \\\"Authorization: Bearer fc-YOUR_API_KEY\\\"
|
||||
\\\\\\\\\\n -d '{\\\\\\n \\\"query\\\": \\\"what is firecrawl?\\\",\\\\\\n
|
||||
\ \\\"limit\\\": 5,\\\\\\n \\\"scrapeOptions\\\": {\\\\\\n \\\"formats\\\":
|
||||
[\\\"markdown\\\", \\\"links\\\"]\\\\\\n }\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n###
|
||||
Extract (Beta)\\\\\\n\\\\\\n[Permalink: Extract (Beta)](https://github.com/firecrawl/firecrawl#extract-beta)\\\\\\n\\\\\\nGet
|
||||
structured data from entire websites with a prompt and/or a schema.\\\\\\n\\\\\\nYou
|
||||
can extract structured data from one or multiple URLs, including wildcards:\\\\\\n\\\\\\nSingle
|
||||
Page:\\\\\\nExample: [https://firecrawl.dev/some-page](https://firecrawl.dev/some-page)\\\\\\n\\\\\\nMultiple
|
||||
Pages / Full Domain\\\\\\nExample: [https://firecrawl.dev/](https://firecrawl.dev/)\\\\*\\\\\\n\\\\\\nWhen
|
||||
you use /\\\\*, Firecrawl will automatically crawl and parse all URLs it can
|
||||
discover in that domain, then extract the requested data.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/extract \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"urls\\\": [\\\\\\n \\\"https://firecrawl.dev/*\\\",\\\\\\n
|
||||
\ \\\"https://docs.firecrawl.dev/\\\",\\\\\\n \\\"https://www.ycombinator.com/companies\\\"\\\\\\n
|
||||
\ ],\\\\\\n \\\"prompt\\\": \\\"Extract the company mission, whether
|
||||
it is open source, and whether it is in Y Combinator from the page.\\\",\\\\\\n
|
||||
\ \\\"schema\\\": {\\\\\\n \\\"type\\\": \\\"object\\\",\\\\\\n
|
||||
\ \\\"properties\\\": {\\\\\\n \\\"company_mission\\\": {\\\\\\n
|
||||
\ \\\"type\\\": \\\"string\\\"\\\\\\n },\\\\\\n \\\"is_open_source\\\":
|
||||
{\\\\\\n \\\"type\\\": \\\"boolean\\\"\\\\\\n },\\\\\\n
|
||||
\ \\\"is_in_yc\\\": {\\\\\\n \\\"type\\\": \\\"boolean\\\"\\\\\\n
|
||||
\ }\\\\\\n },\\\\\\n \\\"required\\\": [\\\\\\n \\\"company_mission\\\",\\\\\\n
|
||||
\ \\\"is_open_source\\\",\\\\\\n \\\"is_in_yc\\\"\\\\\\n
|
||||
\ ]\\\\\\n }\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"id\\\": \\\"44aa536d-f1cb-4706-ab87-ed0386685740\\\",\\\\\\n
|
||||
\ \\\"urlTrace\\\": []\\\\\\n}\\\\\\n```\\\\\\n\\\\\\nIf you are using the
|
||||
sdks, it will auto pull the response for you:\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"data\\\": {\\\\\\n \\\"company_mission\\\":
|
||||
\\\"Firecrawl is the easiest way to extract data from the web. Developers
|
||||
use us to reliably convert URLs into LLM-ready markdown or structured data
|
||||
with a single API call.\\\",\\\\\\n \\\"supports_sso\\\": false,\\\\\\n
|
||||
\ \\\"is_open_source\\\": true,\\\\\\n \\\"is_in_yc\\\": true\\\\\\n
|
||||
\ }\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n### LLM Extraction (Beta)\\\\\\n\\\\\\n[Permalink:
|
||||
LLM Extraction (Beta)](https://github.com/firecrawl/firecrawl#llm-extraction-beta)\\\\\\n\\\\\\nUsed
|
||||
to extract structured data from scraped pages.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/scrape \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"url\\\": \\\"https://www.mendable.ai/\\\",\\\\\\n \\\"formats\\\":
|
||||
[\\\\\\n {\\\\\\n \\\"type\\\": \\\"json\\\",\\\\\\n \\\"schema\\\":
|
||||
{\\\\\\n \\\"type\\\": \\\"object\\\",\\\\\\n \\\"properties\\\":
|
||||
{\\\\\\n \\\"company_mission\\\": { \\\"type\\\": \\\"string\\\"
|
||||
},\\\\\\n \\\"supports_sso\\\": { \\\"type\\\": \\\"boolean\\\"
|
||||
},\\\\\\n \\\"is_open_source\\\": { \\\"type\\\": \\\"boolean\\\"
|
||||
},\\\\\\n \\\"is_in_yc\\\": { \\\"type\\\": \\\"boolean\\\" }\\\\\\n
|
||||
\ }\\\\\\n }\\\\\\n }\\\\\\n ]\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"data\\\": {\\\\\\n \\\"content\\\":
|
||||
\\\"Raw Content\\\",\\\\\\n \\\"metadata\\\": {\\\\\\n \\\"title\\\":
|
||||
\\\"Mendable\\\",\\\\\\n \\\"description\\\": \\\"Mendable allows you
|
||||
to easily build AI chat applications. Ingest, customize, then deploy with
|
||||
one line of code anywhere you want. Brought to you by SideGuide\\\",\\\\\\n
|
||||
\ \\\"robots\\\": \\\"follow, index\\\",\\\\\\n \\\"ogTitle\\\":
|
||||
\\\"Mendable\\\",\\\\\\n \\\"ogDescription\\\": \\\"Mendable allows you
|
||||
to easily build AI chat applications. Ingest, customize, then deploy with
|
||||
one line of code anywhere you want. Brought to you by SideGuide\\\",\\\\\\n
|
||||
\ \\\"ogUrl\\\": \\\"https://mendable.ai/\\\",\\\\\\n \\\"ogImage\\\":
|
||||
\\\"https://mendable.ai/mendable_new_og1.png\\\",\\\\\\n \\\"ogLocaleAlternate\\\":
|
||||
[],\\\\\\n \\\"ogSiteName\\\": \\\"Mendable\\\",\\\\\\n \\\"sourceURL\\\":
|
||||
\\\"https://mendable.ai/\\\"\\\\\\n },\\\\\\n \\\"json\\\": {\\\\\\n
|
||||
\ \\\"company_mission\\\": \\\"Train a secure AI on your technical resources
|
||||
that answers customer and employee questions so your team doesn't have to\\\",\\\\\\n
|
||||
\ \\\"supports_sso\\\": true,\\\\\\n \\\"is_open_source\\\": false,\\\\\\n
|
||||
\ \\\"is_in_yc\\\": true\\\\\\n }\\\\\\n }\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n###
|
||||
Extracting without a schema (New)\\\\\\n\\\\\\n[Permalink: Extracting without
|
||||
a schema (New)](https://github.com/firecrawl/firecrawl#extracting-without-a-schema-new)\\\\\\n\\\\\\nYou
|
||||
can now extract without a schema by just passing a `prompt` to the endpoint.
|
||||
The llm chooses the structure of the data.\\\\\\n\\\\\\n```\\\\\\ncurl -X
|
||||
POST https://api.firecrawl.dev/v2/scrape \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"url\\\": \\\"https://docs.firecrawl.dev/\\\",\\\\\\n
|
||||
\ \\\"formats\\\": [\\\\\\n {\\\\\\n \\\"type\\\": \\\"json\\\",\\\\\\n
|
||||
\ \\\"prompt\\\": \\\"Extract the company mission from the page.\\\"\\\\\\n
|
||||
\ }\\\\\\n ]\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n### Interacting
|
||||
with the page with Actions (Cloud-only)\\\\\\n\\\\\\n[Permalink: Interacting
|
||||
with the page with Actions (Cloud-only)](https://github.com/firecrawl/firecrawl#interacting-with-the-page-with-actions-cloud-only)\\\\\\n\\\\\\nFirecrawl
|
||||
allows you to perform various actions on a web page before scraping its content.
|
||||
This is particularly useful for interacting with dynamic content, navigating
|
||||
through pages, or accessing content that requires user interaction.\\\\\\n\\\\\\nHere
|
||||
is an example of how to use actions to navigate to google.com, search for
|
||||
Firecrawl, click on the first result, and take a screenshot.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/scrape \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"url\\\": \\\"google.com\\\",\\\\\\n \\\"formats\\\":
|
||||
[\\\"markdown\\\"],\\\\\\n \\\"actions\\\": [\\\\\\n {\\\"type\\\":
|
||||
\\\"wait\\\", \\\"milliseconds\\\": 2000},\\\\\\n {\\\"type\\\":
|
||||
\\\"click\\\", \\\"selector\\\": \\\"textarea[title=\\\\\\\"Search\\\\\\\"]\\\"},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"wait\\\", \\\"milliseconds\\\": 2000},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"write\\\", \\\"text\\\": \\\"firecrawl\\\"},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"wait\\\", \\\"milliseconds\\\": 2000},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"press\\\", \\\"key\\\": \\\"ENTER\\\"},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"wait\\\", \\\"milliseconds\\\": 3000},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"click\\\", \\\"selector\\\": \\\"h3\\\"},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"wait\\\", \\\"milliseconds\\\": 3000},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"screenshot\\\"}\\\\\\n ]\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n###
|
||||
Batch Scraping Multiple URLs (New)\\\\\\n\\\\\\n[Permalink: Batch Scraping
|
||||
Multiple URLs (New)](https://github.com/firecrawl/firecrawl#batch-scraping-multiple-urls-new)\\\\\\n\\\\\\nYou
|
||||
can now batch scrape multiple URLs at the same time. It is very similar to
|
||||
how the /crawl endpoint works. It submits a batch scrape job and returns a
|
||||
job ID to check the status of the batch scrape.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/batch/scrape \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"urls\\\": [\\\"https://docs.firecrawl.dev\\\", \\\"https://docs.firecrawl.dev/sdks/overview\\\"],\\\\\\n
|
||||
\ \\\"formats\\\" : [\\\"markdown\\\", \\\"html\\\"]\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n##
|
||||
Using Python SDK\\\\\\n\\\\\\n[Permalink: Using Python SDK](https://github.com/firecrawl/firecrawl#using-python-sdk)\\\\\\n\\\\\\n###
|
||||
Installing Python SDK\\\\\\n\\\\\\n[Permalink: Installing Python SDK](https://github.com/firecrawl/firecrawl#installing-python-sdk)\\\\\\n\\\\\\n```\\\\\\npip
|
||||
install firecrawl-py\\\\\\n```\\\\\\n\\\\\\n### Crawl a website\\\\\\n\\\\\\n[Permalink:
|
||||
Crawl a website](https://github.com/firecrawl/firecrawl#crawl-a-website)\\\\\\n\\\\\\n```\\\\\\nfrom
|
||||
firecrawl import Firecrawl\\\\\\n\\\\\\nfirecrawl = Firecrawl(api_key=\\\"fc-YOUR_API_KEY\\\")\\\\\\n\\\\\\n#
|
||||
Scrape a website (returns a Document)\\\\\\ndoc = firecrawl.scrape(\\\\\\n
|
||||
\ \\\"https://firecrawl.dev\\\",\\\\\\n formats=[\\\"markdown\\\", \\\"html\\\"],\\\\\\n)\\\\\\nprint(doc.markdown)\\\\\\n\\\\\\n#
|
||||
Crawl a website\\\\\\nresponse = firecrawl.crawl(\\\\\\n \\\"https://firecrawl.dev\\\",\\\\\\n
|
||||
\ limit=100,\\\\\\n scrape_options={\\\"formats\\\": [\\\"markdown\\\",
|
||||
\\\"html\\\"]},\\\\\\n poll_interval=30,\\\\\\n)\\\\\\nprint(response)\\\\\\n```\\\\\\n\\\\\\n###
|
||||
Extracting structured data from a URL\\\\\\n\\\\\\n[Permalink: Extracting
|
||||
structured data from a URL](https://github.com/firecrawl/firecrawl#extracting-structured-data-from-a-url)\\\\\\n\\\\\\nWith
|
||||
LLM extraction, you can easily extract structured data from any URL. We support
|
||||
pydantic schemas to make it easier for you too. Here is how you to use it:\\\\\\n\\\\\\n```\\\\\\nfrom
|
||||
pydantic import BaseModel, Field\\\\\\nfrom typing import List\\\\\\n\\\\\\nclass
|
||||
Article(BaseModel):\\\\\\n title: str\\\\\\n points: int\\\\\\n by:
|
||||
str\\\\\\n commentsURL: str\\\\\\n\\\\\\nclass TopArticles(BaseModel):\\\\\\n
|
||||
\ top: List[Article] = Field(..., description=\\\"Top 5 stories\\\")\\\\\\n\\\\\\n#
|
||||
Use JSON format with a Pydantic schema\\\\\\ndoc = firecrawl.scrape(\\\\\\n
|
||||
\ \\\"https://news.ycombinator.com\\\",\\\\\\n formats=[{\\\"type\\\":
|
||||
\\\"json\\\", \\\"schema\\\": TopArticles}],\\\\\\n)\\\\\\nprint(doc.json)\\\\\\n```\\\\\\n\\\\\\n##
|
||||
Using the Node SDK\\\\\\n\\\\\\n[Permalink: Using the Node SDK](https://github.com/firecrawl/firecrawl#using-the-node-sdk)\\\\\\n\\\\\\n###
|
||||
Installation\\\\\\n\\\\\\n[Permalink: Installation](https://github.com/firecrawl/firecrawl#installation)\\\\\\n\\\\\\nTo
|
||||
install the Firecrawl Node SDK, you can use npm:\\\\\\n\\\\\\n```\\\\\\nnpm
|
||||
install @mendable/firecrawl-js\\\\\\n```\\\\\\n\\\\\\n### Usage\\\\\\n\\\\\\n[Permalink:
|
||||
Usage](https://github.com/firecrawl/firecrawl#usage)\\\\\\n\\\\\\n1. Get an
|
||||
API key from [firecrawl.dev](https://firecrawl.dev/)\\\\\\n2. Set the API
|
||||
key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter
|
||||
to the `Firecrawl` class.\\\\\\n\\\\\\n```\\\\\\nimport Firecrawl from '@mendable/firecrawl-js';\\\\\\n\\\\\\nconst
|
||||
firecrawl = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });\\\\\\n\\\\\\n//
|
||||
Scrape a website\\\\\\nconst doc = await firecrawl.scrape('https://firecrawl.dev',
|
||||
{\\\\\\n formats: ['markdown', 'html'],\\\\\\n});\\\\\\nconsole.log(doc);\\\\\\n\\\\\\n//
|
||||
Crawl a website\\\\\\nconst response = await firecrawl.crawl('https://firecrawl.dev',
|
||||
{\\\\\\n limit: 100,\\\\\\n scrapeOptions: { formats: ['markdown', 'html']
|
||||
},\\\\\\n});\\\\\\nconsole.log(response);\\\\\\n```\\\\\\n\\\\\\n### Extracting
|
||||
structured data from a URL\\\\\\n\\\\\\n[Permalink: Extracting structured
|
||||
data from a URL](https://github.com/firecrawl/firecrawl#extracting-structured-data-from-a-url-1)\\\\\\n\\\\\\nWith
|
||||
LLM extraction, you can easily extract structured data from any URL. We support
|
||||
zod schema to make it easier for you too. Here is how to use it:\\\\\\n\\\\\\n```\\\\\\nimport
|
||||
Firecrawl from '@mendable/firecrawl-js';\\\\\\nimport { z } from 'zod';\\\\\\n\\\\\\nconst
|
||||
firecrawl = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });\\\\\\n\\\\\\n//
|
||||
Define schema to extract contents into\\\\\\nconst schema = z.object({\\\\\\n
|
||||
\ top: z\\\\\\n .array(\\\\\\n z.object({\\\\\\n title: z.string(),\\\\\\n
|
||||
\ points: z.number(),\\\\\\n by: z.string(),\\\\\\n commentsURL:
|
||||
z.string(),\\\\\\n })\\\\\\n )\\\\\\n .length(5)\\\\\\n .describe('Top
|
||||
5 stories on Hacker News'),\\\\\\n});\\\\\\n\\\\\\n// Use the v2 extract API
|
||||
with direct Zod schema support\\\\\\nconst extractRes = await firecrawl.extract({\\\\\\n
|
||||
\ urls: ['https://news.ycombinator.com'],\\\\\\n schema,\\\\\\n prompt:
|
||||
'Extract the top 5 stories',\\\\\\n});\\\\\\n\\\\\\nconsole.log(extractRes);\\\\\\n```\\\\\\n\\\\\\n##
|
||||
Open Source vs Cloud Offering\\\\\\n\\\\\\n[Permalink: Open Source vs Cloud
|
||||
Offering](https://github.com/firecrawl/firecrawl#open-source-vs-cloud-offering)\\\\\\n\\\\\\nFirecrawl
|
||||
is open source available under the AGPL-3.0 license.\\\\\\n\\\\\\nTo deliver
|
||||
the best possible product, we offer a hosted version of Firecrawl alongside
|
||||
our open-source offering. The cloud solution allows us to continuously innovate
|
||||
and maintain a high-quality, sustainable service for all users.\\\\\\n\\\\\\nFirecrawl
|
||||
Cloud is available at [firecrawl.dev](https://firecrawl.dev/) and offers a
|
||||
range of features that are not available in the open source version:\\\\\\n\\\\\\n[](https://raw.githubusercontent.com/firecrawl/firecrawl/main/img/open-source-cloud.png)\\\\\\n\\\\\\n##
|
||||
Contributing\\\\\\n\\\\\\n[Permalink: Contributing](https://github.com/firecrawl/firecrawl#contributing)\\\\\\n\\\\\\nWe
|
||||
love contributions! Please read our [contributing guide](https://github.com/firecrawl/firecrawl/blob/main/CONTRIBUTING.md)
|
||||
before submitting a pull request. If you'd like to self-host, refer to the
|
||||
[self-hosting guide](https://github.com/firecrawl/firecrawl/blob/main/SELF_HOST.md).\\\\\\n\\\\\\n_It
|
||||
is the sole responsibility of the end users to respect websites' policies
|
||||
when scraping, searching and crawling with Firecrawl. Users are advised to
|
||||
adhere to the applicable privacy policies and terms of use of the websites
|
||||
prior to initiating any scraping activities. By default, Firecrawl respects
|
||||
the directives specified in the websites' robots.txt files when crawling.
|
||||
By utilizing Firecrawl, you expressly agree to comply with these conditions._\\\\\\n\\\\\\n##
|
||||
Contributors\\\\\\n\\\\\\n[Permalink: Contributors](https://github.com/firecrawl/firecrawl#contributors)\\\\\\n\\\\\\n[](https://github.com/firecrawl/firecrawl/graphs/contributors)\\\\\\n\\\\\\n##
|
||||
License Disclaimer\\\\\\n\\\\\\n[Permalink: License Disclaimer](https://github.com/firecrawl/firecrawl#license-disclaimer)\\\\\\n\\\\\\nThis
|
||||
project is primarily licensed under the GNU Affero General Public License
|
||||
v3.0 (AGPL-3.0), as specified in the LICENSE file in the root directory of
|
||||
this repository. However, certain components of this project are licensed
|
||||
under the MIT License. Refer to the LICENSE files in these specific directories
|
||||
for details.\\\\\\n\\\\\\nPlease note:\\\\\\n\\\\\\n- The AGPL-3.0 license
|
||||
applies to all parts of the project unless otherwise specified.\\\\\\n- The
|
||||
SDKs and some UI components are licensed under the MIT License. Refer to the
|
||||
LICENSE files in these specific directories for details.\\\\\\n- When using
|
||||
or contributing to this project, ensure you comply with the appropriate license
|
||||
terms for the specific component you are working with.\\\\\\n\\\\\\nFor more
|
||||
details on the licensing of specific components, please refer to the LICENSE
|
||||
files in the respective directories or contact the project maintainers.\\\\\\n\\\\\\n[\u2191
|
||||
Back to Top \u2191](https://github.com/firecrawl/firecrawl#readme-top)\\\\\\n\\\\\\n##
|
||||
About\\\\\\n\\\\\\n\U0001F525 The Web Data API for AI - Turn entire websites
|
||||
into LLM-ready markdown or structured data\\\\\\n\\\\\\n\\\\\\n[firecrawl.dev](https://firecrawl.dev/
|
||||
\\\"https://firecrawl.dev\\\")\\\\\\n\\\\\\n### Topics\\\\\\n\\\\\\n[markdown](https://github.com/topics/markdown
|
||||
\\\"Topic: markdown\\\") [crawler](https://github.com/topics/crawler \\\"Topic:
|
||||
crawler\\\") [scraper](https://github.com/topics/scraper \\\"Topic: scraper\\\")
|
||||
[ai](https://github.com/topics/ai \\\"Topic: ai\\\") [html-to-markdown](https://github.com/topics/html-to-markdown
|
||||
\\\"Topic: html-to-markdown\\\") [web-crawler](https://github.com/topics/web-crawler
|
||||
\\\"Topic: web-crawler\\\") [scraping](https://github.com/topics/scraping
|
||||
\\\"Topic: scraping\\\") [web-scraper](https://github.com/topics/web-scraper
|
||||
\\\"Topic: web-scraper\\\") [web-scraping](https://github.com/topics/web-scraping
|
||||
\\\"Topic: web-scraping\\\") [data-extraction](https://github.com/topics/data-extraction
|
||||
\\\"Topic: data-extraction\\\") [webscraping](https://github.com/topics/webscraping
|
||||
\\\"Topic: webscraping\\\") [web-data-extraction](https://github.com/topics/web-data-extraction
|
||||
\\\"Topic: web-data-extraction\\\") [ai-agents](https://github.com/topics/ai-agents
|
||||
\\\"Topic: ai-agents\\\") [web-search](https://github.com/topics/web-search
|
||||
\\\"Topic: web-search\\\") [ai-search](https://github.com/topics/ai-search
|
||||
\\\"Topic: ai-search\\\") [web-data](https://github.com/topics/web-data \\\"Topic:
|
||||
web-data\\\") [llm](https://github.com/topics/llm \\\"Topic: llm\\\") [ai-crawler](https://github.com/topics/ai-crawler
|
||||
\\\"Topic: ai-crawler\\\") [ai-scraping](https://github.com/topics/ai-scraping
|
||||
\\\"Topic: ai-scraping\\\")\\\\\\n\\\\\\n### Resources\\\\\\n\\\\\\n[Readme](https://github.com/firecrawl/firecrawl#readme-ov-file)\\\\\\n\\\\\\n###
|
||||
License\\\\\\n\\\\\\n[AGPL-3.0 license](https://github.com/firecrawl/firecrawl#AGPL-3.0-1-ov-file)\\\\\\n\\\\\\n###
|
||||
Contributing\\\\\\n\\\\\\n[Contributing](https://github.com/firecrawl/firecrawl#contributing-ov-file)\\\\\\n\\\\\\n###
|
||||
Uh oh!\\\\\\n\\\\\\nThere was an error while loading. [Please reload this
|
||||
page](https://github.com/firecrawl/firecrawl).\\\\\\n\\\\\\n[Activity](https://github.com/firecrawl/firecrawl/activity)\\\\\\n\\\\\\n[Custom
|
||||
properties](https://github.com/firecrawl/firecrawl/custom-properties)\\\\\\n\\\\\\n###
|
||||
Stars\\\\\\n\\\\\\n[**65.2k**\\\\\\\\\\nstars](https://github.com/firecrawl/firecrawl/stargazers)\\\\\\n\\\\\\n###
|
||||
Watchers\\\\\\n\\\\\\n[**256**\\\\\\\\\\nwatching](https://github.com/firecrawl/firecrawl/watchers)\\\\\\n\\\\\\n###
|
||||
Forks\\\\\\n\\\\\\n[**5.1k**\\\\\\\\\\nforks](https://github.com/firecrawl/firecrawl/forks)\\\\\\n\\\\\\n[Report
|
||||
repository](https://github.com/contact/report-content?content_url=https%3A%2F%2Fgithub.com%2Ffirecrawl%2Ffirecrawl&report=firecrawl+%28user%29)\\\\\\n\\\\\\n##
|
||||
[Releases\\\\ 28](https://github.com/firecrawl/firecrawl/releases)\\\\\\n\\\\\\n[v2.4.0\\\\\\\\\\nLatest\\\\\\\\\\n\\\\\\\\\\n2
|
||||
weeks agoOct 13, 2025](https://github.com/firecrawl/firecrawl/releases/tag/v2.4.0)\\\\\\n\\\\\\n[\\\\+
|
||||
27 releases](https://github.com/firecrawl/firecrawl/releases)\\\\\\n\\\\\\n##
|
||||
[Packages\\\\ 3](https://github.com/orgs/firecrawl/packages?repo_name=firecrawl)\\\\\\n\\\\\\n-
|
||||
[firecrawl](https://github.com/orgs/firecrawl/packages/container/package/firecrawl)\\\\\\n-
|
||||
[playwright-service](https://github.com/orgs/firecrawl/packages/container/package/playwright-service)\\\\\\n-
|
||||
[nuq-postgres](https://github.com/orgs/firecrawl/packages/container/package/nuq-postgres)\\\\\\n\\\\\\n##
|
||||
[Contributors\\\\ 121](https://github.com/firecrawl/firecrawl/graphs/contributors)\\\\\\n\\\\\\n[\\\\+
|
||||
107 contributors](https://github.com/firecrawl/firecrawl/graphs/contributors)\\\\\\n\\\\\\n##
|
||||
Languages\\\\\\n\\\\\\n- [TypeScript73.5%](https://github.com/firecrawl/firecrawl/search?l=typescript)\\\\\\n-
|
||||
[Python18.9%](https://github.com/firecrawl/firecrawl/search?l=python)\\\\\\n-
|
||||
[Rust6.0%](https://github.com/firecrawl/firecrawl/search?l=rust)\\\\\\n- [Astro0.6%](https://github.com/firecrawl/firecrawl/search?l=astro)\\\\\\n-
|
||||
[JavaScript0.3%](https://github.com/firecrawl/firecrawl/search?l=javascript)\\\\\\n-
|
||||
[Jupyter Notebook0.2%](https://github.com/firecrawl/firecrawl/search?l=jupyter-notebook)\\\\\\n-
|
||||
Other0.5%\",\"metadata\":{\"octolytics-dimension-repository_network_root_id\":\"787076358\",\"visitor-hmac\":\"163b2538b2335f7d4000a770785477e15881e1101708ca5ff1e8c021095f6ca1\",\"og:type\":\"object\",\"language\":\"en\",\"route-action\":\"disambiguate\",\"og:title\":\"GitHub
|
||||
- firecrawl/firecrawl: \U0001F525 The Web Data API for AI - Turn entire websites
|
||||
into LLM-ready markdown or structured data\",\"octolytics-dimension-repository_public\":\"true\",\"octolytics-dimension-repository_network_root_nwo\":\"firecrawl/firecrawl\",\"browser-errors-url\":\"https://api.github.com/_private/browser/errors\",\"browser-stats-url\":\"https://api.github.com/_private/browser/stats\",\"twitter:title\":\"GitHub
|
||||
- firecrawl/firecrawl: \U0001F525 The Web Data API for AI - Turn entire websites
|
||||
into LLM-ready markdown or structured data\",\"ui-target\":\"full\",\"og:image\":\"https://repository-images.githubusercontent.com/787076358/f9616c09-3701-41ef-b5a6-fdf912ffb15b\",\"google-site-verification\":\"Apib7-x98H0j5cPqHWwSMm6dNU4GmODRoqxLiDzdx9I\",\"ogSiteName\":\"GitHub\",\"route-pattern\":\"/:user_id/:repository\",\"visitor-payload\":\"eyJyZWZlcnJlciI6IiIsInJlcXVlc3RfaWQiOiJBMTVGOjE3OUI0RDo2MzdFQUREOjg3MzEwOTM6NjkwMTE4MjUiLCJ2aXNpdG9yX2lkIjoiNDkyNzk2MzExNjg5OTIxMTMwMSIsInJlZ2lvbl9lZGdlIjoiaWFkIiwicmVnaW9uX3JlbmRlciI6ImlhZCJ9\",\"og:description\":\"\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data - firecrawl/firecrawl\",\"expected-hostname\":\"github.com\",\"release\":\"66136a30a16cc69206f1249b6ba072daa2174535\",\"title\":\"GitHub
|
||||
- firecrawl/firecrawl: \U0001F525 The Web Data API for AI - Turn entire websites
|
||||
into LLM-ready markdown or structured data\",\"ogDescription\":\"\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data - firecrawl/firecrawl\",\"twitter:card\":\"summary_large_image\",\"fb:app_id\":\"1401488693436528\",\"color-scheme\":\"light
|
||||
dark\",\"twitter:description\":\"\U0001F525 The Web Data API for AI - Turn
|
||||
entire websites into LLM-ready markdown or structured data - firecrawl/firecrawl\",\"favicon\":\"https://github.githubassets.com/favicons/favicon.svg\",\"viewport\":\"width=device-width\",\"twitter:image\":\"https://repository-images.githubusercontent.com/787076358/f9616c09-3701-41ef-b5a6-fdf912ffb15b\",\"user-login\":\"\",\"description\":\"\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data - firecrawl/firecrawl\",\"octolytics-dimension-repository_nwo\":\"firecrawl/firecrawl\",\"octolytics-dimension-user_id\":\"135057108\",\"twitter:site\":\"@github\",\"og:url\":\"https://github.com/firecrawl/firecrawl\",\"octolytics-dimension-user_login\":\"firecrawl\",\"hostname\":\"github.com\",\"current-catalog-service-hash\":\"f3abb0cc802f3d7b95fc8762b94bdcb13bf39634c40c357301c4aa1d67a256fb\",\"html-safe-nonce\":\"e5653da3800db7de2c8b4a64ff6367242043a9e452608e9a6941a1c9e8346cfc\",\"apple-itunes-app\":\"app-id=1477376905,
|
||||
app-argument=https://github.com/firecrawl/firecrawl\",\"turbo-cache-control\":\"no-preview\",\"og:site_name\":\"GitHub\",\"request-id\":\"A15F:179B4D:637EADD:8731093:69011825\",\"octolytics-url\":\"https://collector.github.com/github/collect\",\"octolytics-dimension-repository_is_fork\":\"false\",\"fetch-nonce\":\"v2:bc80e85b-edaf-e746-9f2c-ffdc75854b07\",\"og:image:alt\":\"\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data - firecrawl/firecrawl\",\"github-keyboard-shortcuts\":\"repository,copilot\",\"ogTitle\":\"GitHub
|
||||
- firecrawl/firecrawl: \U0001F525 The Web Data API for AI - Turn entire websites
|
||||
into LLM-ready markdown or structured data\",\"ogImage\":\"https://repository-images.githubusercontent.com/787076358/f9616c09-3701-41ef-b5a6-fdf912ffb15b\",\"analytics-location\":\"/<user-name>/<repo-name>\",\"route-controller\":\"files\",\"octolytics-dimension-repository_id\":\"787076358\",\"ogUrl\":\"https://github.com/firecrawl/firecrawl\",\"go-import\":\"github.com/firecrawl/firecrawl
|
||||
git https://github.com/firecrawl/firecrawl.git\",\"hovercard-subject-tag\":\"repository:787076358\",\"theme-color\":\"#1e2327\",\"turbo-body-classes\":\"logged-out
|
||||
env-production page-responsive\",\"scrapeId\":\"ec4d99a0-4c4f-4d1a-9fd2-08b8f891f883\",\"sourceURL\":\"https://github.com/firecrawl/firecrawl\",\"url\":\"https://github.com/firecrawl/firecrawl\",\"statusCode\":200,\"contentType\":\"text/html;
|
||||
charset=utf-8\",\"proxyUsed\":\"basic\",\"cacheState\":\"hit\",\"cachedAt\":\"2025-10-28T19:23:20.106Z\"}},{\"url\":\"https://x.com/firecrawl_dev?lang=en\",\"title\":\"Firecrawl
|
||||
(@firecrawl_dev) / Posts / X\",\"description\":\"Firecrawl (@firecrawl_dev)
|
||||
- Posts - Turn websites into LLM-ready data. Built by @mendableai team Open
|
||||
source: | X (formerly Twitter)\",\"position\":3},{\"url\":\"https://github.com/firecrawl\",\"title\":\"Firecrawl
|
||||
- GitHub\",\"description\":\"Building AI applications? You need clean, structured
|
||||
data from the web. Firecrawl handles the complexity of modern web scraping
|
||||
so you can focus on building ...\",\"position\":4,\"category\":\"github\",\"markdown\":\"[Skip
|
||||
to content](https://github.com/firecrawl#start-of-content)\\n\\nYou signed
|
||||
in with another tab or window. [Reload](https://github.com/firecrawl) to refresh
|
||||
your session.You signed out in another tab or window. [Reload](https://github.com/firecrawl)
|
||||
to refresh your session.You switched accounts on another tab or window. [Reload](https://github.com/firecrawl)
|
||||
to refresh your session.Dismiss alert\\n\\n{{ message }}\\n\\n[README.md](https://github.com/firecrawl/.github/tree/main/profile/README.md)\\n\\n#
|
||||
\U0001F525 Firecrawl\\n\\n[Permalink: \U0001F525 Firecrawl](https://github.com/firecrawl#-firecrawl)\\n\\n[](https://raw.githubusercontent.com/mendableai/firecrawl/main/img/firecrawl_logo.png)\\n\\n###
|
||||
Transform any website into LLM-ready data\\n\\n[Permalink: Transform any website
|
||||
into LLM-ready data](https://github.com/firecrawl#transform-any-website-into-llm-ready-data)\\n\\nAdvanced
|
||||
web scraping, crawling, and data extraction infrastructure for AI applications\\n\\n[](https://firecrawl.dev/)
|
||||
[](https://docs.firecrawl.dev/)
|
||||
[](https://discord.com/invite/gSmWdAkdwd)\\n\\n[](https://github.com/mendableai/firecrawl/blob/main/LICENSE)[](https://github.com/mendableai/firecrawl/stargazers)[](https://pepy.tech/project/firecrawl-py)[](https://x.com/firecrawl_dev)\\n\\n*
|
||||
* *\\n\\n## Why Firecrawl?\\n\\n[Permalink: Why Firecrawl?](https://github.com/firecrawl#why-firecrawl)\\n\\n**Building
|
||||
AI applications?** You need clean, structured data from the web. Firecrawl
|
||||
handles the complexity of modern web scraping so you can focus on building
|
||||
great products.\\n\\n## Our Core Ecosystem\\n\\n[Permalink: Our Core Ecosystem](https://github.com/firecrawl#our-core-ecosystem)\\n\\n###
|
||||
Main Repository\\n\\n[Permalink: Main Repository](https://github.com/firecrawl#main-repository)\\n\\n[](https://github.com/mendableai/firecrawl)\\n\\n**[firecrawl](https://github.com/mendableai/firecrawl)**
|
||||
\\\\- Core API & SDK\\n\\nTurn entire websites into LLM-ready markdown or
|
||||
structured data. Our flagship product with 40k+ stars.\\n\\n### Cloud API\\n\\n[Permalink:
|
||||
Cloud API](https://github.com/firecrawl#cloud-api)\\n\\n[](https://firecrawl.dev/)\\n\\n**[Firecrawl](https://firecrawl.dev/)**
|
||||
\\\\- Hosted API Service\\n\\nProduction-ready web scraping without infrastructure
|
||||
management. Get your API key and start scraping in minutes with our reliable,
|
||||
scalable cloud service.\\n\\n### MCP Integration\\n\\n[Permalink: MCP Integration](https://github.com/firecrawl#mcp-integration)\\n\\n[](https://github.com/mendableai/firecrawl-mcp-server)\\n\\n**[firecrawl-mcp-server](https://github.com/mendableai/firecrawl-mcp-server)**
|
||||
\\\\- Model Context Protocol Server\\n\\nAdd powerful web scraping capabilities
|
||||
to Claude, Cursor, and any MCP-compatible LLM client.\\n\\n## Community &
|
||||
Support\\n\\n[Permalink: Community & Support](https://github.com/firecrawl#community--support)\\n\\n[](https://discord.com/invite/gSmWdAkdwd)[](https://x.com/firecrawl_dev)[](https://www.linkedin.com/company/104100957/)[](https://github.com/mendableai/firecrawl/discussions)[](https://docs.firecrawl.dev/)\\n\\n##
|
||||
Built By Mendable\\n\\n[Permalink: Built By Mendable](https://github.com/firecrawl#built-by-mendable)\\n\\nWe're
|
||||
the team behind [Mendable.ai](https://mendable.ai/), passionate about making
|
||||
web data accessible for AI applications. Firecrawl powers thousands of AI
|
||||
products worldwide.\\n\\n* * *\\n\\n**Ready to build something amazing?**\\n\\n[Get
|
||||
your API key](https://firecrawl.dev/) and start scraping in minutes\\n\\n\\n[Star
|
||||
our main repo](https://github.com/mendableai/firecrawl) \u2022\\n[Try the
|
||||
playground](https://firecrawl.dev/playground) \u2022\\n[Read the docs](https://docs.firecrawl.dev/)\\n\\n##
|
||||
Pinned Loading\\n\\n1. [firecrawl](https://github.com/firecrawl/firecrawl)
|
||||
firecrawlPublic\\n\\n\\n\\n\\n\\n\\n\U0001F525 The Web Data API for AI - Turn
|
||||
entire websites into LLM-ready markdown or structured data\\n\\n\\n\\n\\nTypeScript[64.9k](https://github.com/firecrawl/firecrawl/stargazers)
|
||||
[5.1k](https://github.com/firecrawl/firecrawl/forks)\\n\\n2. [mendable-nextjs-chatbot](https://github.com/firecrawl/mendable-nextjs-chatbot)
|
||||
mendable-nextjs-chatbotPublic template\\n\\n\\n\\n\\n\\n\\nNext.js Starter
|
||||
Template for building chatbots with Mendable\\n\\n\\n\\n\\nTypeScript[256](https://github.com/firecrawl/mendable-nextjs-chatbot/stargazers)
|
||||
[52](https://github.com/firecrawl/mendable-nextjs-chatbot/forks)\\n\\n3. [rag-arena](https://github.com/firecrawl/rag-arena)
|
||||
rag-arenaPublic\\n\\n\\n\\n\\n\\n\\nOpen-source RAG evaluation through users'
|
||||
feedback\\n\\n\\n\\n\\nTypeScript[206](https://github.com/firecrawl/rag-arena/stargazers)
|
||||
[32](https://github.com/firecrawl/rag-arena/forks)\\n\\n4. [QA\\\\_clustering](https://github.com/firecrawl/QA_clustering)
|
||||
QA\\\\_clusteringPublic\\n\\n\\n\\n\\n\\n\\nAnalyzing chat interactions w/
|
||||
LLMs to improve \U0001F99C\U0001F517 Langchain docs\\n\\n\\n\\n\\nJupyter
|
||||
Notebook[80](https://github.com/firecrawl/QA_clustering/stargazers) [12](https://github.com/firecrawl/QA_clustering/forks)\\n\\n5.
|
||||
[data-connectors](https://github.com/firecrawl/data-connectors) data-connectorsPublic\\n\\n\\n\\n\\n\\n\\nLLM-ready
|
||||
data connectors\\n\\n\\n\\n\\nTypeScript[95](https://github.com/firecrawl/data-connectors/stargazers)
|
||||
[23](https://github.com/firecrawl/data-connectors/forks)\\n\\n6. [mendable-py](https://github.com/firecrawl/mendable-py)
|
||||
mendable-pyPublic\\n\\n\\n\\n\\n\\n\\nBuild Production Ready LLM Chat Apps
|
||||
in Minutes\\n\\n\\n\\n\\nPython[33](https://github.com/firecrawl/mendable-py/stargazers)
|
||||
[7](https://github.com/firecrawl/mendable-py/forks)\\n\\n\\n### Repositories\\n\\nLoading\\n\\nType\\n\\nAllPublicSourcesForksArchivedMirrorsTemplates\\n\\nLanguage\\n\\nAllCSSGoJavaJavaScriptJupyter
|
||||
NotebookMDXPythonRustTypeScript\\n\\nSort\\n\\nLast updatedNameStars\\n\\nShowing
|
||||
10 of 61 repositories\\n\\n- [firecrawl](https://github.com/firecrawl/firecrawl)\\nPublic\\n\\n\\n\\n\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data\\n\\n\\n\\n\\n\\n\\nfirecrawl/firecrawl\u2019s past year of
|
||||
commit activity\\n\\n\\n\\nTypeScript[64,949](https://github.com/firecrawl/firecrawl/stargazers)AGPL-3.0\\n[5,132](https://github.com/firecrawl/firecrawl/forks)
|
||||
[27](https://github.com/firecrawl/firecrawl/issues) [(2 issues need help)](https://github.com/firecrawl/firecrawl/issues?q=label%3A%22good+first+issue%22+is%3Aissue+is%3Aopen)
|
||||
[85](https://github.com/firecrawl/firecrawl/pulls)\\nUpdated 2 hours agoOct
|
||||
27, 2025\\n\\n- [firecrawl-docs](https://github.com/firecrawl/firecrawl-docs)\\nPublic\\n\\n\\n\\nDocumentation
|
||||
for Firecrawl.\\n\\n\\n\\n\\n\\n\\nfirecrawl/firecrawl-docs\u2019s past year
|
||||
of commit activity\\n\\n\\n\\nMDX[17](https://github.com/firecrawl/firecrawl-docs/stargazers)
|
||||
[35](https://github.com/firecrawl/firecrawl-docs/forks) [10](https://github.com/firecrawl/firecrawl-docs/issues)
|
||||
[5](https://github.com/firecrawl/firecrawl-docs/pulls)\\nUpdated 20 hours
|
||||
agoOct 26, 2025\\n\\n- [open-agent-builder](https://github.com/firecrawl/open-agent-builder)\\nPublic\\n\\n\\n\\n\U0001F525
|
||||
Visual workflow builder for AI agents powered by Firecrawl - drag-and-drop
|
||||
web scraping pipelines with real-time execution\\n\\n\\n\\n\\n\\n\\nfirecrawl/open-agent-builder\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nTypeScript[1,673](https://github.com/firecrawl/open-agent-builder/stargazers)
|
||||
[274](https://github.com/firecrawl/open-agent-builder/forks) [4](https://github.com/firecrawl/open-agent-builder/issues)
|
||||
[2](https://github.com/firecrawl/open-agent-builder/pulls)\\nUpdated last
|
||||
weekOct 20, 2025\\n\\n- [firecrawl-mcp-server](https://github.com/firecrawl/firecrawl-mcp-server)\\nPublic\\n\\n\\n\\n\U0001F525
|
||||
Official Firecrawl MCP Server - Adds powerful web scraping and search to Cursor,
|
||||
Claude and any other LLM clients.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n[**Uh
|
||||
oh!**](https://github.com/firecrawl/firecrawl-mcp-server/graphs/commit-activity)\\n\\n[There
|
||||
was an error while loading.](https://github.com/firecrawl/firecrawl-mcp-server/graphs/commit-activity)
|
||||
[Please reload this page](https://github.com/firecrawl).\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nfirecrawl/firecrawl-mcp-server\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nJavaScript[4,794](https://github.com/firecrawl/firecrawl-mcp-server/stargazers)MIT\\n[519](https://github.com/firecrawl/firecrawl-mcp-server/forks)
|
||||
[44](https://github.com/firecrawl/firecrawl-mcp-server/issues) [17](https://github.com/firecrawl/firecrawl-mcp-server/pulls)\\nUpdated
|
||||
last weekOct 19, 2025\\n\\n- [n8n-nodes-firecrawl](https://github.com/firecrawl/n8n-nodes-firecrawl)\\nPublic\\n\\n\\n\\nn8n
|
||||
node to interact with Firecrawl\\n\\n\\n\\n\\n\\n\\nfirecrawl/n8n-nodes-firecrawl\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nTypeScript[21](https://github.com/firecrawl/n8n-nodes-firecrawl/stargazers)MIT\\n[13](https://github.com/firecrawl/n8n-nodes-firecrawl/forks)
|
||||
[3](https://github.com/firecrawl/n8n-nodes-firecrawl/issues) [0](https://github.com/firecrawl/n8n-nodes-firecrawl/pulls)\\nUpdated
|
||||
2 weeks agoOct 17, 2025\\n\\n- [.github](https://github.com/firecrawl/.github)\\nPublic\\n\\n\\n\\n\\nfirecrawl/.github\u2019s
|
||||
past year of commit activity\\n\\n\\n\\n0\\n[1](https://github.com/firecrawl/.github/forks)
|
||||
[0](https://github.com/firecrawl/.github/issues) [0](https://github.com/firecrawl/.github/pulls)\\nUpdated
|
||||
2 weeks agoOct 12, 2025\\n\\n- [fire-enrich](https://github.com/firecrawl/fire-enrich)\\nPublic\\n\\n\\n\\n\U0001F525
|
||||
AI-powered data enrichment tool that transforms emails into rich datasets
|
||||
with company profiles, funding data, tech stacks, and more using Firecrawl
|
||||
and multi-agent AI\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n[**Uh oh!**](https://github.com/firecrawl/fire-enrich/graphs/commit-activity)\\n\\n[There
|
||||
was an error while loading.](https://github.com/firecrawl/fire-enrich/graphs/commit-activity)
|
||||
[Please reload this page](https://github.com/firecrawl).\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nfirecrawl/fire-enrich\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nTypeScript[953](https://github.com/firecrawl/fire-enrich/stargazers)MIT\\n[239](https://github.com/firecrawl/fire-enrich/forks)
|
||||
[12](https://github.com/firecrawl/fire-enrich/issues) [3](https://github.com/firecrawl/fire-enrich/pulls)\\nUpdated
|
||||
3 weeks agoOct 8, 2025\\n\\n- [firecrawl-java-sdk](https://github.com/firecrawl/firecrawl-java-sdk)\\nPublic\\n\\n\\n\\n\\nfirecrawl/firecrawl-java-sdk\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nJava[11](https://github.com/firecrawl/firecrawl-java-sdk/stargazers)MIT\\n[4](https://github.com/firecrawl/firecrawl-java-sdk/forks)
|
||||
[0](https://github.com/firecrawl/firecrawl-java-sdk/issues) [0](https://github.com/firecrawl/firecrawl-java-sdk/pulls)\\nUpdated
|
||||
last monthSep 28, 2025\\n\\n- [open-lovable](https://github.com/firecrawl/open-lovable)\\nPublic\\n\\n\\n\\n\U0001F525
|
||||
Clone and recreate any website as a modern React app in seconds\\n\\n\\n\\n\\n\\n\\nfirecrawl/open-lovable\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nTypeScript[21,320](https://github.com/firecrawl/open-lovable/stargazers)MIT\\n[3,986](https://github.com/firecrawl/open-lovable/forks)
|
||||
[70](https://github.com/firecrawl/open-lovable/issues) [33](https://github.com/firecrawl/open-lovable/pulls)\\nUpdated
|
||||
last monthSep 27, 2025\\n\\n- [mineru-api](https://github.com/firecrawl/mineru-api)\\nPublic\\n\\n\\n\\n\\nfirecrawl/mineru-api\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nPython[12](https://github.com/firecrawl/mineru-api/stargazers)AGPL-3.0\\n[2](https://github.com/firecrawl/mineru-api/forks)
|
||||
[1](https://github.com/firecrawl/mineru-api/issues) [1](https://github.com/firecrawl/mineru-api/pulls)\\nUpdated
|
||||
on Sep 26Sep 26, 2025\\n\\n\\n[View all repositories](https://github.com/orgs/firecrawl/repositories?type=all)\\n\\n[**People**](https://github.com/orgs/firecrawl/people)\\n\\n[](https://github.com/alexnucci)[](https://github.com/micahstairs)[](https://github.com/nickscamara)[](https://github.com/mogery)[](https://github.com/developersdigest)\\n\\n####
|
||||
Top languages\\n\\n[TypeScript](https://github.com/orgs/firecrawl/repositories?language=typescript&type=all)
|
||||
[Python](https://github.com/orgs/firecrawl/repositories?language=python&type=all)
|
||||
[JavaScript](https://github.com/orgs/firecrawl/repositories?language=javascript&type=all)
|
||||
[Go](https://github.com/orgs/firecrawl/repositories?language=go&type=all)
|
||||
[MDX](https://github.com/orgs/firecrawl/repositories?language=mdx&type=all)\\n\\n####
|
||||
Most used topics\\n\\n[ai](https://github.com/search?q=topic%3Aai+org%3Afirecrawl+fork%3Atrue&type=repositories
|
||||
\\\"Topic: ai\\\") [firecrawl](https://github.com/search?q=topic%3Afirecrawl+org%3Afirecrawl+fork%3Atrue&type=repositories
|
||||
\\\"Topic: firecrawl\\\") [llm](https://github.com/search?q=topic%3Allm+org%3Afirecrawl+fork%3Atrue&type=repositories
|
||||
\\\"Topic: llm\\\") [web-crawler](https://github.com/search?q=topic%3Aweb-crawler+org%3Afirecrawl+fork%3Atrue&type=repositories
|
||||
\\\"Topic: web-crawler\\\") [web-scraping](https://github.com/search?q=topic%3Aweb-scraping+org%3Afirecrawl+fork%3Atrue&type=repositories
|
||||
\\\"Topic: web-scraping\\\")\\n\\nYou can\u2019t perform that action at this
|
||||
time.\",\"metadata\":{\"analytics-location\":\"/<org-login>\",\"apple-itunes-app\":\"app-id=1477376905,
|
||||
app-argument=https://github.com/firecrawl\",\"twitter:card\":\"summary_large_image\",\"google-site-verification\":\"Apib7-x98H0j5cPqHWwSMm6dNU4GmODRoqxLiDzdx9I\",\"description\":\"Web
|
||||
data API for AI. Firecrawl has 61 repositories available. Follow their code
|
||||
on GitHub.\",\"og:image\":\"https://avatars.githubusercontent.com/u/135057108?s=280&v=4\",\"og:type\":\"profile\",\"visitor-payload\":\"eyJyZWZlcnJlciI6IiIsInJlcXVlc3RfaWQiOiI5REFEOjIzM0ExMzo4RDgzNEI6QzQ5OUNCOjY4RkYzODlGIiwidmlzaXRvcl9pZCI6IjQwMDQ0MTI5MTkxMDA5NDY1OTEiLCJyZWdpb25fZWRnZSI6ImlhZCIsInJlZ2lvbl9yZW5kZXIiOiJpYWQifQ==\",\"github-keyboard-shortcuts\":\"copilot\",\"user-login\":\"\",\"viewport\":\"width=device-width\",\"og:description\":\"Web
|
||||
data API for AI. Firecrawl has 61 repositories available. Follow their code
|
||||
on GitHub.\",\"turbo-cache-control\":\"no-preview\",\"fetch-nonce\":\"v2:35a2e032-0081-3f6b-595e-967e32025c6f\",\"og:url\":\"https://github.com/firecrawl\",\"title\":\"Firecrawl
|
||||
\xB7 GitHub\",\"route-pattern\":\"/:user_id(.:format)\",\"route-action\":\"show\",\"octolytics-url\":\"https://collector.github.com/github/collect\",\"og:site_name\":\"GitHub\",\"twitter:title\":\"Firecrawl\",\"request-id\":\"9DAD:233A13:8D834B:C499CB:68FF389F\",\"ogSiteName\":\"GitHub\",\"fb:app_id\":\"1401488693436528\",\"language\":\"en\",\"twitter:image\":\"https://avatars.githubusercontent.com/u/135057108?s=280&v=4\",\"ogImage\":\"https://avatars.githubusercontent.com/u/135057108?s=280&v=4\",\"release\":\"c44b7f7aa5c70f3296484971978c9f4b1b473352\",\"theme-color\":\"#1e2327\",\"color-scheme\":\"light
|
||||
dark\",\"html-safe-nonce\":\"2d932295da6aa360d861f16279839ab109dc4e977a51bc99204477969d7d12c6\",\"ogTitle\":\"Firecrawl\",\"hovercard-subject-tag\":\"organization:135057108\",\"twitter:description\":\"Web
|
||||
data API for AI. Firecrawl has 61 repositories available. Follow their code
|
||||
on GitHub.\",\"hostname\":\"github.com\",\"ogUrl\":\"https://github.com/firecrawl\",\"ogDescription\":\"Web
|
||||
data API for AI. Firecrawl has 61 repositories available. Follow their code
|
||||
on GitHub.\",\"route-controller\":\"profiles\",\"favicon\":\"https://github.githubassets.com/favicons/favicon.svg\",\"visitor-hmac\":\"43cf3bbb9c57a3bcf0b1857fbcabcc78c274e7082e0d28c76cd6d4651bc2a920\",\"twitter:site\":\"@github\",\"ui-target\":\"full\",\"og:title\":\"Firecrawl\",\"expected-hostname\":\"github.com\",\"og:image:alt\":\"Web
|
||||
data API for AI. Firecrawl has 61 repositories available. Follow their code
|
||||
on GitHub.\",\"profile:username\":\"firecrawl\",\"current-catalog-service-hash\":\"4a1c50a83cf6cc4b55b6b9c53e553e3f847c876b87fb333f71f5d05db8f1a7db\",\"turbo-body-classes\":\"logged-out
|
||||
env-production page-responsive\",\"browser-stats-url\":\"https://api.github.com/_private/browser/stats\",\"browser-errors-url\":\"https://api.github.com/_private/browser/errors\",\"scrapeId\":\"65cbc300-be11-4a1a-9d20-c114fb8473a7\",\"sourceURL\":\"https://github.com/firecrawl\",\"url\":\"https://github.com/firecrawl\",\"statusCode\":200,\"contentType\":\"text/html;
|
||||
charset=utf-8\",\"proxyUsed\":\"basic\",\"cacheState\":\"hit\",\"cachedAt\":\"2025-10-27T09:17:20.756Z\"}},{\"url\":\"https://www.linkedin.com/company/firecrawl\",\"title\":\"Firecrawl
|
||||
| LinkedIn\",\"description\":\"Our Dify integration now uses Firecrawl /v2
|
||||
endpoints Scraping is 10x faster thanks to intelligent caching, plus we've
|
||||
added semantic ...\",\"position\":5}]},\"creditsUsed\":3}"
|
||||
headers:
|
||||
Access-Control-Allow-Origin:
|
||||
- '*'
|
||||
Alt-Svc:
|
||||
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
|
||||
Content-Length:
|
||||
- '93428'
|
||||
Content-Type:
|
||||
- application/json; charset=utf-8
|
||||
Date:
|
||||
- Wed, 29 Oct 2025 14:37:39 GMT
|
||||
ETag:
|
||||
- W/"16cf4-kHwVbMu4CCVG2UIt6p1g/gz5M4M"
|
||||
Via:
|
||||
- 1.1 google
|
||||
X-Powered-By:
|
||||
- Express
|
||||
X-Response-Time:
|
||||
- 13172.495ms
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -0,0 +1,18 @@
|
||||
import pytest
|
||||
|
||||
from crewai_tools.tools.firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import (
|
||||
FirecrawlCrawlWebsiteTool,
|
||||
)
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_firecrawl_crawl_tool_integration():
|
||||
tool = FirecrawlCrawlWebsiteTool(config={
|
||||
"limit": 2,
|
||||
"max_discovery_depth": 1,
|
||||
"scrape_options": {"formats": ["markdown"]}
|
||||
})
|
||||
result = tool.run(url="https://firecrawl.dev")
|
||||
|
||||
assert result is not None
|
||||
assert hasattr(result, 'status')
|
||||
assert result.status in ["completed", "scraping"]
|
||||
@@ -0,0 +1,15 @@
|
||||
import pytest
|
||||
|
||||
from crewai_tools.tools.firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import (
|
||||
FirecrawlScrapeWebsiteTool,
|
||||
)
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_firecrawl_scrape_tool_integration():
|
||||
tool = FirecrawlScrapeWebsiteTool()
|
||||
result = tool.run(url="https://firecrawl.dev")
|
||||
|
||||
assert result is not None
|
||||
assert hasattr(result, 'markdown')
|
||||
assert len(result.markdown) > 0
|
||||
assert "Firecrawl" in result.markdown or "firecrawl" in result.markdown.lower()
|
||||
12
lib/crewai-tools/tests/tools/firecrawl_search_tool_test.py
Normal file
12
lib/crewai-tools/tests/tools/firecrawl_search_tool_test.py
Normal file
@@ -0,0 +1,12 @@
|
||||
import pytest
|
||||
|
||||
from crewai_tools.tools.firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_firecrawl_search_tool_integration():
|
||||
tool = FirecrawlSearchTool()
|
||||
result = tool.run(query="firecrawl")
|
||||
|
||||
assert result is not None
|
||||
assert hasattr(result, 'web') or hasattr(result, 'news') or hasattr(result, 'images')
|
||||
@@ -23,7 +23,6 @@ dependencies = [
|
||||
"chromadb~=1.1.0",
|
||||
"tokenizers>=0.20.3",
|
||||
"openpyxl>=3.1.5",
|
||||
"pyvis>=0.3.2",
|
||||
# Authentication and Security
|
||||
"python-dotenv>=1.1.1",
|
||||
"pyjwt>=2.9.0",
|
||||
@@ -49,7 +48,7 @@ Repository = "https://github.com/crewAIInc/crewAI"
|
||||
|
||||
[project.optional-dependencies]
|
||||
tools = [
|
||||
"crewai-tools==1.2.1",
|
||||
"crewai-tools==1.5.0",
|
||||
]
|
||||
embeddings = [
|
||||
"tiktoken~=0.8.0"
|
||||
@@ -94,10 +93,11 @@ azure-ai-inference = [
|
||||
anthropic = [
|
||||
"anthropic>=0.69.0",
|
||||
]
|
||||
# a2a = [
|
||||
# "a2a-sdk~=0.3.9",
|
||||
# "httpx-sse>=0.4.0",
|
||||
# ]
|
||||
a2a = [
|
||||
"a2a-sdk~=0.3.10",
|
||||
"httpx-auth>=0.23.1",
|
||||
"httpx-sse>=0.4.0",
|
||||
]
|
||||
|
||||
|
||||
[project.scripts]
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Any
|
||||
import urllib.request
|
||||
import warnings
|
||||
|
||||
from crewai.agent import Agent
|
||||
from crewai.agent.core import Agent
|
||||
from crewai.crew import Crew
|
||||
from crewai.crews.crew_output import CrewOutput
|
||||
from crewai.flow.flow import Flow
|
||||
@@ -40,7 +40,7 @@ def _suppress_pydantic_deprecation_warnings() -> None:
|
||||
|
||||
_suppress_pydantic_deprecation_warnings()
|
||||
|
||||
__version__ = "1.2.1"
|
||||
__version__ = "1.5.0"
|
||||
_telemetry_submitted = False
|
||||
|
||||
|
||||
|
||||
6
lib/crewai/src/crewai/a2a/__init__.py
Normal file
6
lib/crewai/src/crewai/a2a/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""Agent-to-Agent (A2A) protocol communication module for CrewAI."""
|
||||
|
||||
from crewai.a2a.config import A2AConfig
|
||||
|
||||
|
||||
__all__ = ["A2AConfig"]
|
||||
20
lib/crewai/src/crewai/a2a/auth/__init__.py
Normal file
20
lib/crewai/src/crewai/a2a/auth/__init__.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""A2A authentication schemas."""
|
||||
|
||||
from crewai.a2a.auth.schemas import (
|
||||
APIKeyAuth,
|
||||
BearerTokenAuth,
|
||||
HTTPBasicAuth,
|
||||
HTTPDigestAuth,
|
||||
OAuth2AuthorizationCode,
|
||||
OAuth2ClientCredentials,
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"APIKeyAuth",
|
||||
"BearerTokenAuth",
|
||||
"HTTPBasicAuth",
|
||||
"HTTPDigestAuth",
|
||||
"OAuth2AuthorizationCode",
|
||||
"OAuth2ClientCredentials",
|
||||
]
|
||||
392
lib/crewai/src/crewai/a2a/auth/schemas.py
Normal file
392
lib/crewai/src/crewai/a2a/auth/schemas.py
Normal file
@@ -0,0 +1,392 @@
|
||||
"""Authentication schemes for A2A protocol agents.
|
||||
|
||||
Supported authentication methods:
|
||||
- Bearer tokens
|
||||
- OAuth2 (Client Credentials, Authorization Code)
|
||||
- API Keys (header, query, cookie)
|
||||
- HTTP Basic authentication
|
||||
- HTTP Digest authentication
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
import base64
|
||||
from collections.abc import Awaitable, Callable, MutableMapping
|
||||
import time
|
||||
from typing import Literal
|
||||
import urllib.parse
|
||||
|
||||
import httpx
|
||||
from httpx import DigestAuth
|
||||
from pydantic import BaseModel, Field, PrivateAttr
|
||||
|
||||
|
||||
class AuthScheme(ABC, BaseModel):
|
||||
"""Base class for authentication schemes."""
|
||||
|
||||
@abstractmethod
|
||||
async def apply_auth(
|
||||
self, client: httpx.AsyncClient, headers: MutableMapping[str, str]
|
||||
) -> MutableMapping[str, str]:
|
||||
"""Apply authentication to request headers.
|
||||
|
||||
Args:
|
||||
client: HTTP client for making auth requests.
|
||||
headers: Current request headers.
|
||||
|
||||
Returns:
|
||||
Updated headers with authentication applied.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
class BearerTokenAuth(AuthScheme):
|
||||
"""Bearer token authentication (Authorization: Bearer <token>).
|
||||
|
||||
Attributes:
|
||||
token: Bearer token for authentication.
|
||||
"""
|
||||
|
||||
token: str = Field(description="Bearer token")
|
||||
|
||||
async def apply_auth(
|
||||
self, client: httpx.AsyncClient, headers: MutableMapping[str, str]
|
||||
) -> MutableMapping[str, str]:
|
||||
"""Apply Bearer token to Authorization header.
|
||||
|
||||
Args:
|
||||
client: HTTP client for making auth requests.
|
||||
headers: Current request headers.
|
||||
|
||||
Returns:
|
||||
Updated headers with Bearer token in Authorization header.
|
||||
"""
|
||||
headers["Authorization"] = f"Bearer {self.token}"
|
||||
return headers
|
||||
|
||||
|
||||
class HTTPBasicAuth(AuthScheme):
|
||||
"""HTTP Basic authentication.
|
||||
|
||||
Attributes:
|
||||
username: Username for Basic authentication.
|
||||
password: Password for Basic authentication.
|
||||
"""
|
||||
|
||||
username: str = Field(description="Username")
|
||||
password: str = Field(description="Password")
|
||||
|
||||
async def apply_auth(
|
||||
self, client: httpx.AsyncClient, headers: MutableMapping[str, str]
|
||||
) -> MutableMapping[str, str]:
|
||||
"""Apply HTTP Basic authentication.
|
||||
|
||||
Args:
|
||||
client: HTTP client for making auth requests.
|
||||
headers: Current request headers.
|
||||
|
||||
Returns:
|
||||
Updated headers with Basic auth in Authorization header.
|
||||
"""
|
||||
credentials = f"{self.username}:{self.password}"
|
||||
encoded = base64.b64encode(credentials.encode()).decode()
|
||||
headers["Authorization"] = f"Basic {encoded}"
|
||||
return headers
|
||||
|
||||
|
||||
class HTTPDigestAuth(AuthScheme):
    """Authenticate with the HTTP Digest scheme.

    Note: Uses httpx-auth library for digest implementation.

    Digest credentials are not written into the headers; they are installed
    on the client via ``configure_client``.

    Attributes:
        username: Username for Digest authentication.
        password: Password for Digest authentication.
    """

    username: str = Field(description="Username")
    password: str = Field(description="Password")

    async def apply_auth(
        self, client: httpx.AsyncClient, headers: MutableMapping[str, str]
    ) -> MutableMapping[str, str]:
        """Return the headers untouched.

        Args:
            client: HTTP client; not used here.
            headers: Current request headers.

        Returns:
            The ``headers`` mapping exactly as received.
        """
        return headers

    def configure_client(self, client: httpx.AsyncClient) -> None:
        """Install Digest authentication on the client.

        Args:
            client: HTTP client whose ``auth`` attribute is replaced with a
                ``DigestAuth`` built from this scheme's credentials.
        """
        digest = DigestAuth(self.username, self.password)
        client.auth = digest
|
||||
|
||||
|
||||
class APIKeyAuth(AuthScheme):
    """API Key authentication (header, query, or cookie).

    Header and cookie keys are applied through ``apply_auth``; query keys are
    applied through a request hook installed by ``configure_client``.

    Attributes:
        api_key: API key value for authentication.
        location: Where to send the API key (header, query, or cookie).
        name: Parameter name for the API key (default: X-API-Key).
    """

    api_key: str = Field(description="API key value")
    location: Literal["header", "query", "cookie"] = Field(
        default="header", description="Where to send the API key"
    )
    name: str = Field(default="X-API-Key", description="Parameter name for the API key")

    async def apply_auth(
        self, client: httpx.AsyncClient, headers: MutableMapping[str, str]
    ) -> MutableMapping[str, str]:
        """Apply API key authentication.

        Args:
            client: HTTP client; not used here.
            headers: Header mapping, updated in place.

        Returns:
            The same mapping. For ``header`` the key is stored under ``name``.
            For ``cookie`` the ``Cookie`` header gains (or replaces) a
            ``name=api_key`` pair while other cookies are kept. For ``query``
            the mapping is returned unchanged.
        """
        if self.location == "header":
            headers[self.name] = self.api_key
        elif self.location == "cookie":
            own_cookie = f"{self.name}={self.api_key}"
            # Keep cookies that are already present and drop only a previous
            # entry with our own name, so repeated calls (e.g. on a retry) do
            # not duplicate it and unrelated cookies are not lost.
            kept = [
                part.strip()
                for part in headers.get("Cookie", "").split(";")
                if part.strip() and part.split("=", 1)[0].strip() != self.name
            ]
            headers["Cookie"] = "; ".join([*kept, own_cookie])
        return headers

    def configure_client(self, client: httpx.AsyncClient) -> None:
        """Configure client for query param API keys.

        Does nothing unless ``location`` is ``"query"``.

        Args:
            client: HTTP client that receives a request event hook adding the
                API key as a query parameter.
        """
        if self.location == "query":

            async def _add_api_key_param(request: httpx.Request) -> None:
                url = httpx.URL(request.url)
                request.url = url.copy_add_param(self.name, self.api_key)

            client.event_hooks["request"].append(_add_api_key_param)
|
||||
|
||||
|
||||
class OAuth2ClientCredentials(AuthScheme):
    """OAuth2 Client Credentials flow authentication.

    The access token is fetched on the first ``apply_auth`` call and fetched
    again once the stored expiry time has passed.

    Attributes:
        token_url: OAuth2 token endpoint URL.
        client_id: OAuth2 client identifier.
        client_secret: OAuth2 client secret.
        scopes: List of required OAuth2 scopes.
    """

    token_url: str = Field(description="OAuth2 token endpoint")
    client_id: str = Field(description="OAuth2 client ID")
    client_secret: str = Field(description="OAuth2 client secret")
    scopes: list[str] = Field(
        default_factory=list, description="Required OAuth2 scopes"
    )

    _access_token: str | None = PrivateAttr(default=None)
    _token_expires_at: float | None = PrivateAttr(default=None)

    async def apply_auth(
        self, client: httpx.AsyncClient, headers: MutableMapping[str, str]
    ) -> MutableMapping[str, str]:
        """Apply OAuth2 access token to Authorization header.

        Args:
            client: HTTP client used for the token request when needed.
            headers: Header mapping, updated in place.

        Returns:
            The same mapping with ``Authorization: Bearer <token>`` set when a
            token is available.
        """
        token_is_stale = (
            self._access_token is None
            or self._token_expires_at is None
            or time.time() >= self._token_expires_at
        )
        if token_is_stale:
            await self._fetch_token(client)

        if self._access_token:
            headers["Authorization"] = f"Bearer {self._access_token}"

        return headers

    async def _fetch_token(self, client: httpx.AsyncClient) -> None:
        """Fetch OAuth2 access token using client credentials flow.

        Args:
            client: HTTP client for making token request.

        Raises:
            httpx.HTTPStatusError: If token request fails.
            KeyError: If the response has no ``access_token``.
            ValueError: If ``expires_in`` is present but not numeric.
        """
        data = {
            "grant_type": "client_credentials",
            "client_id": self.client_id,
            "client_secret": self.client_secret,
        }
        # The scope parameter is only sent when scopes were configured.
        if self.scopes:
            data["scope"] = " ".join(self.scopes)

        response = await client.post(self.token_url, data=data)
        response.raise_for_status()

        token_data = response.json()
        self._access_token = token_data["access_token"]

        # Some token endpoints send expires_in as a string or as null; coerce
        # it so the arithmetic below cannot raise TypeError.
        raw_expires_in = token_data.get("expires_in")
        expires_in = 3600.0 if raw_expires_in is None else float(raw_expires_in)
        # Treat the token as expired 60 seconds early.
        self._token_expires_at = time.time() + expires_in - 60
|
||||
|
||||
|
||||
class OAuth2AuthorizationCode(AuthScheme):
    """OAuth2 Authorization Code flow authentication.

    Note: Requires interactive authorization. A callback registered through
    ``set_authorization_callback`` receives the authorization URL and must
    return the authorization code.

    Attributes:
        authorization_url: OAuth2 authorization endpoint URL.
        token_url: OAuth2 token endpoint URL.
        client_id: OAuth2 client identifier.
        client_secret: OAuth2 client secret.
        redirect_uri: OAuth2 redirect URI for callback.
        scopes: List of required OAuth2 scopes.
    """

    authorization_url: str = Field(description="OAuth2 authorization endpoint")
    token_url: str = Field(description="OAuth2 token endpoint")
    client_id: str = Field(description="OAuth2 client ID")
    client_secret: str = Field(description="OAuth2 client secret")
    redirect_uri: str = Field(description="OAuth2 redirect URI")
    scopes: list[str] = Field(
        default_factory=list, description="Required OAuth2 scopes"
    )

    _access_token: str | None = PrivateAttr(default=None)
    _refresh_token: str | None = PrivateAttr(default=None)
    _token_expires_at: float | None = PrivateAttr(default=None)
    _authorization_callback: Callable[[str], Awaitable[str]] | None = PrivateAttr(
        default=None
    )

    def set_authorization_callback(
        self, callback: Callable[[str], Awaitable[str]] | None
    ) -> None:
        """Set callback to handle authorization URL.

        Args:
            callback: Async function that receives authorization URL and returns auth code.
        """
        self._authorization_callback = callback

    async def apply_auth(
        self, client: httpx.AsyncClient, headers: MutableMapping[str, str]
    ) -> MutableMapping[str, str]:
        """Apply OAuth2 access token to Authorization header.

        Runs the authorization-code exchange when no token is held yet, and
        refreshes the token when the stored expiry time has passed.

        Args:
            client: HTTP client for making token requests.
            headers: Header mapping, updated in place.

        Returns:
            The same mapping with ``Authorization: Bearer <token>`` set when a
            token is available.

        Raises:
            ValueError: If authorization callback is not set.
        """
        if self._access_token is None:
            if self._authorization_callback is None:
                msg = "Authorization callback not set. Use set_authorization_callback()"
                raise ValueError(msg)
            await self._fetch_initial_token(client)
        elif self._token_expires_at and time.time() >= self._token_expires_at:
            await self._refresh_access_token(client)

        if self._access_token:
            headers["Authorization"] = f"Bearer {self._access_token}"

        return headers

    def _expiry_timestamp(self, expires_in: float | str | None) -> float:
        """Turn a token lifetime into the time at which to stop using the token.

        Args:
            expires_in: Lifetime in seconds as returned by the token endpoint;
                numeric strings are accepted and ``None`` means 3600.

        Returns:
            Current time plus the lifetime, minus a 60 second safety margin.
        """
        lifetime = 3600.0 if expires_in is None else float(expires_in)
        return time.time() + lifetime - 60

    async def _fetch_initial_token(self, client: httpx.AsyncClient) -> None:
        """Fetch initial access token using authorization code flow.

        Args:
            client: HTTP client for making token request.

        Raises:
            ValueError: If authorization callback is not set.
            httpx.HTTPStatusError: If token request fails.
        """
        if self._authorization_callback is None:
            msg = "Authorization callback not set"
            raise ValueError(msg)

        params = {
            "response_type": "code",
            "client_id": self.client_id,
            "redirect_uri": self.redirect_uri,
        }
        # Send scope only when configured, as the client-credentials scheme
        # does, instead of an empty "scope=" parameter.
        if self.scopes:
            params["scope"] = " ".join(self.scopes)

        # The authorization endpoint may already carry a query string.
        separator = "&" if "?" in self.authorization_url else "?"
        auth_url = (
            f"{self.authorization_url}{separator}{urllib.parse.urlencode(params)}"
        )

        auth_code = await self._authorization_callback(auth_url)

        data = {
            "grant_type": "authorization_code",
            "code": auth_code,
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "redirect_uri": self.redirect_uri,
        }

        response = await client.post(self.token_url, data=data)
        response.raise_for_status()

        token_data = response.json()
        self._access_token = token_data["access_token"]
        self._refresh_token = token_data.get("refresh_token")
        self._token_expires_at = self._expiry_timestamp(token_data.get("expires_in"))

    async def _refresh_access_token(self, client: httpx.AsyncClient) -> None:
        """Refresh the access token using refresh token.

        Falls back to the full authorization-code exchange when no refresh
        token is held.

        Args:
            client: HTTP client for making token request.

        Raises:
            httpx.HTTPStatusError: If token refresh request fails.
        """
        if not self._refresh_token:
            await self._fetch_initial_token(client)
            return

        data = {
            "grant_type": "refresh_token",
            "refresh_token": self._refresh_token,
            "client_id": self.client_id,
            "client_secret": self.client_secret,
        }

        response = await client.post(self.token_url, data=data)
        response.raise_for_status()

        token_data = response.json()
        self._access_token = token_data["access_token"]
        # Keep the current refresh token unless the server issued a new one.
        if "refresh_token" in token_data:
            self._refresh_token = token_data["refresh_token"]
        self._token_expires_at = self._expiry_timestamp(token_data.get("expires_in"))
|
||||
236
lib/crewai/src/crewai/a2a/auth/utils.py
Normal file
236
lib/crewai/src/crewai/a2a/auth/utils.py
Normal file
@@ -0,0 +1,236 @@
|
||||
"""Authentication utilities for A2A protocol agent communication.
|
||||
|
||||
Provides validation and retry logic for various authentication schemes including
|
||||
OAuth2, API keys, and HTTP authentication methods.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Awaitable, Callable, MutableMapping
|
||||
import re
|
||||
from typing import Final
|
||||
|
||||
from a2a.client.errors import A2AClientHTTPError
|
||||
from a2a.types import (
|
||||
APIKeySecurityScheme,
|
||||
AgentCard,
|
||||
HTTPAuthSecurityScheme,
|
||||
OAuth2SecurityScheme,
|
||||
)
|
||||
from httpx import AsyncClient, Response
|
||||
|
||||
from crewai.a2a.auth.schemas import (
|
||||
APIKeyAuth,
|
||||
AuthScheme,
|
||||
BearerTokenAuth,
|
||||
HTTPBasicAuth,
|
||||
HTTPDigestAuth,
|
||||
OAuth2AuthorizationCode,
|
||||
OAuth2ClientCredentials,
|
||||
)
|
||||
|
||||
|
||||
# Auth objects keyed by an integer hash, so hashable keys can stand in for them.
_auth_store: dict[int, AuthScheme | None] = {}

# One challenge: a scheme word, whitespace, then the shortest run of text that
# ends at the end of the header or just before ", <word> " (the next scheme).
_SCHEME_PATTERN: Final[re.Pattern[str]] = re.compile(
    r"(\w+)\s+"
    r"(.+?)"
    r"(?=,\s*\w+\s+|$)"
)

# One parameter: key=value, where the value is either double-quoted (group 2)
# or a bare token without whitespace or commas (group 3).
_PARAM_PATTERN: Final[re.Pattern[str]] = re.compile(
    r"(\w+)="
    r'(?:"([^"]*)"|([^\s,]+))'
)
|
||||
|
||||
# AgentCard security-scheme type -> auth classes accepted for that scheme.
_SCHEME_AUTH_MAPPING: Final[dict[type, tuple[type[AuthScheme], ...]]] = {
    OAuth2SecurityScheme: (
        OAuth2ClientCredentials,
        OAuth2AuthorizationCode,
        BearerTokenAuth,
    ),
    APIKeySecurityScheme: (APIKeyAuth,),
}

# Lower-cased HTTP auth scheme name -> the single auth class accepted for it.
_HTTP_SCHEME_MAPPING: Final[dict[str, type[AuthScheme]]] = {
    "basic": HTTPBasicAuth,
    "digest": HTTPDigestAuth,
    "bearer": BearerTokenAuth,
}
|
||||
|
||||
|
||||
def _raise_auth_mismatch(
    expected_classes: type[AuthScheme] | tuple[type[AuthScheme], ...],
    provided_auth: AuthScheme,
) -> None:
    """Raise the error reporting that the wrong auth type was supplied.

    Args:
        expected_classes: The accepted auth class, or a tuple of them.
        provided_auth: The auth instance that was actually supplied.

    Raises:
        A2AClientHTTPError: Always, with status code 401 and a message naming
            the expected and the provided auth classes.
    """
    candidates = (
        expected_classes
        if isinstance(expected_classes, tuple)
        else (expected_classes,)
    )

    if len(candidates) == 1:
        required = candidates[0].__name__
    else:
        required = f"one of ({', '.join(cls.__name__ for cls in candidates)})"

    provided = type(provided_auth).__name__
    msg = (
        f"AgentCard requires {required} authentication, "
        f"but {provided} was provided"
    )
    raise A2AClientHTTPError(401, msg)
|
||||
|
||||
|
||||
def parse_www_authenticate(header_value: str) -> dict[str, dict[str, str]]:
    """Parse WWW-Authenticate header into auth challenges.

    Args:
        header_value: The WWW-Authenticate header value.

    Returns:
        Dictionary mapping auth scheme to its parameters.
        Example: {"Bearer": {"realm": "api", "scope": "read write"}}
        An empty header yields an empty dictionary. If a scheme appears more
        than once, the last occurrence wins.
    """
    if not header_value:
        return {}

    challenges: dict[str, dict[str, str]] = {}

    for scheme_match in _SCHEME_PATTERN.finditer(header_value):
        scheme, params_str = scheme_match.group(1, 2)

        params: dict[str, str] = {}
        for param_match in _PARAM_PATTERN.finditer(params_str):
            key, quoted, bare = param_match.groups()
            # A quoted value may be the empty string (realm=""). Test for
            # None rather than truthiness so it stays "" instead of falling
            # through to the unmatched bare group, which is None.
            params[key] = quoted if quoted is not None else bare

        challenges[scheme] = params

    return challenges
|
||||
|
||||
|
||||
def validate_auth_against_agent_card(
    agent_card: AgentCard, auth: AuthScheme | None
) -> None:
    """Validate that provided auth matches AgentCard security requirements.

    Every entry of ``agent_card.security`` is treated as an alternative: the
    auth is accepted as soon as it matches one of them. Within an entry, the
    first scheme with a known auth mapping decides that entry.

    Args:
        agent_card: The A2A AgentCard containing security requirements.
        auth: User-provided authentication scheme (or None).

    Raises:
        A2AClientHTTPError: If auth doesn't match AgentCard requirements (status_code=401).
    """
    if not agent_card.security or not agent_card.security_schemes:
        return

    if not auth:
        msg = "AgentCard requires authentication but no auth scheme provided"
        raise A2AClientHTTPError(401, msg)

    # Expected classes of the first alternative that rejected the auth; used
    # for the error message if no alternative accepts it.
    first_mismatch: type[AuthScheme] | tuple[type[AuthScheme], ...] | None = None

    for requirement in agent_card.security:
        for scheme_name in requirement:
            security_scheme_wrapper = agent_card.security_schemes.get(scheme_name)
            if not security_scheme_wrapper:
                continue

            scheme = security_scheme_wrapper.root

            expected: type[AuthScheme] | tuple[type[AuthScheme], ...] | None
            expected = _SCHEME_AUTH_MAPPING.get(type(scheme))
            if not expected and isinstance(scheme, HTTPAuthSecurityScheme):
                expected = _HTTP_SCHEME_MAPPING.get(scheme.scheme.lower())
            if not expected:
                # No known mapping for this scheme; try the next one.
                continue

            if isinstance(auth, expected):
                return

            if first_mismatch is None:
                first_mismatch = expected
            # This alternative is decided; move on to the next requirement.
            break

    if first_mismatch is not None:
        _raise_auth_mismatch(first_mismatch, auth)

    msg = "Could not validate auth against AgentCard security requirements"
    raise A2AClientHTTPError(401, msg)
|
||||
|
||||
|
||||
async def retry_on_401(
    request_func: Callable[[], Awaitable[Response]],
    auth_scheme: AuthScheme | None,
    client: AsyncClient,
    headers: MutableMapping[str, str],
    max_retries: int = 3,
) -> Response:
    """Retry a request on 401 authentication error.

    After each 401 (except the last allowed attempt) this waits ``2**attempt``
    seconds, re-applies ``auth_scheme`` to ``headers`` and calls
    ``request_func`` again. The ``WWW-Authenticate`` header is not interpreted
    here; it stays available on the response.

    Args:
        request_func: Async function that makes the HTTP request.
        auth_scheme: Authentication scheme to re-apply between attempts.
        client: HTTP client passed to ``auth_scheme.apply_auth``.
        headers: Request headers to update with new auth.
        max_retries: Maximum number of attempts (default: 3).

    Returns:
        The first non-401 response, or the last 401 response if its
        ``raise_for_status()`` does not raise.

    Raises:
        httpx.HTTPStatusError: From ``raise_for_status()`` on the last 401
            response, when attempts are exhausted or auth scheme is None.
        RuntimeError: If ``max_retries`` allows no attempt at all.
    """
    last_response: Response | None = None

    for attempt in range(max_retries):
        response = await request_func()
        if response.status_code != 401:
            return response

        last_response = response

        # Without an auth scheme there is nothing to re-apply, and the final
        # attempt gets no further retry.
        if auth_scheme is None or attempt >= max_retries - 1:
            break

        await asyncio.sleep(2**attempt)
        await auth_scheme.apply_auth(client, headers)

    if last_response is None:
        msg = "retry_on_401 failed without making any requests"
        raise RuntimeError(msg)

    last_response.raise_for_status()
    return last_response
|
||||
|
||||
|
||||
def configure_auth_client(
    auth: HTTPDigestAuth | APIKeyAuth, client: AsyncClient
) -> None:
    """Let an auth scheme apply its client-level settings.

    Only HTTPDigestAuth and APIKeyAuth need client configuration.

    Args:
        auth: Auth scheme whose ``configure_client`` is invoked.
        client: HTTP client handed to that method.
    """
    configure = auth.configure_client
    configure(client)
|
||||
64
lib/crewai/src/crewai/a2a/config.py
Normal file
64
lib/crewai/src/crewai/a2a/config.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""A2A configuration types.
|
||||
|
||||
This module is separate from experimental.a2a to avoid circular imports.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
BeforeValidator,
|
||||
Field,
|
||||
HttpUrl,
|
||||
TypeAdapter,
|
||||
)
|
||||
|
||||
from crewai.a2a.auth.schemas import AuthScheme
|
||||
|
||||
|
||||
# Shared adapter used to validate values as HTTP(S) URLs.
http_url_adapter = TypeAdapter(HttpUrl)


def _validated_url_string(value: object) -> str:
    """Strictly validate ``value`` as an ``HttpUrl`` and return it as a string."""
    return str(http_url_adapter.validate_python(value, strict=True))


# A ``str`` field type whose input is first passed through HttpUrl validation.
Url = Annotated[str, BeforeValidator(_validated_url_string)]
|
||||
|
||||
|
||||
class A2AConfig(BaseModel):
    """Settings for talking to one remote A2A agent.

    Attributes:
        endpoint: A2A agent endpoint URL.
        auth: Authentication scheme (Bearer, OAuth2, API Key, HTTP Basic/Digest).
        timeout: Request timeout in seconds (default: 120).
        max_turns: Maximum conversation turns with A2A agent (default: 10).
        response_model: Optional Pydantic model for structured A2A agent responses.
        fail_fast: If True, raise error when agent unreachable; if False, skip
            and continue (default: True).
        trust_remote_completion_status: If True, return A2A agent's result
            directly when status is "completed"; if False, always ask server
            agent to respond (default: False).
    """

    endpoint: Url = Field(description="A2A agent endpoint URL")
    auth: AuthScheme | None = Field(
        default=None,
        description="Authentication scheme (Bearer, OAuth2, API Key, HTTP Basic/Digest)",
    )
    timeout: int = Field(default=120, description="Request timeout in seconds")
    max_turns: int = Field(
        default=10,
        description="Maximum conversation turns with A2A agent",
    )
    response_model: type[BaseModel] | None = Field(
        default=None,
        description="Optional Pydantic model for structured A2A agent responses. When specified, the A2A agent is expected to return JSON matching this schema.",
    )
    fail_fast: bool = Field(
        default=True,
        description="If True, raise an error immediately when the A2A agent is unreachable. If False, skip the A2A agent and continue execution.",
    )
    trust_remote_completion_status: bool = Field(
        default=False,
        description='If True, return the A2A agent\'s result directly when status is "completed" without asking the server agent to respond. If False, always ask the server agent to respond, allowing it to potentially delegate again.',
    )
|
||||
29
lib/crewai/src/crewai/a2a/templates.py
Normal file
29
lib/crewai/src/crewai/a2a/templates.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""String templates for A2A (Agent-to-Agent) protocol messaging and status."""
|
||||
|
||||
from string import Template
|
||||
from typing import Final
|
||||
|
||||
|
||||
# Block listing the A2A agents available for delegation.
AVAILABLE_AGENTS_TEMPLATE: Final[Template] = Template(
    "\n<AVAILABLE_A2A_AGENTS>\n"
    " $available_a2a_agents\n"
    "</AVAILABLE_A2A_AGENTS>\n"
)

# Block carrying the earlier A2A conversation.
PREVIOUS_A2A_CONVERSATION_TEMPLATE: Final[Template] = Template(
    "\n<PREVIOUS_A2A_CONVERSATION>\n"
    " $previous_a2a_conversation\n"
    "</PREVIOUS_A2A_CONVERSATION>\n"
)

# Block reporting the current turn, the turn limit and an optional warning.
CONVERSATION_TURN_INFO_TEMPLATE: Final[Template] = Template(
    "\n<CONVERSATION_PROGRESS>\n"
    ' turn="$turn_count"\n'
    ' max_turns="$max_turns"\n'
    " $warning\n"
    "</CONVERSATION_PROGRESS>\n"
)

# Notice shown when A2A agents are configured but unavailable.
UNAVAILABLE_AGENTS_NOTICE_TEMPLATE: Final[Template] = Template(
    "\n<A2A_AGENTS_STATUS>\n"
    " NOTE: A2A agents were configured but are currently unavailable.\n"
    " You cannot delegate to remote agents for this task.\n"
    "\n"
    " Unavailable Agents:\n"
    " $unavailable_agents\n"
    "</A2A_AGENTS_STATUS>\n"
)
|
||||
38
lib/crewai/src/crewai/a2a/types.py
Normal file
38
lib/crewai/src/crewai/a2a/types.py
Normal file
@@ -0,0 +1,38 @@
|
||||
"""Type definitions for A2A protocol message parts."""
|
||||
|
||||
from typing import Any, Literal, Protocol, TypedDict, runtime_checkable
|
||||
|
||||
from typing_extensions import NotRequired
|
||||
|
||||
|
||||
@runtime_checkable
class AgentResponseProtocol(Protocol):
    """Structural type for the dynamically created AgentResponse model.

    Declares three attributes: ``a2a_ids`` (tuple of strings), ``message``
    (string) and ``is_a2a`` (boolean).
    """

    a2a_ids: tuple[str, ...]
    message: str
    is_a2a: bool
|
||||
|
||||
|
||||
class PartsMetadataDict(TypedDict, total=False):
    """Optional metadata attached to an A2A message part.

    Both keys may be omitted.

    Attributes:
        mimeType: MIME type for the part content; only "application/json" is allowed.
        schema: JSON schema for the part content.
    """

    mimeType: Literal["application/json"]
    schema: dict[str, Any]
|
||||
|
||||
|
||||
class PartsDict(TypedDict):
|
||||
"""A2A message part containing text and optional metadata.
|
||||
|
||||
Attributes:
|
||||
text: The text content of the message part.
|
||||
metadata: Optional metadata describing the part content.
|
||||
"""
|
||||
|
||||
text: str
|
||||
metadata: NotRequired[PartsMetadataDict]
|
||||
755
lib/crewai/src/crewai/a2a/utils.py
Normal file
755
lib/crewai/src/crewai/a2a/utils.py
Normal file
@@ -0,0 +1,755 @@
|
||||
"""Utility functions for A2A (Agent-to-Agent) protocol delegation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import AsyncIterator, MutableMapping
|
||||
from contextlib import asynccontextmanager
|
||||
from functools import lru_cache
|
||||
import time
|
||||
from typing import TYPE_CHECKING, Any
|
||||
import uuid
|
||||
|
||||
from a2a.client import Client, ClientConfig, ClientFactory
|
||||
from a2a.client.errors import A2AClientHTTPError
|
||||
from a2a.types import (
|
||||
AgentCard,
|
||||
Message,
|
||||
Part,
|
||||
Role,
|
||||
TaskArtifactUpdateEvent,
|
||||
TaskState,
|
||||
TaskStatusUpdateEvent,
|
||||
TextPart,
|
||||
TransportProtocol,
|
||||
)
|
||||
import httpx
|
||||
from pydantic import BaseModel, Field, create_model
|
||||
|
||||
from crewai.a2a.auth.schemas import APIKeyAuth, HTTPDigestAuth
|
||||
from crewai.a2a.auth.utils import (
|
||||
_auth_store,
|
||||
configure_auth_client,
|
||||
retry_on_401,
|
||||
validate_auth_against_agent_card,
|
||||
)
|
||||
from crewai.a2a.config import A2AConfig
|
||||
from crewai.a2a.types import PartsDict, PartsMetadataDict
|
||||
from crewai.events.event_bus import crewai_event_bus
|
||||
from crewai.events.types.a2a_events import (
|
||||
A2AConversationStartedEvent,
|
||||
A2ADelegationCompletedEvent,
|
||||
A2ADelegationStartedEvent,
|
||||
A2AMessageSentEvent,
|
||||
A2AResponseReceivedEvent,
|
||||
)
|
||||
from crewai.types.utils import create_literals_from_strings
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from a2a.types import Message, Task as A2ATask
|
||||
|
||||
from crewai.a2a.auth.schemas import AuthScheme
|
||||
|
||||
|
||||
@lru_cache()
def _fetch_agent_card_cached(
    endpoint: str,
    auth_hash: int,
    timeout: int,
    _ttl_hash: int,
) -> AgentCard:
    """Fetch an AgentCard, memoised on the argument tuple.

    The auth object is looked up in ``_auth_store`` under ``auth_hash`` so
    that only hashable values take part in the cache key.

    Args:
        endpoint: A2A agent endpoint URL
        auth_hash: Key of the auth object in ``_auth_store``
        timeout: Request timeout
        _ttl_hash: Cache-key component only; not read in the body

    Returns:
        The AgentCard returned by ``_fetch_agent_card_async``
    """
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    try:
        fetch_coro = _fetch_agent_card_async(
            endpoint=endpoint,
            auth=_auth_store.get(auth_hash),
            timeout=timeout,
        )
        return event_loop.run_until_complete(fetch_coro)
    finally:
        event_loop.close()
|
||||
|
||||
|
||||
def fetch_agent_card(
    endpoint: str,
    auth: AuthScheme | None = None,
    timeout: int = 30,
    use_cache: bool = True,
    cache_ttl: int = 300,
) -> AgentCard:
    """Fetch AgentCard from an A2A endpoint with optional caching.

    Cached entries are keyed by endpoint, auth object, timeout and the current
    ``cache_ttl``-second time bucket, so an entry lasts at most ``cache_ttl``
    seconds.

    Args:
        endpoint: A2A agent endpoint URL (AgentCard URL)
        auth: Optional AuthScheme for authentication
        timeout: Request timeout in seconds
        use_cache: Whether to use caching (default True)
        cache_ttl: Cache TTL in seconds (default 300 = 5 minutes); a value of
            0 or less disables caching for this call

    Returns:
        AgentCard object with agent capabilities and skills

    Raises:
        httpx.HTTPStatusError: If the request fails
        A2AClientHTTPError: If authentication fails
    """
    # A TTL of zero would divide by zero below; treat any non-positive TTL as
    # a request not to cache.
    if use_cache and cache_ttl > 0:
        auth_hash = hash((type(auth).__name__, id(auth))) if auth else 0
        # Stored so the cached function can recover the auth object.
        _auth_store[auth_hash] = auth
        ttl_hash = int(time.time() // cache_ttl)
        return _fetch_agent_card_cached(endpoint, auth_hash, timeout, ttl_hash)

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(
            _fetch_agent_card_async(endpoint=endpoint, auth=auth, timeout=timeout)
        )
    finally:
        loop.close()
|
||||
|
||||
|
||||
async def _fetch_agent_card_async(
    endpoint: str,
    auth: AuthScheme | None,
    timeout: int,
) -> AgentCard:
    """Async implementation of AgentCard fetching.

    Args:
        endpoint: A2A agent endpoint URL; requested as given
        auth: Optional AuthScheme for authentication
        timeout: Request timeout in seconds

    Returns:
        AgentCard object

    Raises:
        A2AClientHTTPError: With status 401 when the request still ends in a
            401 after the retry.
        httpx.HTTPStatusError: For any other error status.
    """
    headers: MutableMapping[str, str] = {}
    if auth:
        # A short-lived client gives the auth scheme a way to make its own
        # requests while the initial headers are built.
        async with httpx.AsyncClient(timeout=timeout) as temp_auth_client:
            if isinstance(auth, (HTTPDigestAuth, APIKeyAuth)):
                configure_auth_client(auth, temp_auth_client)
            headers = await auth.apply_auth(temp_auth_client, {})

    async with httpx.AsyncClient(timeout=timeout, headers=headers) as temp_client:
        if isinstance(auth, (HTTPDigestAuth, APIKeyAuth)):
            configure_auth_client(auth, temp_client)

        # Use the endpoint as-is. Splitting it into base and path and joining
        # them again gave the same URL in ordinary cases, but raised
        # IndexError for scheme-less input and misplaced a query string that
        # followed the well-known path.
        agent_card_url = endpoint

        async def _fetch_agent_card_request() -> httpx.Response:
            return await temp_client.get(agent_card_url)

        try:
            response = await retry_on_401(
                request_func=_fetch_agent_card_request,
                auth_scheme=auth,
                client=temp_client,
                headers=temp_client.headers,
                max_retries=2,
            )
            response.raise_for_status()

            return AgentCard.model_validate(response.json())

        except httpx.HTTPStatusError as e:
            if e.response.status_code == 401:
                error_details = ["Authentication failed"]
                www_auth = e.response.headers.get("WWW-Authenticate")
                if www_auth:
                    error_details.append(f"WWW-Authenticate: {www_auth}")
                if not auth:
                    error_details.append("No auth scheme provided")
                msg = " | ".join(error_details)
                raise A2AClientHTTPError(401, msg) from e
            raise
|
||||
|
||||
|
||||
def execute_a2a_delegation(
    endpoint: str,
    auth: AuthScheme | None,
    timeout: int,
    task_description: str,
    context: str | None = None,
    context_id: str | None = None,
    task_id: str | None = None,
    reference_task_ids: list[str] | None = None,
    metadata: dict[str, Any] | None = None,
    extensions: dict[str, Any] | None = None,
    conversation_history: list[Message] | None = None,
    agent_id: str | None = None,
    agent_role: Role | None = None,
    agent_branch: Any | None = None,
    response_model: type[BaseModel] | None = None,
    turn_number: int | None = None,
) -> dict[str, Any]:
    """Delegate a task to a remote A2A agent from synchronous code.

    Emits ``A2ADelegationStartedEvent``, runs ``_execute_a2a_delegation_async``
    to completion on a freshly created event loop, and emits
    ``A2ADelegationCompletedEvent`` once a result is available. The completed
    event is not emitted if the async call raises.

    Args:
        endpoint: A2A agent endpoint URL (AgentCard URL)
        auth: Optional AuthScheme for authentication (Bearer, OAuth2, API Key, HTTP Basic/Digest)
        timeout: Request timeout in seconds
        task_description: The task to delegate
        context: Optional context information
        context_id: Context ID for correlating messages/tasks
        task_id: Specific task identifier
        reference_task_ids: List of related task IDs
        metadata: Additional metadata (external_id, request_id, etc.)
        extensions: Protocol extensions for custom fields
        conversation_history: Previous Message objects from conversation;
            a non-empty list marks the call as multi-turn
        agent_id: Agent identifier for logging
        agent_role: Role of the CrewAI agent delegating the task
        agent_branch: Optional agent tree branch, used as the event source
        response_model: Optional Pydantic model for structured outputs
        turn_number: Optional turn number; when None it is one more than the
            number of user-role messages in ``conversation_history``

    Returns:
        The dictionary produced by the async implementation. This function
        reads ``status`` (required), ``result`` and ``error`` from it for the
        completion event.
    """
    history = conversation_history or []
    is_multiturn = bool(conversation_history)

    if turn_number is None:
        user_turns = sum(1 for message in history if message.role == Role.user)
        turn_number = user_turns + 1

    started_event = A2ADelegationStartedEvent(
        endpoint=endpoint,
        task_description=task_description,
        agent_id=agent_id,
        is_multiturn=is_multiturn,
        turn_number=turn_number,
    )
    crewai_event_bus.emit(agent_branch, started_event)

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        delegation = _execute_a2a_delegation_async(
            endpoint=endpoint,
            auth=auth,
            timeout=timeout,
            task_description=task_description,
            context=context,
            context_id=context_id,
            task_id=task_id,
            reference_task_ids=reference_task_ids,
            metadata=metadata,
            extensions=extensions,
            conversation_history=conversation_history or [],
            is_multiturn=is_multiturn,
            turn_number=turn_number,
            agent_branch=agent_branch,
            agent_id=agent_id,
            agent_role=agent_role,
            response_model=response_model,
        )
        result = loop.run_until_complete(delegation)

        completed_event = A2ADelegationCompletedEvent(
            status=result["status"],
            result=result.get("result"),
            error=result.get("error"),
            is_multiturn=is_multiturn,
        )
        crewai_event_bus.emit(agent_branch, completed_event)

        return result
    finally:
        loop.close()
|
||||
|
||||
|
||||
async def _execute_a2a_delegation_async(
    endpoint: str,
    auth: AuthScheme | None,
    timeout: int,
    task_description: str,
    context: str | None,
    context_id: str | None,
    task_id: str | None,
    reference_task_ids: list[str] | None,
    metadata: dict[str, Any] | None,
    extensions: dict[str, Any] | None,
    conversation_history: list[Message],
    is_multiturn: bool = False,
    turn_number: int = 1,
    agent_branch: Any | None = None,
    agent_id: str | None = None,
    agent_role: str | None = None,
    response_model: type[BaseModel] | None = None,
) -> dict[str, Any]:
    """Async implementation of A2A delegation with multi-turn support.

    Fetches the remote agent card, validates and applies authentication,
    sends one user message and consumes the response stream until the remote
    task reaches a state this function handles (completed, input required,
    failed/rejected, auth required or canceled) or the stream ends.

    Args:
        endpoint: A2A agent endpoint URL
        auth: Optional AuthScheme for authentication
        timeout: Request timeout in seconds
        task_description: Task to delegate
        context: Optional context, prepended to the message text
        context_id: Context ID for correlation
        task_id: Specific task identifier; when missing in a multi-turn
            conversation it is taken from the first history message
        reference_task_ids: Related task IDs
        metadata: Additional metadata
        extensions: Protocol extensions
        conversation_history: Previous Message objects
        is_multiturn: Whether this is a multi-turn conversation
        turn_number: Current turn number
        agent_branch: Agent tree branch for logging
        agent_id: Agent identifier for logging
        agent_role: Agent role for logging
        response_model: Optional Pydantic model for structured outputs

    Returns:
        Dictionary with ``status`` and ``history`` plus either ``result``
        (when completed) or ``error`` (any other status). Results built from
        a completed or input-required task also carry ``agent_card``.
    """
    agent_card = await _fetch_agent_card_async(endpoint, auth, timeout)

    validate_auth_against_agent_card(agent_card, auth)

    headers: MutableMapping[str, str] = {}
    if auth:
        # A throw-away client is only needed so the auth scheme can compute
        # the headers that the real client is created with further below.
        async with httpx.AsyncClient(timeout=timeout) as temp_auth_client:
            if isinstance(auth, (HTTPDigestAuth, APIKeyAuth)):
                configure_auth_client(auth, temp_auth_client)
            headers = await auth.apply_auth(temp_auth_client, {})

    a2a_agent_name = agent_card.name or None

    if turn_number == 1:
        crewai_event_bus.emit(
            agent_branch,
            A2AConversationStartedEvent(
                agent_id=agent_id or endpoint,
                endpoint=endpoint,
                a2a_agent_name=a2a_agent_name,
            ),
        )

    message_parts = []
    if context:
        message_parts.append(f"Context:\n{context}\n\n")
    message_parts.append(f"{task_description}")
    message_text = "".join(message_parts)

    # Follow-up turns continue the task started by the first message.
    if is_multiturn and conversation_history and not task_id:
        if first_task_id := conversation_history[0].task_id:
            task_id = first_task_id

    parts: PartsDict = {"text": message_text}
    if response_model:
        parts.update(
            {
                "metadata": PartsMetadataDict(
                    mimeType="application/json",
                    schema=response_model.model_json_schema(),
                )
            }
        )

    message = Message(
        role=Role.user,
        message_id=str(uuid.uuid4()),
        parts=[Part(root=TextPart(**parts))],
        context_id=context_id,
        task_id=task_id,
        reference_task_ids=reference_task_ids,
        metadata=metadata,
        extensions=extensions,
    )

    transport_protocol = TransportProtocol("JSONRPC")
    new_messages: list[Message] = [*conversation_history, message]
    crewai_event_bus.emit(
        None,
        A2AMessageSentEvent(
            message=message_text,
            turn_number=turn_number,
            is_multiturn=is_multiturn,
            agent_role=agent_role,
        ),
    )

    async with _create_a2a_client(
        agent_card=agent_card,
        transport_protocol=transport_protocol,
        timeout=timeout,
        headers=headers,
        streaming=True,
        auth=auth,
    ) as client:
        result_parts: list[str] = []
        final_result: dict[str, Any] | None = None
        event_stream = client.send_message(message)

        # Errors from the stream propagate unchanged; the ``finally`` clause
        # only guarantees that the stream is closed.
        try:
            async for event in event_stream:
                if isinstance(event, Message):
                    new_messages.append(event)
                    result_parts.extend(
                        part.root.text
                        for part in event.parts
                        if part.root.kind == "text"
                    )

                elif isinstance(event, tuple):
                    a2a_task, update = event

                    if isinstance(update, TaskArtifactUpdateEvent):
                        result_parts.extend(
                            part.root.text
                            for part in update.artifact.parts
                            if part.root.kind == "text"
                        )

                    is_final_update = False
                    if isinstance(update, TaskStatusUpdateEvent):
                        is_final_update = update.final

                    # Keep streaming until the update is flagged final or the
                    # task reaches one of the states handled below.
                    if not is_final_update and a2a_task.status.state not in [
                        TaskState.completed,
                        TaskState.input_required,
                        TaskState.failed,
                        TaskState.rejected,
                        TaskState.auth_required,
                        TaskState.canceled,
                    ]:
                        continue

                    if a2a_task.status.state == TaskState.completed:
                        result_parts.extend(_extract_task_result_parts(a2a_task))
                        if a2a_task.history:
                            new_messages.extend(a2a_task.history)

                        response_text = " ".join(result_parts) if result_parts else ""
                        crewai_event_bus.emit(
                            None,
                            A2AResponseReceivedEvent(
                                response=response_text,
                                turn_number=turn_number,
                                is_multiturn=is_multiturn,
                                status="completed",
                                agent_role=agent_role,
                            ),
                        )

                        final_result = {
                            "status": "completed",
                            "result": response_text,
                            "history": new_messages,
                            "agent_card": agent_card,
                        }
                        break

                    if a2a_task.status.state == TaskState.input_required:
                        if a2a_task.history:
                            new_messages.extend(a2a_task.history)

                        response_text = _extract_error_message(
                            a2a_task, "Additional input required"
                        )
                        if response_text and not a2a_task.history:
                            # The remote task carried no history, so record
                            # its request as an agent message ourselves.
                            agent_message = Message(
                                role=Role.agent,
                                message_id=str(uuid.uuid4()),
                                parts=[Part(root=TextPart(text=response_text))],
                                context_id=getattr(a2a_task, "context_id", None),
                                # The A2A Task model exposes its identifier as
                                # ``id``; ``task_id`` is still tried first for
                                # task-like objects that provide it.
                                task_id=getattr(a2a_task, "task_id", None)
                                or getattr(a2a_task, "id", None),
                            )
                            new_messages.append(agent_message)
                        crewai_event_bus.emit(
                            None,
                            A2AResponseReceivedEvent(
                                response=response_text,
                                turn_number=turn_number,
                                is_multiturn=is_multiturn,
                                status="input_required",
                                agent_role=agent_role,
                            ),
                        )

                        final_result = {
                            "status": "input_required",
                            "error": response_text,
                            "history": new_messages,
                            "agent_card": agent_card,
                        }
                        break

                    if a2a_task.status.state in [TaskState.failed, TaskState.rejected]:
                        error_msg = _extract_error_message(
                            a2a_task, "Task failed without error message"
                        )
                        if a2a_task.history:
                            new_messages.extend(a2a_task.history)
                        final_result = {
                            "status": "failed",
                            "error": error_msg,
                            "history": new_messages,
                        }
                        break

                    if a2a_task.status.state == TaskState.auth_required:
                        error_msg = _extract_error_message(
                            a2a_task, "Authentication required"
                        )
                        final_result = {
                            "status": "auth_required",
                            "error": error_msg,
                            "history": new_messages,
                        }
                        break

                    if a2a_task.status.state == TaskState.canceled:
                        error_msg = _extract_error_message(
                            a2a_task, "Task was canceled"
                        )
                        final_result = {
                            "status": "canceled",
                            "error": error_msg,
                            "history": new_messages,
                        }
                        break
        finally:
            if hasattr(event_stream, "aclose"):
                await event_stream.aclose()

        if final_result:
            return final_result

        # The stream ended without a handled task state: report whatever text
        # was collected as a completed result.
        return {
            "status": "completed",
            "result": " ".join(result_parts) if result_parts else "",
            "history": new_messages,
        }
|
||||
|
||||
|
||||
@asynccontextmanager
async def _create_a2a_client(
    agent_card: AgentCard,
    transport_protocol: TransportProtocol,
    timeout: int,
    headers: MutableMapping[str, str],
    streaming: bool,
    auth: AuthScheme | None = None,
) -> AsyncIterator[Client]:
    """Build an A2A client whose HTTP session lives as long as the context.

    Args:
        agent_card: The A2A agent card the client is created for
        transport_protocol: Transport protocol to advertise as supported
        timeout: Request timeout in seconds
        headers: HTTP headers for every request (auth already applied)
        streaming: Whether streaming responses are enabled
        auth: Optional AuthScheme; digest and API-key schemes are also
            configured directly on the HTTP session

    Yields:
        Configured A2A client instance
    """
    async with httpx.AsyncClient(headers=headers, timeout=timeout) as http_session:
        if auth:
            if isinstance(auth, (HTTPDigestAuth, APIKeyAuth)):
                configure_auth_client(auth, http_session)

        client_config = ClientConfig(
            httpx_client=http_session,
            supported_transports=[str(transport_protocol.value)],
            streaming=streaming,
            accepted_output_modes=["application/json"],
        )
        yield ClientFactory(client_config).create(agent_card)
|
||||
|
||||
|
||||
def _extract_task_result_parts(a2a_task: A2ATask) -> list[str]:
    """Collect the text produced by an A2A task.

    Args:
        a2a_task: A2A Task object with history and artifacts

    Returns:
        Text parts of the most recent agent message in the history, followed
        by the text parts of every artifact (in artifact order)
    """
    # Only the latest agent-authored message contributes to the result.
    latest_agent_msg = next(
        (
            entry
            for entry in reversed(a2a_task.history or [])
            if entry.role == Role.agent
        ),
        None,
    )

    collected: list[str] = []
    if latest_agent_msg is not None:
        collected += [
            piece.root.text
            for piece in latest_agent_msg.parts
            if piece.root.kind == "text"
        ]

    for artifact in a2a_task.artifacts or []:
        collected += [
            piece.root.text for piece in artifact.parts if piece.root.kind == "text"
        ]

    return collected
|
||||
|
||||
|
||||
def _extract_error_message(a2a_task: A2ATask, default: str) -> str:
    """Find the most relevant text describing a task's current status.

    Lookup order: first text part of the status message, then the string form
    of the status message itself, then the first text part of the most recent
    history message that has one, and finally ``default``.

    Args:
        a2a_task: A2A Task object
        default: Message returned when no text can be found

    Returns:
        Error message string
    """
    status = a2a_task.status
    status_message = status.message if status else None
    if status_message:
        first_text = next(
            (
                str(piece.root.text)
                for piece in status_message.parts
                if piece.root.kind == "text"
            ),
            None,
        )
        # A status message without text parts is reported via its str() form.
        return first_text if first_text is not None else str(status_message)

    for past_message in reversed(a2a_task.history or []):
        for piece in past_message.parts:
            if piece.root.kind == "text":
                return str(piece.root.text)

    return default
|
||||
|
||||
|
||||
def create_agent_response_model(agent_ids: tuple[str, ...]) -> type[BaseModel]:
    """Build the ``AgentResponse`` model for a given set of A2A agents.

    Args:
        agent_ids: Available A2A agent IDs; they become the only values the
            ``a2a_ids`` field accepts

    Returns:
        Dynamically created Pydantic model with the fields ``a2a_ids``
        (Literal-constrained tuple), ``message`` and ``is_a2a``
    """
    AgentIdLiteral = create_literals_from_strings(agent_ids)  # noqa: N806

    # Each entry is the (type, Field) pair that ``create_model`` expects.
    a2a_ids_field = (
        tuple[AgentIdLiteral, ...],  # type: ignore[valid-type]
        Field(
            default_factory=tuple,
            max_length=len(agent_ids),
            description="A2A agent IDs to delegate to.",
        ),
    )
    message_field = (
        str,
        Field(
            description="The message content. If is_a2a=true, this is sent to the A2A agent. If is_a2a=false, this is your final answer ending the conversation."
        ),
    )
    is_a2a_field = (
        bool,
        Field(
            description="Set to true to continue the conversation by sending this message to the A2A agent and awaiting their response. Set to false ONLY when you are completely done and providing your final answer (not when asking questions)."
        ),
    )

    return create_model(
        "AgentResponse",
        a2a_ids=a2a_ids_field,
        message=message_field,
        is_a2a=is_a2a_field,
        __base__=BaseModel,
    )
|
||||
|
||||
|
||||
def extract_a2a_agent_ids_from_config(
    a2a_config: list[A2AConfig] | A2AConfig | None,
) -> tuple[list[A2AConfig], tuple[str, ...]]:
    """Normalize an A2A configuration and derive the agent IDs from it.

    Args:
        a2a_config: A single A2A configuration, a list of them, or None

    Returns:
        Tuple of (list of A2A configurations, tuple of their endpoints, which
        serve as the agent IDs); both are empty when ``a2a_config`` is None
    """
    if a2a_config is None:
        return [], ()

    # A lone configuration is treated as a one-element list.
    configs = [a2a_config] if isinstance(a2a_config, A2AConfig) else a2a_config
    endpoints = tuple(entry.endpoint for entry in configs)
    return configs, endpoints
|
||||
|
||||
|
||||
def get_a2a_agents_and_response_model(
    a2a_config: list[A2AConfig] | A2AConfig | None,
) -> tuple[list[A2AConfig], type[BaseModel]]:
    """Resolve the configured A2A agents and the matching response model.

    Args:
        a2a_config: A2A configuration

    Returns:
        Tuple of (list of A2A configurations, response model whose agent IDs
        are restricted to the configured endpoints)
    """
    configs, endpoint_ids = extract_a2a_agent_ids_from_config(a2a_config=a2a_config)
    response_model = create_agent_response_model(endpoint_ids)
    return configs, response_model
|
||||
587
lib/crewai/src/crewai/a2a/wrapper.py
Normal file
587
lib/crewai/src/crewai/a2a/wrapper.py
Normal file
@@ -0,0 +1,587 @@
|
||||
"""A2A agent wrapping logic for metaclass integration.
|
||||
|
||||
Wraps agent classes with A2A delegation capabilities.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from functools import wraps
|
||||
from types import MethodType
|
||||
from typing import TYPE_CHECKING, Any, cast
|
||||
|
||||
from a2a.types import Role
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
from crewai.a2a.config import A2AConfig
|
||||
from crewai.a2a.templates import (
|
||||
AVAILABLE_AGENTS_TEMPLATE,
|
||||
CONVERSATION_TURN_INFO_TEMPLATE,
|
||||
PREVIOUS_A2A_CONVERSATION_TEMPLATE,
|
||||
UNAVAILABLE_AGENTS_NOTICE_TEMPLATE,
|
||||
)
|
||||
from crewai.a2a.types import AgentResponseProtocol
|
||||
from crewai.a2a.utils import (
|
||||
execute_a2a_delegation,
|
||||
fetch_agent_card,
|
||||
get_a2a_agents_and_response_model,
|
||||
)
|
||||
from crewai.events.event_bus import crewai_event_bus
|
||||
from crewai.events.types.a2a_events import (
|
||||
A2AConversationCompletedEvent,
|
||||
A2AMessageSentEvent,
|
||||
)
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from a2a.types import AgentCard, Message
|
||||
|
||||
from crewai.agent.core import Agent
|
||||
from crewai.task import Task
|
||||
from crewai.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
def wrap_agent_with_a2a_instance(agent: Agent) -> None:
    """Replace an agent instance's execute_task with an A2A-aware version.

    The replacement is installed on this instance only. It falls back to the
    original method whenever the agent has no A2A configuration at call time.

    Args:
        agent: The agent instance to wrap
    """
    # Keep the plain function so it can be called with an explicit ``self``.
    unbound_execute = agent.execute_task.__func__  # type: ignore[attr-defined]

    @wraps(unbound_execute)
    def execute_task_with_a2a(
        self: Agent,
        task: Task,
        context: str | None = None,
        tools: list[BaseTool] | None = None,
    ) -> str:
        """Execute task with A2A delegation support.

        Args:
            self: The agent instance
            task: The task to execute
            context: Optional context for task execution
            tools: Optional tools available to the agent

        Returns:
            Task execution result
        """
        if self.a2a:
            configs, response_model = get_a2a_agents_and_response_model(self.a2a)
            return _execute_task_with_a2a(
                self=self,
                a2a_agents=configs,
                original_fn=unbound_execute,
                task=task,
                agent_response_model=response_model,
                context=context,
                tools=tools,
            )

        return unbound_execute(self, task, context, tools)  # type: ignore[no-any-return]

    # object.__setattr__ is used to set the attribute directly on the instance.
    bound_wrapper = MethodType(execute_task_with_a2a, agent)
    object.__setattr__(agent, "execute_task", bound_wrapper)
|
||||
|
||||
|
||||
def _fetch_card_from_config(
    config: A2AConfig,
) -> tuple[A2AConfig, AgentCard | Exception]:
    """Fetch the agent card described by one A2A configuration.

    Failures are returned rather than raised, so the caller decides how to
    treat them.

    Args:
        config: A2A configuration

    Returns:
        Tuple of (config, fetched card or the exception that occurred)
    """
    try:
        return config, fetch_agent_card(
            endpoint=config.endpoint,
            auth=config.auth,
            timeout=config.timeout,
        )
    except Exception as fetch_error:
        return config, fetch_error
|
||||
|
||||
|
||||
def _fetch_agent_cards_concurrently(
    a2a_agents: list[A2AConfig],
) -> tuple[dict[str, AgentCard], dict[str, str]]:
    """Fetch agent cards concurrently for multiple A2A agents.

    One worker thread is used per configuration.

    Args:
        a2a_agents: List of A2A agent configurations

    Returns:
        Tuple of (agent_cards dict keyed by endpoint, failed_agents dict
        mapping endpoint to error message); both are empty when no
        configurations are given

    Raises:
        RuntimeError: If fetching fails for a configuration with
            ``fail_fast`` enabled
    """
    agent_cards: dict[str, AgentCard] = {}
    failed_agents: dict[str, str] = {}

    # ThreadPoolExecutor rejects max_workers=0, so nothing to do without agents.
    if not a2a_agents:
        return agent_cards, failed_agents

    with ThreadPoolExecutor(max_workers=len(a2a_agents)) as executor:
        futures = [
            executor.submit(_fetch_card_from_config, config) for config in a2a_agents
        ]
        for future in as_completed(futures):
            # The worker returns its own config, so no future->config map is needed.
            config, result = future.result()
            if isinstance(result, Exception):
                if config.fail_fast:
                    raise RuntimeError(
                        f"Failed to fetch agent card from {config.endpoint}. "
                        f"Ensure the A2A agent is running and accessible. Error: {result}"
                    ) from result
                failed_agents[config.endpoint] = str(result)
            else:
                agent_cards[config.endpoint] = result

    return agent_cards, failed_agents
|
||||
|
||||
|
||||
def _execute_task_with_a2a(
    self: Agent,
    a2a_agents: list[A2AConfig],
    original_fn: Callable[..., str],
    task: Task,
    agent_response_model: type[BaseModel],
    context: str | None,
    tools: list[BaseTool] | None,
) -> str:
    """Run execute_task with the prompt augmented for A2A delegation.

    The task's description and response model are modified temporarily for
    the call and put back afterwards, whether or not the call succeeds.

    Args:
        self: The agent instance
        a2a_agents: List of A2A agent configurations
        original_fn: The original execute_task method
        task: The task to execute
        agent_response_model: Response model the LLM output is parsed with
        context: Optional context for task execution
        tools: Optional tools available to the agent

    Returns:
        Task execution result (either from LLM or A2A agent)
    """
    saved_description: str = task.description
    saved_output_pydantic = task.output_pydantic
    saved_response_model = task.response_model

    agent_cards, failed_agents = _fetch_agent_cards_concurrently(a2a_agents)

    # No remote agent is reachable: run the task locally, only telling the
    # LLM which agents are unavailable.
    if a2a_agents and failed_agents and not agent_cards:
        failure_lines = "".join(
            f" - {endpoint}: {error}\n" for endpoint, error in failed_agents.items()
        )
        notice = UNAVAILABLE_AGENTS_NOTICE_TEMPLATE.substitute(
            unavailable_agents=failure_lines
        )
        task.description = f"{saved_description}{notice}"

        try:
            return original_fn(self, task, context, tools)
        finally:
            task.description = saved_description

    task.description = _augment_prompt_with_a2a(
        a2a_agents=a2a_agents,
        task_description=saved_description,
        agent_cards=agent_cards,
        failed_agents=failed_agents,
    )
    task.response_model = agent_response_model

    try:
        raw_result = original_fn(self, task, context, tools)
        parsed = _parse_agent_response(
            raw_result=raw_result, agent_response_model=agent_response_model
        )

        is_structured = isinstance(parsed, BaseModel) and isinstance(
            parsed, AgentResponseProtocol
        )
        if not is_structured:
            # Output did not match the response model: hand it back untouched.
            return raw_result

        if not parsed.is_a2a:
            return str(parsed.message)

        return _delegate_to_a2a(
            self,
            agent_response=parsed,
            task=task,
            original_fn=original_fn,
            context=context,
            tools=tools,
            agent_cards=agent_cards,
            original_task_description=saved_description,
        )
    finally:
        task.description = saved_description
        task.output_pydantic = saved_output_pydantic
        task.response_model = saved_response_model
|
||||
|
||||
|
||||
def _augment_prompt_with_a2a(
    a2a_agents: list[A2AConfig],
    task_description: str,
    agent_cards: dict[str, AgentCard],
    conversation_history: list[Message] | None = None,
    turn_num: int = 0,
    max_turns: int | None = None,
    failed_agents: dict[str, str] | None = None,
) -> str:
    """Append A2A delegation instructions to a task description.

    Args:
        a2a_agents: List of A2A agent configurations
        task_description: Original task description
        agent_cards: Dictionary mapping agent endpoints to AgentCards
        conversation_history: Previous A2A Messages from conversation
        turn_num: Current turn number (0-indexed)
        max_turns: Maximum allowed turns (from config)
        failed_agents: Dictionary mapping failed agent endpoints to error messages

    Returns:
        The task description followed by the available-agents section, the
        previous conversation and turn information; the description is
        returned unchanged when there are no agent cards
    """
    if not agent_cards:
        return task_description

    # One JSON summary per configured agent whose card could be fetched.
    sections: list[str] = []
    for config in a2a_agents:
        if config.endpoint not in agent_cards:
            continue
        card_json = agent_cards[config.endpoint].model_dump_json(
            indent=2, exclude_none=True, include={"description", "url", "skills"}
        )
        sections.append(f"\n{card_json}\n")

    unavailable = failed_agents or {}
    if unavailable:
        sections.append("\n<!-- Unavailable Agents -->\n")
        sections.extend(
            f"\n<!-- Agent: {endpoint}\n Status: Unavailable\n Error: {error} -->\n"
            for endpoint, error in unavailable.items()
        )

    agents_text = AVAILABLE_AGENTS_TEMPLATE.substitute(
        available_a2a_agents="".join(sections)
    )

    history_entries: list[str] = []
    for past_message in conversation_history or []:
        message_json = past_message.model_dump_json(
            indent=2, exclude_none=True, exclude={"message_id"}
        )
        history_entries.append(f"\n{message_json}\n")
    history_text = PREVIOUS_A2A_CONVERSATION_TEMPLATE.substitute(
        previous_a2a_conversation="".join(history_entries)
    )

    turn_info = ""
    if max_turns is not None and conversation_history:
        turn_count = turn_num + 1
        if turn_count >= max_turns:
            warning = (
                "CRITICAL: This is the FINAL turn. You MUST conclude the conversation now.\n"
                "Set is_a2a=false and provide your final response to complete the task."
            )
        elif turn_count == max_turns - 1:
            warning = "WARNING: Next turn will be the last. Consider wrapping up the conversation."
        else:
            warning = ""

        turn_info = CONVERSATION_TURN_INFO_TEMPLATE.substitute(
            turn_count=turn_count,
            max_turns=max_turns,
            warning=warning,
        )

    return f"""{task_description}

IMPORTANT: You have the ability to delegate this task to remote A2A agents.

{agents_text}
{history_text}{turn_info}


"""
|
||||
|
||||
|
||||
def _parse_agent_response(
    raw_result: str | dict[str, Any], agent_response_model: type[BaseModel]
) -> BaseModel | str:
    """Validate LLM output against the agent response model when possible.

    Args:
        raw_result: Raw output from LLM
        agent_response_model: The agent response model

    Returns:
        The validated model instance, or ``raw_result`` unchanged when no
        model is given, the output is neither a string nor a dict, or
        validation fails
    """
    if not agent_response_model:
        return cast(str, raw_result)

    # Strings are validated as JSON documents, dicts as already-parsed data.
    if isinstance(raw_result, str):
        validate = agent_response_model.model_validate_json
    elif isinstance(raw_result, dict):
        validate = agent_response_model.model_validate
    else:
        return cast(str, raw_result)

    try:
        return validate(raw_result)
    except ValidationError:
        return cast(str, raw_result)
|
||||
|
||||
|
||||
def _handle_agent_response_and_continue(
    self: Agent,
    a2a_result: dict[str, Any],
    agent_id: str,
    agent_cards: dict[str, AgentCard] | None,
    a2a_agents: list[A2AConfig],
    original_task_description: str,
    conversation_history: list[Message],
    turn_num: int,
    max_turns: int,
    task: Task,
    original_fn: Callable[..., str],
    context: str | None,
    tools: list[BaseTool] | None,
    agent_response_model: type[BaseModel],
) -> tuple[str | None, str | None]:
    """Feed an A2A result back to the CrewAI agent and interpret its reply.

    Args:
        self: The agent instance
        a2a_result: Result from A2A delegation
        agent_id: ID of the A2A agent
        agent_cards: Pre-fetched agent cards
        a2a_agents: List of A2A configurations
        original_task_description: Original task description
        conversation_history: Conversation history
        turn_num: Current turn number
        max_turns: Maximum turns allowed
        task: The task being executed
        original_fn: Original execute_task method
        context: Optional context
        tools: Optional tools
        agent_response_model: Response model for parsing

    Returns:
        Tuple of (final_result, current_request) where:
        - final_result is not None if conversation should end
        - current_request is the next message to send if continuing
    """
    known_cards = agent_cards or {}
    # Remember a card delivered with the result if it was not fetched before.
    if "agent_card" in a2a_result and agent_id not in known_cards:
        known_cards[agent_id] = a2a_result["agent_card"]

    task.description = _augment_prompt_with_a2a(
        a2a_agents=a2a_agents,
        task_description=original_task_description,
        conversation_history=conversation_history,
        turn_num=turn_num,
        max_turns=max_turns,
        agent_cards=known_cards,
    )

    raw_result = original_fn(self, task, context, tools)
    llm_response = _parse_agent_response(
        raw_result=raw_result, agent_response_model=agent_response_model
    )

    is_structured = isinstance(llm_response, BaseModel) and isinstance(
        llm_response, AgentResponseProtocol
    )
    if not is_structured:
        # Unparseable output ends the conversation with the raw text.
        return str(raw_result), None

    if llm_response.is_a2a:
        # The agent wants another round with the remote agent.
        return None, str(llm_response.message)

    closing_message = str(llm_response.message)
    final_turn_number = turn_num + 1
    crewai_event_bus.emit(
        None,
        A2AMessageSentEvent(
            message=closing_message,
            turn_number=final_turn_number,
            is_multiturn=True,
            agent_role=self.role,
        ),
    )
    crewai_event_bus.emit(
        None,
        A2AConversationCompletedEvent(
            status="completed",
            final_result=closing_message,
            error=None,
            total_turns=final_turn_number,
        ),
    )
    return closing_message, None
|
||||
|
||||
|
||||
def _delegate_to_a2a(
    self: Agent,
    agent_response: AgentResponseProtocol,
    task: Task,
    original_fn: Callable[..., str],
    context: str | None,
    tools: list[BaseTool] | None,
    agent_cards: dict[str, AgentCard] | None = None,
    original_task_description: str | None = None,
) -> str:
    """Delegate to A2A agent with multi-turn conversation support.

    Only the first entry of ``agent_response.a2a_ids`` is used as the target
    agent. The conversation runs for at most the target's ``max_turns``.

    Args:
        self: The agent instance
        agent_response: The AgentResponse indicating delegation
        task: The task being executed (for extracting A2A fields)
        original_fn: The original execute_task method for follow-ups
        context: Optional context for task execution
        tools: Optional tools available to the agent
        agent_cards: Pre-fetched agent cards from _execute_task_with_a2a
        original_task_description: The original task description before A2A augmentation

    Returns:
        Result from A2A agent

    Raises:
        ValueError: If no agent ID was provided or the ID is not configured
        Exception: If the delegation fails or exceeds the maximum turns
    """
    # The response model allows an empty a2a_ids tuple; fail with a clear
    # message instead of an IndexError from indexing it.
    requested_ids = agent_response.a2a_ids
    if not requested_ids:
        raise ValueError(
            "A2A delegation was requested (is_a2a=true) but a2a_ids is empty"
        )

    a2a_agents, agent_response_model = get_a2a_agents_and_response_model(self.a2a)
    agent_ids = tuple(config.endpoint for config in a2a_agents)
    current_request = str(agent_response.message)
    agent_id = requested_ids[0]

    if agent_id not in agent_ids:
        raise ValueError(
            f"Unknown A2A agent ID(s): {agent_response.a2a_ids} not in {agent_ids}"
        )

    agent_config = next(config for config in a2a_agents if config.endpoint == agent_id)
    task_config = task.config or {}
    context_id = task_config.get("context_id")
    task_id_config = task_config.get("task_id")
    reference_task_ids = task_config.get("reference_task_ids")
    metadata = task_config.get("metadata")
    extensions = task_config.get("extensions")

    if original_task_description is None:
        original_task_description = task.description

    conversation_history: list[Message] = []
    max_turns = agent_config.max_turns

    try:
        for turn_num in range(max_turns):
            # Looked up every turn because the active branch may change.
            console_formatter = getattr(crewai_event_bus, "_console", None)
            agent_branch = None
            if console_formatter:
                agent_branch = getattr(
                    console_formatter, "current_agent_branch", None
                ) or getattr(console_formatter, "current_task_branch", None)

            a2a_result = execute_a2a_delegation(
                endpoint=agent_config.endpoint,
                auth=agent_config.auth,
                timeout=agent_config.timeout,
                task_description=current_request,
                context_id=context_id,
                task_id=task_id_config,
                reference_task_ids=reference_task_ids,
                metadata=metadata,
                extensions=extensions,
                conversation_history=conversation_history,
                agent_id=agent_id,
                agent_role=Role.user,
                agent_branch=agent_branch,
                response_model=agent_config.response_model,
                turn_number=turn_num + 1,
            )

            conversation_history = a2a_result.get("history", [])

            if a2a_result["status"] in ["completed", "input_required"]:
                if (
                    a2a_result["status"] == "completed"
                    and agent_config.trust_remote_completion_status
                ):
                    # The remote "completed" status is trusted: return its
                    # result without consulting the local LLM again.
                    result_text = a2a_result.get("result", "")
                    final_turn_number = turn_num + 1
                    crewai_event_bus.emit(
                        None,
                        A2AConversationCompletedEvent(
                            status="completed",
                            final_result=result_text,
                            error=None,
                            total_turns=final_turn_number,
                        ),
                    )
                    return result_text  # type: ignore[no-any-return]

                final_result, next_request = _handle_agent_response_and_continue(
                    self=self,
                    a2a_result=a2a_result,
                    agent_id=agent_id,
                    agent_cards=agent_cards,
                    a2a_agents=a2a_agents,
                    original_task_description=original_task_description,
                    conversation_history=conversation_history,
                    turn_num=turn_num,
                    max_turns=max_turns,
                    task=task,
                    original_fn=original_fn,
                    context=context,
                    tools=tools,
                    agent_response_model=agent_response_model,
                )

                if final_result is not None:
                    return final_result

                if next_request is not None:
                    current_request = next_request

                continue

            # Any other status (failed, auth_required, canceled, ...) aborts.
            error_msg = a2a_result.get("error", "Unknown error")
            crewai_event_bus.emit(
                None,
                A2AConversationCompletedEvent(
                    status="failed",
                    final_result=None,
                    error=error_msg,
                    total_turns=turn_num + 1,
                ),
            )
            raise Exception(f"A2A delegation failed: {error_msg}")

        # Turn budget exhausted: fall back to the last agent message, if any.
        if conversation_history:
            for msg in reversed(conversation_history):
                if msg.role == Role.agent:
                    text_parts = [
                        part.root.text for part in msg.parts if part.root.kind == "text"
                    ]
                    final_message = (
                        " ".join(text_parts) if text_parts else "Conversation completed"
                    )
                    crewai_event_bus.emit(
                        None,
                        A2AConversationCompletedEvent(
                            status="completed",
                            final_result=final_message,
                            error=None,
                            total_turns=max_turns,
                        ),
                    )
                    return final_message

        crewai_event_bus.emit(
            None,
            A2AConversationCompletedEvent(
                status="failed",
                final_result=None,
                error=f"Conversation exceeded maximum turns ({max_turns})",
                total_turns=max_turns,
            ),
        )
        raise Exception(f"A2A conversation exceeded maximum turns ({max_turns})")

    finally:
        # Follow-up turns overwrite task.description; always put it back.
        task.description = original_task_description
|
||||
5
lib/crewai/src/crewai/agent/__init__.py
Normal file
5
lib/crewai/src/crewai/agent/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from crewai.agent.core import Agent
|
||||
from crewai.utilities.training_handler import CrewTrainingHandler
|
||||
|
||||
|
||||
__all__ = ["Agent", "CrewTrainingHandler"]
|
||||
@@ -2,27 +2,27 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Sequence
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Final,
|
||||
Literal,
|
||||
cast,
|
||||
)
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from pydantic import BaseModel, Field, InstanceOf, PrivateAttr, model_validator
|
||||
from typing_extensions import Self
|
||||
|
||||
from crewai.a2a.config import A2AConfig
|
||||
from crewai.agents.agent_builder.base_agent import BaseAgent
|
||||
from crewai.agents.cache.cache_handler import CacheHandler
|
||||
from crewai.agents.crew_agent_executor import CrewAgentExecutor
|
||||
from crewai.events.event_bus import crewai_event_bus
|
||||
from crewai.events.types.agent_events import (
|
||||
AgentExecutionCompletedEvent,
|
||||
AgentExecutionErrorEvent,
|
||||
AgentExecutionStartedEvent,
|
||||
)
|
||||
from crewai.events.types.knowledge_events import (
|
||||
KnowledgeQueryCompletedEvent,
|
||||
KnowledgeQueryFailedEvent,
|
||||
@@ -40,6 +40,16 @@ from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from crewai.knowledge.utils.knowledge_utils import extract_knowledge_context
|
||||
from crewai.lite_agent import LiteAgent
|
||||
from crewai.llms.base_llm import BaseLLM
|
||||
from crewai.mcp import (
|
||||
MCPClient,
|
||||
MCPServerConfig,
|
||||
MCPServerHTTP,
|
||||
MCPServerSSE,
|
||||
MCPServerStdio,
|
||||
)
|
||||
from crewai.mcp.transports.http import HTTPTransport
|
||||
from crewai.mcp.transports.sse import SSETransport
|
||||
from crewai.mcp.transports.stdio import StdioTransport
|
||||
from crewai.memory.contextual.contextual_memory import ContextualMemory
|
||||
from crewai.rag.embeddings.types import EmbedderConfig
|
||||
from crewai.security.fingerprint import Fingerprint
|
||||
@@ -70,14 +80,14 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
# MCP Connection timeout constants (in seconds)
|
||||
MCP_CONNECTION_TIMEOUT = 10
|
||||
MCP_TOOL_EXECUTION_TIMEOUT = 30
|
||||
MCP_DISCOVERY_TIMEOUT = 15
|
||||
MCP_MAX_RETRIES = 3
|
||||
MCP_CONNECTION_TIMEOUT: Final[int] = 10
|
||||
MCP_TOOL_EXECUTION_TIMEOUT: Final[int] = 30
|
||||
MCP_DISCOVERY_TIMEOUT: Final[int] = 15
|
||||
MCP_MAX_RETRIES: Final[int] = 3
|
||||
|
||||
# Simple in-memory cache for MCP tool schemas (duration: 5 minutes)
|
||||
_mcp_schema_cache = {}
|
||||
_cache_ttl = 300 # 5 minutes
|
||||
_mcp_schema_cache: dict[str, Any] = {}
|
||||
_cache_ttl: Final[int] = 300 # 5 minutes
|
||||
|
||||
|
||||
class Agent(BaseAgent):
|
||||
@@ -108,6 +118,8 @@ class Agent(BaseAgent):
|
||||
"""
|
||||
|
||||
_times_executed: int = PrivateAttr(default=0)
|
||||
_mcp_clients: list[Any] = PrivateAttr(default_factory=list)
|
||||
_last_messages: list[LLMMessage] = PrivateAttr(default_factory=list)
|
||||
max_execution_time: int | None = Field(
|
||||
default=None,
|
||||
description="Maximum execution time for an agent to execute a task",
|
||||
@@ -197,6 +209,10 @@ class Agent(BaseAgent):
|
||||
guardrail_max_retries: int = Field(
|
||||
default=3, description="Maximum number of retries when guardrail fails"
|
||||
)
|
||||
# Agent-to-Agent delegation settings. The annotation accepts either one
# A2AConfig or a list of them; None (the default) leaves the field unset.
a2a: list[A2AConfig] | A2AConfig | None = Field(
    default=None,
    description="A2A (Agent-to-Agent) configuration for delegating tasks to remote agents. Can be a single A2AConfig or a list of A2AConfig instances.",
)
|
||||
|
||||
@model_validator(mode="before")
|
||||
def validate_from_repository(cls, v: Any) -> dict[str, Any] | None | Any: # noqa: N805
|
||||
@@ -305,17 +321,19 @@ class Agent(BaseAgent):
|
||||
# If the task requires output in JSON or Pydantic format,
|
||||
# append specific instructions to the task prompt to ensure
|
||||
# that the final answer does not include any code block markers
|
||||
if task.output_json or task.output_pydantic:
|
||||
# Skip this if task.response_model is set, as native structured outputs handle schema automatically
|
||||
if (task.output_json or task.output_pydantic) and not task.response_model:
|
||||
# Generate the schema based on the output format
|
||||
if task.output_json:
|
||||
# schema = json.dumps(task.output_json, indent=2)
|
||||
schema = generate_model_description(task.output_json)
|
||||
schema_dict = generate_model_description(task.output_json)
|
||||
schema = json.dumps(schema_dict["json_schema"]["schema"], indent=2)
|
||||
task_prompt += "\n" + self.i18n.slice(
|
||||
"formatted_task_instructions"
|
||||
).format(output_format=schema)
|
||||
|
||||
elif task.output_pydantic:
|
||||
schema = generate_model_description(task.output_pydantic)
|
||||
schema_dict = generate_model_description(task.output_pydantic)
|
||||
schema = json.dumps(schema_dict["json_schema"]["schema"], indent=2)
|
||||
task_prompt += "\n" + self.i18n.slice(
|
||||
"formatted_task_instructions"
|
||||
).format(output_format=schema)
|
||||
@@ -438,6 +456,13 @@ class Agent(BaseAgent):
|
||||
else:
|
||||
task_prompt = self._use_trained_data(task_prompt=task_prompt)
|
||||
|
||||
# Import agent events locally to avoid circular imports
|
||||
from crewai.events.types.agent_events import (
|
||||
AgentExecutionCompletedEvent,
|
||||
AgentExecutionErrorEvent,
|
||||
AgentExecutionStartedEvent,
|
||||
)
|
||||
|
||||
try:
|
||||
crewai_event_bus.emit(
|
||||
self,
|
||||
@@ -513,6 +538,15 @@ class Agent(BaseAgent):
|
||||
self,
|
||||
event=AgentExecutionCompletedEvent(agent=self, task=task, output=result),
|
||||
)
|
||||
|
||||
self._last_messages = (
|
||||
self.agent_executor.messages.copy()
|
||||
if self.agent_executor and hasattr(self.agent_executor, "messages")
|
||||
else []
|
||||
)
|
||||
|
||||
self._cleanup_mcp_clients()
|
||||
|
||||
return result
|
||||
|
||||
def _execute_with_timeout(self, task_prompt: str, task: Task, timeout: int) -> Any:
|
||||
@@ -618,6 +652,7 @@ class Agent(BaseAgent):
|
||||
self._rpm_controller.check_or_wait if self._rpm_controller else None
|
||||
),
|
||||
callbacks=[TokenCalcHandler(self._token_process)],
|
||||
response_model=task.response_model if task else None,
|
||||
)
|
||||
|
||||
def get_delegation_tools(self, agents: list[BaseAgent]) -> list[BaseTool]:
|
||||
@@ -635,30 +670,70 @@ class Agent(BaseAgent):
|
||||
self._logger.log("error", f"Error getting platform tools: {e!s}")
|
||||
return []
|
||||
|
||||
def get_mcp_tools(self, mcps: list[str]) -> list[BaseTool]:
|
||||
"""Convert MCP server references to CrewAI tools."""
|
||||
def get_mcp_tools(self, mcps: list[str | MCPServerConfig]) -> list[BaseTool]:
|
||||
"""Convert MCP server references/configs to CrewAI tools.
|
||||
|
||||
Supports both string references (backwards compatible) and structured
|
||||
configuration objects (MCPServerStdio, MCPServerHTTP, MCPServerSSE).
|
||||
|
||||
Args:
|
||||
mcps: List of MCP server references (strings) or configurations.
|
||||
|
||||
Returns:
|
||||
List of BaseTool instances from MCP servers.
|
||||
"""
|
||||
all_tools = []
|
||||
clients = []
|
||||
|
||||
for mcp_ref in mcps:
|
||||
try:
|
||||
if mcp_ref.startswith("crewai-amp:"):
|
||||
tools = self._get_amp_mcp_tools(mcp_ref)
|
||||
elif mcp_ref.startswith("https://"):
|
||||
tools = self._get_external_mcp_tools(mcp_ref)
|
||||
else:
|
||||
continue
|
||||
for mcp_config in mcps:
|
||||
if isinstance(mcp_config, str):
|
||||
tools = self._get_mcp_tools_from_string(mcp_config)
|
||||
else:
|
||||
tools, client = self._get_native_mcp_tools(mcp_config)
|
||||
if client:
|
||||
clients.append(client)
|
||||
|
||||
all_tools.extend(tools)
|
||||
self._logger.log(
|
||||
"info", f"Successfully loaded {len(tools)} tools from {mcp_ref}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self._logger.log("warning", f"Skipping MCP {mcp_ref} due to error: {e}")
|
||||
continue
|
||||
all_tools.extend(tools)
|
||||
|
||||
# Store clients for cleanup
|
||||
self._mcp_clients.extend(clients)
|
||||
return all_tools
|
||||
|
||||
def _cleanup_mcp_clients(self) -> None:
|
||||
"""Cleanup MCP client connections after task execution."""
|
||||
if not self._mcp_clients:
|
||||
return
|
||||
|
||||
async def _disconnect_all() -> None:
|
||||
for client in self._mcp_clients:
|
||||
if client and hasattr(client, "connected") and client.connected:
|
||||
await client.disconnect()
|
||||
|
||||
try:
|
||||
asyncio.run(_disconnect_all())
|
||||
except Exception as e:
|
||||
self._logger.log("error", f"Error during MCP client cleanup: {e}")
|
||||
finally:
|
||||
self._mcp_clients.clear()
|
||||
|
||||
def _get_mcp_tools_from_string(self, mcp_ref: str) -> list[BaseTool]:
|
||||
"""Get tools from legacy string-based MCP references.
|
||||
|
||||
This method maintains backwards compatibility with string-based
|
||||
MCP references (https://... and crewai-amp:...).
|
||||
|
||||
Args:
|
||||
mcp_ref: String reference to MCP server.
|
||||
|
||||
Returns:
|
||||
List of BaseTool instances.
|
||||
"""
|
||||
if mcp_ref.startswith("crewai-amp:"):
|
||||
return self._get_amp_mcp_tools(mcp_ref)
|
||||
if mcp_ref.startswith("https://"):
|
||||
return self._get_external_mcp_tools(mcp_ref)
|
||||
return []
|
||||
|
||||
def _get_external_mcp_tools(self, mcp_ref: str) -> list[BaseTool]:
|
||||
"""Get tools from external HTTPS MCP server with graceful error handling."""
|
||||
from crewai.tools.mcp_tool_wrapper import MCPToolWrapper
|
||||
@@ -709,7 +784,7 @@ class Agent(BaseAgent):
|
||||
f"Specific tool '{specific_tool}' not found on MCP server: {server_url}",
|
||||
)
|
||||
|
||||
return tools
|
||||
return cast(list[BaseTool], tools)
|
||||
|
||||
except Exception as e:
|
||||
self._logger.log(
|
||||
@@ -717,6 +792,164 @@ class Agent(BaseAgent):
|
||||
)
|
||||
return []
|
||||
|
||||
def _get_native_mcp_tools(
    self, mcp_config: MCPServerConfig
) -> tuple[list[BaseTool], Any | None]:
    """Get tools from MCP server using structured configuration.

    Builds the transport matching the configuration type, connects an
    ``MCPClient`` just long enough to list the server's tools, applies the
    optional ``tool_filter`` and wraps every remaining tool definition in an
    ``MCPNativeTool`` bound to that client.

    Args:
        mcp_config: MCP server configuration (MCPServerStdio, MCPServerHTTP,
            or MCPServerSSE).

    Returns:
        Tuple of (list of BaseTool instances, MCPClient instance for cleanup).

    Raises:
        ValueError: If ``mcp_config`` is not one of the supported types.
        RuntimeError: If discovery, filtering or tool wrapping fails; the
            underlying error is chained as ``__cause__``.
    """
    from crewai.tools.base_tool import BaseTool
    from crewai.tools.mcp_native_tool import MCPNativeTool

    if isinstance(mcp_config, MCPServerStdio):
        transport = StdioTransport(
            command=mcp_config.command,
            args=mcp_config.args,
            env=mcp_config.env,
        )
        server_name = f"{mcp_config.command}_{'_'.join(mcp_config.args)}"
    elif isinstance(mcp_config, MCPServerHTTP):
        transport = HTTPTransport(
            url=mcp_config.url,
            headers=mcp_config.headers,
            streamable=mcp_config.streamable,
        )
        server_name = self._extract_server_name(mcp_config.url)
    elif isinstance(mcp_config, MCPServerSSE):
        transport = SSETransport(
            url=mcp_config.url,
            headers=mcp_config.headers,
        )
        server_name = self._extract_server_name(mcp_config.url)
    else:
        raise ValueError(f"Unsupported MCP server config type: {type(mcp_config)}")

    client = MCPClient(
        transport=transport,
        cache_tools_list=mcp_config.cache_tools_list,
    )

    async def _setup_client_and_list_tools() -> list[dict[str, Any]]:
        """Async helper to connect and list tools in same event loop."""
        try:
            if not client.connected:
                await client.connect()

            tools_list = await client.list_tools()

            try:
                await client.disconnect()
                # Small delay to allow background tasks to finish cleanup
                # This helps prevent "cancel scope in different task" errors
                # when asyncio.run() closes the event loop
                await asyncio.sleep(0.1)
            except Exception as e:
                self._logger.log("error", f"Error during disconnect: {e}")

            return tools_list
        except Exception as e:
            if client.connected:
                try:
                    await client.disconnect()
                    await asyncio.sleep(0.1)
                except Exception as cleanup_error:
                    # Keep the original failure as the one that is reported.
                    self._logger.log(
                        "error", f"Error during disconnect: {cleanup_error}"
                    )
            raise RuntimeError(
                f"Error during setup client and list tools: {e}"
            ) from e

    def _run_discovery() -> list[dict[str, Any]]:
        """Run discovery on a fresh event loop and normalise loop-level errors."""
        try:
            return asyncio.run(_setup_client_and_list_tools())
        except RuntimeError as e:
            error_msg = str(e).lower()
            if "cancel scope" in error_msg or "task" in error_msg:
                raise ConnectionError(
                    "MCP connection failed due to event loop cleanup issues. "
                    "This may be due to authentication errors or server unavailability."
                ) from e
            # Any other failure must propagate; swallowing it would leave the
            # tool list undefined and hide the real cause.
            raise
        except asyncio.CancelledError as e:
            raise ConnectionError(
                "MCP connection was cancelled. This may indicate an authentication "
                "error or server unavailability."
            ) from e

    try:
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: asyncio.run can be used here.
            tools_list = _run_discovery()
        else:
            # A loop is already running here, and asyncio.run refuses to nest,
            # so discovery gets its own loop on a worker thread.
            import concurrent.futures

            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                tools_list = executor.submit(_run_discovery).result()

        tool_filter = mcp_config.tool_filter
        if tool_filter:
            if callable(tool_filter):

                def _passes_filter(tool_def: dict[str, Any]) -> bool:
                    """Try the (context, tool) filter form, then the (tool) form."""
                    try:
                        from crewai.mcp.filters import ToolFilterContext

                        context = ToolFilterContext(
                            agent=self,
                            server_name=server_name,
                            run_context=None,
                        )
                        return bool(tool_filter(context, tool_def))
                    except (TypeError, AttributeError):
                        return bool(tool_filter(tool_def))

                tools_list = [tool for tool in tools_list if _passes_filter(tool)]
            else:
                # Not callable - include every tool
                tools_list = list(tools_list)

        tools = []
        for tool_def in tools_list:
            tool_name = tool_def.get("name", "")
            if not tool_name:
                continue

            # Convert inputSchema to Pydantic model if present
            args_schema = None
            if tool_def.get("inputSchema"):
                args_schema = self._json_schema_to_pydantic(
                    tool_name, tool_def["inputSchema"]
                )

            tool_schema = {
                "description": tool_def.get("description", ""),
                "args_schema": args_schema,
            }

            try:
                native_tool = MCPNativeTool(
                    mcp_client=client,
                    tool_name=tool_name,
                    tool_schema=tool_schema,
                    server_name=server_name,
                )
                tools.append(native_tool)
            except Exception as e:
                self._logger.log("error", f"Failed to create native MCP tool: {e}")
                continue

        return cast(list[BaseTool], tools), client
    except Exception as e:
        if client.connected:
            try:
                asyncio.run(client.disconnect())
            except Exception as cleanup_error:
                # Best-effort cleanup: never let it mask the original error.
                self._logger.log(
                    "error", f"Error during disconnect: {cleanup_error}"
                )

        raise RuntimeError(f"Failed to get native MCP tools: {e}") from e
|
||||
|
||||
def _get_amp_mcp_tools(self, amp_ref: str) -> list[BaseTool]:
|
||||
"""Get tools from CrewAI AMP MCP marketplace."""
|
||||
# Parse: "crewai-amp:mcp-name" or "crewai-amp:mcp-name#tool_name"
|
||||
@@ -739,9 +972,9 @@ class Agent(BaseAgent):
|
||||
|
||||
return tools
|
||||
|
||||
def _extract_server_name(self, server_url: str) -> str:
|
||||
@staticmethod
|
||||
def _extract_server_name(server_url: str) -> str:
|
||||
"""Extract clean server name from URL for tool prefixing."""
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed = urlparse(server_url)
|
||||
domain = parsed.netloc.replace(".", "_")
|
||||
@@ -778,7 +1011,9 @@ class Agent(BaseAgent):
|
||||
)
|
||||
return {}
|
||||
|
||||
async def _get_mcp_tool_schemas_async(self, server_params: dict) -> dict[str, dict]:
|
||||
async def _get_mcp_tool_schemas_async(
|
||||
self, server_params: dict[str, Any]
|
||||
) -> dict[str, dict]:
|
||||
"""Async implementation of MCP tool schema retrieval with timeouts and retries."""
|
||||
server_url = server_params["url"]
|
||||
return await self._retry_mcp_discovery(
|
||||
@@ -787,7 +1022,7 @@ class Agent(BaseAgent):
|
||||
|
||||
async def _retry_mcp_discovery(
|
||||
self, operation_func, server_url: str
|
||||
) -> dict[str, dict]:
|
||||
) -> dict[str, dict[str, Any]]:
|
||||
"""Retry MCP discovery operation with exponential backoff, avoiding try-except in loop."""
|
||||
last_error = None
|
||||
|
||||
@@ -815,9 +1050,10 @@ class Agent(BaseAgent):
|
||||
f"Failed to discover MCP tools after {MCP_MAX_RETRIES} attempts: {last_error}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def _attempt_mcp_discovery(
|
||||
self, operation_func, server_url: str
|
||||
) -> tuple[dict[str, dict] | None, str, bool]:
|
||||
operation_func, server_url: str
|
||||
) -> tuple[dict[str, dict[str, Any]] | None, str, bool]:
|
||||
"""Attempt single MCP discovery operation and return (result, error_message, should_retry)."""
|
||||
try:
|
||||
result = await operation_func(server_url)
|
||||
@@ -851,13 +1087,13 @@ class Agent(BaseAgent):
|
||||
|
||||
async def _discover_mcp_tools_with_timeout(
|
||||
self, server_url: str
|
||||
) -> dict[str, dict]:
|
||||
) -> dict[str, dict[str, Any]]:
|
||||
"""Discover MCP tools with timeout wrapper."""
|
||||
return await asyncio.wait_for(
|
||||
self._discover_mcp_tools(server_url), timeout=MCP_DISCOVERY_TIMEOUT
|
||||
)
|
||||
|
||||
async def _discover_mcp_tools(self, server_url: str) -> dict[str, dict]:
|
||||
async def _discover_mcp_tools(self, server_url: str) -> dict[str, dict[str, Any]]:
|
||||
"""Discover tools from MCP server with proper timeout handling."""
|
||||
from mcp import ClientSession
|
||||
from mcp.client.streamable_http import streamablehttp_client
|
||||
@@ -889,7 +1125,9 @@ class Agent(BaseAgent):
|
||||
}
|
||||
return schemas
|
||||
|
||||
def _json_schema_to_pydantic(self, tool_name: str, json_schema: dict) -> type:
|
||||
def _json_schema_to_pydantic(
|
||||
self, tool_name: str, json_schema: dict[str, Any]
|
||||
) -> type:
|
||||
"""Convert JSON Schema to Pydantic model for tool arguments.
|
||||
|
||||
Args:
|
||||
@@ -926,7 +1164,7 @@ class Agent(BaseAgent):
|
||||
model_name = f"{tool_name.replace('-', '_').replace(' ', '_')}Schema"
|
||||
return create_model(model_name, **field_definitions)
|
||||
|
||||
def _json_type_to_python(self, field_schema: dict) -> type:
|
||||
def _json_type_to_python(self, field_schema: dict[str, Any]) -> type:
|
||||
"""Convert JSON Schema type to Python type.
|
||||
|
||||
Args:
|
||||
@@ -935,7 +1173,6 @@ class Agent(BaseAgent):
|
||||
Returns:
|
||||
Python type
|
||||
"""
|
||||
from typing import Any
|
||||
|
||||
json_type = field_schema.get("type")
|
||||
|
||||
@@ -965,13 +1202,15 @@ class Agent(BaseAgent):
|
||||
|
||||
return type_mapping.get(json_type, Any)
|
||||
|
||||
def _fetch_amp_mcp_servers(self, mcp_name: str) -> list[dict]:
|
||||
@staticmethod
|
||||
def _fetch_amp_mcp_servers(mcp_name: str) -> list[dict]:
|
||||
"""Fetch MCP server configurations from CrewAI AMP API."""
|
||||
# TODO: Implement AMP API call to "integrations/mcps" endpoint
|
||||
# Should return list of server configs with URLs
|
||||
return []
|
||||
|
||||
def get_multimodal_tools(self) -> Sequence[BaseTool]:
|
||||
@staticmethod
|
||||
def get_multimodal_tools() -> Sequence[BaseTool]:
|
||||
from crewai.tools.agent_tools.add_image_tool import AddImageTool
|
||||
|
||||
return [AddImageTool()]
|
||||
@@ -991,8 +1230,9 @@ class Agent(BaseAgent):
|
||||
)
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def get_output_converter(
|
||||
self, llm: BaseLLM, text: str, model: type[BaseModel], instructions: str
|
||||
llm: BaseLLM, text: str, model: type[BaseModel], instructions: str
|
||||
) -> Converter:
|
||||
return Converter(llm=llm, text=text, model=model, instructions=instructions)
|
||||
|
||||
@@ -1022,7 +1262,8 @@ class Agent(BaseAgent):
|
||||
)
|
||||
return task_prompt
|
||||
|
||||
def _render_text_description(self, tools: list[Any]) -> str:
|
||||
@staticmethod
|
||||
def _render_text_description(tools: list[Any]) -> str:
|
||||
"""Render the tool name and description in plain text.
|
||||
|
||||
Output will be in the format of:
|
||||
@@ -1107,6 +1348,15 @@ class Agent(BaseAgent):
|
||||
def set_fingerprint(self, fingerprint: Fingerprint) -> None:
|
||||
self.security_config.fingerprint = fingerprint
|
||||
|
||||
@property
def last_messages(self) -> list[LLMMessage]:
    """Expose the message history recorded for the latest task run.

    Returns:
        The list of LLM messages held in the private ``_last_messages``
        attribute; the same list object is returned, not a copy.
    """
    recorded = self._last_messages
    return recorded
|
||||
|
||||
def _get_knowledge_search_query(self, task_prompt: str, task: Task) -> str | None:
|
||||
"""Generate a search query for the knowledge base based on the task description."""
|
||||
crewai_event_bus.emit(
|
||||
0
lib/crewai/src/crewai/agent/internal/__init__.py
Normal file
0
lib/crewai/src/crewai/agent/internal/__init__.py
Normal file
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user