mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-03-13 15:28:14 +00:00
Compare commits
6 Commits
1.2.1
...
devin/1761
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
312c6d73bc | ||
|
|
26906113fe | ||
|
|
2e9eb8c32d | ||
|
|
4ebb5114ed | ||
|
|
70b083945f | ||
|
|
410db1ff39 |
@@ -93,11 +93,15 @@ After running the application, you can view the traces in [Datadog LLM Observabi
|
||||
|
||||
Clicking on a trace will show you the details of the trace, including total tokens used, number of LLM calls, models used, and estimated cost. Clicking into a specific span will narrow down these details, and show related input, output, and metadata.
|
||||
|
||||

|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-1.png" alt="Datadog LLM Observability Trace View" />
|
||||
</Frame>
|
||||
|
||||
Additionally, you can open the execution graph view of the trace, which shows the trace's control and data flow. This view scales with larger agents to show handoffs and relationships between LLM calls, tool calls, and agent interactions.
|
||||
|
||||

|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-2.png" alt="Datadog LLM Observability Agent Execution Flow View" />
|
||||
</Frame>
|
||||
|
||||
## References
|
||||
|
||||
|
||||
@@ -23,13 +23,15 @@ Here's a minimal example of how to use the tool:
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai_tools import QdrantVectorSearchTool
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# Initialize the tool
|
||||
# Initialize the tool with QdrantConfig
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
)
|
||||
)
|
||||
|
||||
# Create an agent that uses the tool
|
||||
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
|
||||
def get_openai_embedding(text):
|
||||
response = client.embeddings.create(
|
||||
input=text,
|
||||
model="text-embedding-3-small"
|
||||
model="text-embedding-3-large"
|
||||
)
|
||||
return response.data[0].embedding
|
||||
|
||||
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
|
||||
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
||||
# Extract text from PDF
|
||||
text_chunks = extract_text_from_pdf(pdf_path)
|
||||
|
||||
|
||||
# Create Qdrant collection
|
||||
if qdrant.collection_exists(collection_name):
|
||||
qdrant.delete_collection(collection_name)
|
||||
qdrant.create_collection(
|
||||
collection_name=collection_name,
|
||||
vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
|
||||
vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
|
||||
)
|
||||
|
||||
# Store embeddings
|
||||
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
|
||||
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
||||
|
||||
# Initialize Qdrant search tool
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
)
|
||||
)
|
||||
|
||||
# Create CrewAI agents
|
||||
search_agent = Agent(
|
||||
role="Senior Semantic Search Agent",
|
||||
goal="Find and analyze documents based on semantic search",
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
information using semantic search in a Qdrant database.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -141,7 +147,7 @@ search_agent = Agent(
|
||||
answer_agent = Agent(
|
||||
role="Senior Answer Assistant",
|
||||
goal="Generate answers to questions based on the context provided",
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
answers to questions based on the context provided.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -180,21 +186,82 @@ print(result)
|
||||
## Tool Parameters
|
||||
|
||||
### Required Parameters
|
||||
- `qdrant_url` (str): The URL of your Qdrant server
|
||||
- `qdrant_api_key` (str): API key for authentication with Qdrant
|
||||
- `collection_name` (str): Name of the Qdrant collection to search
|
||||
- `qdrant_config` (QdrantConfig): Configuration object containing all Qdrant settings
|
||||
|
||||
### Optional Parameters
|
||||
### QdrantConfig Parameters
|
||||
- `qdrant_url` (str): The URL of your Qdrant server
|
||||
- `qdrant_api_key` (str, optional): API key for authentication with Qdrant
|
||||
- `collection_name` (str): Name of the Qdrant collection to search
|
||||
- `limit` (int): Maximum number of results to return (default: 3)
|
||||
- `score_threshold` (float): Minimum similarity score threshold (default: 0.35)
|
||||
- `filter` (Any, optional): Qdrant Filter instance for advanced filtering (default: None)
|
||||
|
||||
### Optional Tool Parameters
|
||||
- `custom_embedding_fn` (Callable[[str], list[float]]): Custom function for text vectorization
|
||||
- `qdrant_package` (str): Base package path for Qdrant (default: "qdrant_client")
|
||||
- `client` (Any): Pre-initialized Qdrant client (optional)
|
||||
|
||||
## Advanced Filtering
|
||||
|
||||
The QdrantVectorSearchTool supports powerful filtering capabilities to refine your search results:
|
||||
|
||||
### Dynamic Filtering
|
||||
Use the `filter_by` and `filter_value` parameters in your search to filter results on the fly:
|
||||
|
||||
```python
|
||||
# Agent will use these parameters when calling the tool
|
||||
# The tool schema accepts filter_by and filter_value
|
||||
# Example: search with category filter
|
||||
# Results will be filtered where category == "technology"
|
||||
```
|
||||
|
||||
### Preset Filters with QdrantConfig
|
||||
For complex filtering, use Qdrant Filter instances in your configuration:
|
||||
|
||||
```python
|
||||
from qdrant_client.http import models as qmodels
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# Create a filter for specific conditions
|
||||
preset_filter = qmodels.Filter(
|
||||
must=[
|
||||
qmodels.FieldCondition(
|
||||
key="category",
|
||||
match=qmodels.MatchValue(value="research")
|
||||
),
|
||||
qmodels.FieldCondition(
|
||||
key="year",
|
||||
match=qmodels.MatchValue(value=2024)
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# Initialize tool with preset filter
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
filter=preset_filter # Preset filter applied to all searches
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### Combining Filters
|
||||
The tool automatically combines preset filters from `QdrantConfig` with dynamic filters from `filter_by` and `filter_value`:
|
||||
|
||||
```python
|
||||
# If QdrantConfig has a preset filter for category="research"
|
||||
# And the search uses filter_by="year", filter_value=2024
|
||||
# Both filters will be combined (AND logic)
|
||||
```
|
||||
|
||||
## Search Parameters
|
||||
|
||||
The tool accepts these parameters in its schema:
|
||||
- `query` (str): The search query to find similar documents
|
||||
- `filter_by` (str, optional): Metadata field to filter on
|
||||
- `filter_value` (str, optional): Value to filter by
|
||||
- `filter_value` (Any, optional): Value to filter by
|
||||
|
||||
## Return Format
|
||||
|
||||
@@ -214,7 +281,7 @@ The tool returns results in JSON format:
|
||||
|
||||
## Default Embedding
|
||||
|
||||
By default, the tool uses OpenAI's `text-embedding-3-small` model for vectorization. This requires:
|
||||
By default, the tool uses OpenAI's `text-embedding-3-large` model for vectorization. This requires:
|
||||
- OpenAI API key set in environment: `OPENAI_API_KEY`
|
||||
|
||||
## Custom Embeddings
|
||||
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
|
||||
# Tokenize and get model outputs
|
||||
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
||||
outputs = model(**inputs)
|
||||
|
||||
|
||||
# Use mean pooling to get text embedding
|
||||
embeddings = outputs.last_hidden_state.mean(dim=1)
|
||||
|
||||
|
||||
# Convert to list of floats and return
|
||||
return embeddings[0].tolist()
|
||||
|
||||
# Use custom embeddings with the tool
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection"
|
||||
),
|
||||
custom_embedding_fn=custom_embeddings # Pass your custom function
|
||||
)
|
||||
```
|
||||
@@ -269,4 +340,4 @@ Required environment variables:
|
||||
```bash
|
||||
export QDRANT_URL="your_qdrant_url" # If not provided in constructor
|
||||
export QDRANT_API_KEY="your_api_key" # If not provided in constructor
|
||||
export OPENAI_API_KEY="your_openai_key" # If using default embeddings
|
||||
export OPENAI_API_KEY="your_openai_key" # If using default embeddings
|
||||
|
||||
@@ -54,25 +54,25 @@ The following parameters can be used to customize the `CSVSearchTool`'s behavior
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = CSVSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -46,23 +46,25 @@ tool = DirectorySearchTool(directory='/path/to/directory')
|
||||
The DirectorySearchTool uses OpenAI for embeddings and summarization by default. Customization options for these settings include changing the model provider and configuration, enhancing flexibility for advanced users.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DirectorySearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # Options include ollama, google, anthropic, llama2, and more
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Additional configurations here
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -56,25 +56,25 @@ The following parameters can be used to customize the `DOCXSearchTool`'s behavio
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DOCXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -48,27 +48,25 @@ tool = MDXSearchTool(mdx='path/to/your/document.mdx')
|
||||
The tool defaults to using OpenAI for embeddings and summarization. For customization, utilize a configuration dictionary as shown below:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = MDXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # Options include google, openai, anthropic, llama2, etc.
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Optional parameters can be included here.
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# Optional title for the embeddings can be added here.
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -45,28 +45,64 @@ tool = PDFSearchTool(pdf='path/to/your/document.pdf')
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows. Note: a vector database is required because generated embeddings must be stored and queried from a vectordb.
|
||||
|
||||
```python Code
|
||||
from crewai_tools import PDFSearchTool
|
||||
|
||||
# - embedding_model (required): choose provider + provider-specific config
|
||||
# - vectordb (required): choose vector DB and pass its config
|
||||
|
||||
tool = PDFSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
# Supported providers: "openai", "azure", "google-generativeai", "google-vertex",
|
||||
# "voyageai", "cohere", "huggingface", "jina", "sentence-transformer",
|
||||
# "text2vec", "ollama", "openclip", "instructor", "onnx", "roboflow", "watsonx", "custom"
|
||||
"provider": "openai", # or: "google-generativeai", "cohere", "ollama", ...
|
||||
"config": {
|
||||
# Model identifier for the chosen provider. "model" will be auto-mapped to "model_name" internally.
|
||||
"model": "text-embedding-3-small",
|
||||
# Optional: API key. If omitted, the tool will use provider-specific env vars when available
|
||||
# (e.g., OPENAI_API_KEY for provider="openai").
|
||||
# "api_key": "sk-...",
|
||||
|
||||
# Provider-specific examples:
|
||||
# --- Google Generative AI ---
|
||||
# (Set provider="google-generativeai" above)
|
||||
# "model": "models/embedding-001",
|
||||
# "task_type": "retrieval_document",
|
||||
# "title": "Embeddings",
|
||||
|
||||
# --- Cohere ---
|
||||
# (Set provider="cohere" above)
|
||||
# "model": "embed-english-v3.0",
|
||||
|
||||
# --- Ollama (local) ---
|
||||
# (Set provider="ollama" above)
|
||||
# "model": "nomic-embed-text",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# For ChromaDB: pass "settings" (chromadb.config.Settings) or rely on defaults.
|
||||
# Example (uncomment and import):
|
||||
# from chromadb.config import Settings
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# For Qdrant: pass "vectors_config" (qdrant_client.models.VectorParams).
|
||||
# Example (uncomment and import):
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# Note: collection name is controlled by the tool (default: "rag_tool_collection"), not set here.
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -57,25 +57,41 @@ By default, the tool uses OpenAI for both embeddings and summarization.
|
||||
To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = TXTSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# Required: embeddings provider + config
|
||||
"embedding_model": {
|
||||
"provider": "openai", # or google-generativeai, cohere, ollama, ...
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...", # optional if env var is set
|
||||
# Provider examples:
|
||||
# Google → model: "models/embedding-001", task_type: "retrieval_document"
|
||||
# Cohere → model: "embed-english-v3.0"
|
||||
# Ollama → model: "nomic-embed-text"
|
||||
},
|
||||
},
|
||||
|
||||
# Required: vector database config
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# Chroma settings (optional persistence)
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# Qdrant vector params example:
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# Note: collection name is controlled by the tool (default: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -54,25 +54,25 @@ It is an optional parameter during the tool's initialization but must be provide
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = XMLSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -93,11 +93,15 @@ ddtrace-run python crewai_agent.py
|
||||
|
||||
트레이스를 클릭하면 사용된 총 토큰, LLM 호출 수, 사용된 모델, 예상 비용 등 트레이스에 대한 세부 정보가 표시됩니다. 특정 스팬(span)을 클릭하면 이러한 세부 정보의 범위가 좁혀지고 관련 입력, 출력 및 메타데이터가 표시됩니다.
|
||||
|
||||

|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-1.png" alt="Datadog LLM Observability 트레이스 보기" />
|
||||
</Frame>
|
||||
|
||||
또한 트레이스의 실행 그래프 보기를 통해 트레이스의 제어 및 데이터 흐름을 확인할 수 있습니다. 이 보기는 에이전트 규모가 커져도 함께 확장되어 LLM 호출, 도구 호출 및 에이전트 상호 작용 간의 핸드오프와 관계를 보여줍니다.
|
||||
|
||||

|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-2.png" alt="Datadog LLM Observability 에이전트 실행 흐름 보기" />
|
||||
</Frame>
|
||||
|
||||
## 참조
|
||||
|
||||
|
||||
@@ -23,13 +23,15 @@ uv add qdrant-client
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai_tools import QdrantVectorSearchTool
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# Initialize the tool
|
||||
# QdrantConfig로 도구 초기화
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
)
|
||||
)
|
||||
|
||||
# Create an agent that uses the tool
|
||||
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
|
||||
def get_openai_embedding(text):
|
||||
response = client.embeddings.create(
|
||||
input=text,
|
||||
model="text-embedding-3-small"
|
||||
model="text-embedding-3-large"
|
||||
)
|
||||
return response.data[0].embedding
|
||||
|
||||
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
|
||||
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
||||
# Extract text from PDF
|
||||
text_chunks = extract_text_from_pdf(pdf_path)
|
||||
|
||||
|
||||
# Create Qdrant collection
|
||||
if qdrant.collection_exists(collection_name):
|
||||
qdrant.delete_collection(collection_name)
|
||||
qdrant.create_collection(
|
||||
collection_name=collection_name,
|
||||
vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
|
||||
vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
|
||||
)
|
||||
|
||||
# Store embeddings
|
||||
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
|
||||
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
||||
|
||||
# Initialize Qdrant search tool
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
)
|
||||
)
|
||||
|
||||
# Create CrewAI agents
|
||||
search_agent = Agent(
|
||||
role="Senior Semantic Search Agent",
|
||||
goal="Find and analyze documents based on semantic search",
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
information using semantic search in a Qdrant database.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -141,7 +147,7 @@ search_agent = Agent(
|
||||
answer_agent = Agent(
|
||||
role="Senior Answer Assistant",
|
||||
goal="Generate answers to questions based on the context provided",
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
answers to questions based on the context provided.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -180,21 +186,82 @@ print(result)
|
||||
## 도구 매개변수
|
||||
|
||||
### 필수 매개변수
|
||||
- `qdrant_url` (str): Qdrant 서버의 URL
|
||||
- `qdrant_api_key` (str): Qdrant 인증을 위한 API 키
|
||||
- `collection_name` (str): 검색할 Qdrant 컬렉션의 이름
|
||||
- `qdrant_config` (QdrantConfig): 모든 Qdrant 설정을 포함하는 구성 객체
|
||||
|
||||
### 선택적 매개변수
|
||||
### QdrantConfig 매개변수
|
||||
- `qdrant_url` (str): Qdrant 서버의 URL
|
||||
- `qdrant_api_key` (str, 선택 사항): Qdrant 인증을 위한 API 키
|
||||
- `collection_name` (str): 검색할 Qdrant 컬렉션의 이름
|
||||
- `limit` (int): 반환할 최대 결과 수 (기본값: 3)
|
||||
- `score_threshold` (float): 최소 유사도 점수 임계값 (기본값: 0.35)
|
||||
- `filter` (Any, 선택 사항): 고급 필터링을 위한 Qdrant Filter 인스턴스 (기본값: None)
|
||||
|
||||
### 선택적 도구 매개변수
|
||||
- `custom_embedding_fn` (Callable[[str], list[float]]): 텍스트 벡터화를 위한 사용자 지정 함수
|
||||
- `qdrant_package` (str): Qdrant의 기본 패키지 경로 (기본값: "qdrant_client")
|
||||
- `client` (Any): 사전 초기화된 Qdrant 클라이언트 (선택 사항)
|
||||
|
||||
## 고급 필터링
|
||||
|
||||
QdrantVectorSearchTool은 검색 결과를 세밀하게 조정할 수 있는 강력한 필터링 기능을 지원합니다:
|
||||
|
||||
### 동적 필터링
|
||||
검색 시 `filter_by` 및 `filter_value` 매개변수를 사용하여 즉석에서 결과를 필터링할 수 있습니다:
|
||||
|
||||
```python
|
||||
# 에이전트는 도구를 호출할 때 이러한 매개변수를 사용합니다
|
||||
# 도구 스키마는 filter_by 및 filter_value를 허용합니다
|
||||
# 예시: 카테고리 필터를 사용한 검색
|
||||
# 결과는 category == "기술"인 항목으로 필터링됩니다
|
||||
```
|
||||
|
||||
### QdrantConfig를 사용한 사전 설정 필터
|
||||
복잡한 필터링의 경우 구성에서 Qdrant Filter 인스턴스를 사용하세요:
|
||||
|
||||
```python
|
||||
from qdrant_client.http import models as qmodels
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# 특정 조건에 대한 필터 생성
|
||||
preset_filter = qmodels.Filter(
|
||||
must=[
|
||||
qmodels.FieldCondition(
|
||||
key="category",
|
||||
match=qmodels.MatchValue(value="research")
|
||||
),
|
||||
qmodels.FieldCondition(
|
||||
key="year",
|
||||
match=qmodels.MatchValue(value=2024)
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# 사전 설정 필터로 도구 초기화
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
filter=preset_filter # 모든 검색에 적용되는 사전 설정 필터
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### 필터 결합
|
||||
도구는 `QdrantConfig`의 사전 설정 필터와 `filter_by` 및 `filter_value`의 동적 필터를 자동으로 결합합니다:
|
||||
|
||||
```python
|
||||
# QdrantConfig에 category="research"에 대한 사전 설정 필터가 있고
|
||||
# 검색에서 filter_by="year", filter_value=2024를 사용하는 경우
|
||||
# 두 필터가 모두 결합됩니다 (AND 논리)
|
||||
```
|
||||
|
||||
## 검색 매개변수
|
||||
|
||||
이 도구는 스키마에서 다음과 같은 매개변수를 허용합니다:
|
||||
- `query` (str): 유사한 문서를 찾기 위한 검색 쿼리
|
||||
- `filter_by` (str, 선택 사항): 필터링할 메타데이터 필드
|
||||
- `filter_value` (str, 선택 사항): 필터 기준 값
|
||||
- `filter_value` (Any, 선택 사항): 필터 기준 값
|
||||
|
||||
## 반환 형식
|
||||
|
||||
@@ -214,7 +281,7 @@ print(result)
|
||||
|
||||
## 기본 임베딩
|
||||
|
||||
기본적으로, 이 도구는 벡터화를 위해 OpenAI의 `text-embedding-3-small` 모델을 사용합니다. 이를 위해서는 다음이 필요합니다:
|
||||
기본적으로, 이 도구는 벡터화를 위해 OpenAI의 `text-embedding-3-large` 모델을 사용합니다. 이를 위해서는 다음이 필요합니다:
|
||||
- 환경변수에 설정된 OpenAI API 키: `OPENAI_API_KEY`
|
||||
|
||||
## 커스텀 임베딩
|
||||
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
|
||||
# Tokenize and get model outputs
|
||||
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
||||
outputs = model(**inputs)
|
||||
|
||||
|
||||
# Use mean pooling to get text embedding
|
||||
embeddings = outputs.last_hidden_state.mean(dim=1)
|
||||
|
||||
|
||||
# Convert to list of floats and return
|
||||
return embeddings[0].tolist()
|
||||
|
||||
# Use custom embeddings with the tool
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection"
|
||||
),
|
||||
custom_embedding_fn=custom_embeddings # Pass your custom function
|
||||
)
|
||||
```
|
||||
@@ -270,4 +341,4 @@ tool = QdrantVectorSearchTool(
|
||||
export QDRANT_URL="your_qdrant_url" # If not provided in constructor
|
||||
export QDRANT_API_KEY="your_api_key" # If not provided in constructor
|
||||
export OPENAI_API_KEY="your_openai_key" # If using default embeddings
|
||||
```
|
||||
```
|
||||
|
||||
@@ -54,25 +54,25 @@ tool = CSVSearchTool()
|
||||
기본적으로 이 도구는 임베딩과 요약 모두에 OpenAI를 사용합니다. 모델을 사용자 지정하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = CSVSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -46,23 +46,25 @@ tool = DirectorySearchTool(directory='/path/to/directory')
|
||||
DirectorySearchTool은 기본적으로 OpenAI를 사용하여 임베딩 및 요약을 수행합니다. 이 설정의 커스터마이즈 옵션에는 모델 공급자 및 구성을 변경하는 것이 포함되어 있어, 고급 사용자를 위한 유연성을 향상시킵니다.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DirectorySearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # Options include ollama, google, anthropic, llama2, and more
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Additional configurations here
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -56,25 +56,25 @@ tool = DOCXSearchTool(docx='path/to/your/document.docx')
|
||||
기본적으로 이 도구는 임베딩과 요약 모두에 OpenAI를 사용합니다. 모델을 커스터마이즈하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DOCXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -48,27 +48,25 @@ tool = MDXSearchTool(mdx='path/to/your/document.mdx')
|
||||
이 도구는 기본적으로 임베딩과 요약을 위해 OpenAI를 사용합니다. 커스터마이징을 위해 아래와 같이 설정 딕셔너리를 사용할 수 있습니다.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = MDXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # 옵션에는 google, openai, anthropic, llama2 등이 있습니다.
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# 선택적 파라미터를 여기에 포함할 수 있습니다.
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # 또는 openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# 임베딩에 대한 선택적 제목을 여기에 추가할 수 있습니다.
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -45,28 +45,60 @@ tool = PDFSearchTool(pdf='path/to/your/document.pdf')
|
||||
|
||||
## 커스텀 모델 및 임베딩
|
||||
|
||||
기본적으로 이 도구는 임베딩과 요약 모두에 OpenAI를 사용합니다. 모델을 커스터마이즈하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다:
|
||||
기본적으로 이 도구는 임베딩과 요약 모두에 OpenAI를 사용합니다. 모델을 커스터마이즈하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다. 참고: 임베딩은 벡터DB에 저장되어야 하므로 vectordb 설정이 필요합니다.
|
||||
|
||||
```python Code
|
||||
from crewai_tools import PDFSearchTool
|
||||
from chromadb.config import Settings # Chroma 영속성 설정
|
||||
|
||||
tool = PDFSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# 필수: 임베딩 제공자와 설정
|
||||
"embedding_model": {
|
||||
# 사용 가능 공급자: "openai", "azure", "google-generativeai", "google-vertex",
|
||||
# "voyageai", "cohere", "huggingface", "jina", "sentence-transformer",
|
||||
# "text2vec", "ollama", "openclip", "instructor", "onnx", "roboflow", "watsonx", "custom"
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
# "model" 키는 내부적으로 "model_name"으로 매핑됩니다.
|
||||
"model": "text-embedding-3-small",
|
||||
# 선택: API 키 (미설정 시 환경변수 사용)
|
||||
# "api_key": "sk-...",
|
||||
|
||||
# 공급자별 예시
|
||||
# --- Google ---
|
||||
# (provider를 "google-generativeai"로 설정)
|
||||
# "model": "models/embedding-001",
|
||||
# "task_type": "retrieval_document",
|
||||
|
||||
# --- Cohere ---
|
||||
# (provider를 "cohere"로 설정)
|
||||
# "model": "embed-english-v3.0",
|
||||
|
||||
# --- Ollama(로컬) ---
|
||||
# (provider를 "ollama"로 설정)
|
||||
# "model": "nomic-embed-text",
|
||||
},
|
||||
},
|
||||
|
||||
# 필수: 벡터DB 설정
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# Chroma 설정 예시
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# Qdrant 설정 예시
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# 참고: 컬렉션 이름은 도구에서 관리합니다(기본값: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -57,25 +57,34 @@ tool = TXTSearchTool(txt='path/to/text/file.txt')
|
||||
모델을 커스터마이징하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = TXTSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# 필수: 임베딩 제공자 + 설정
|
||||
"embedding_model": {
|
||||
"provider": "openai", # 또는 google-generativeai, cohere, ollama 등
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...", # 환경변수 사용 시 생략 가능
|
||||
# 공급자별 예시: Google → model: "models/embedding-001", task_type: "retrieval_document"
|
||||
},
|
||||
},
|
||||
|
||||
# 필수: 벡터DB 설정
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# Chroma 설정(영속성 예시)
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
|
||||
# Qdrant 벡터 파라미터 예시:
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# 참고: 컬렉션 이름은 도구에서 관리합니다(기본값: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -54,25 +54,25 @@ tool = XMLSearchTool(xml='path/to/your/xmlfile.xml')
|
||||
기본적으로 이 도구는 임베딩과 요약 모두에 OpenAI를 사용합니다. 모델을 커스터마이징하려면 다음과 같이 config 딕셔너리를 사용할 수 있습니다.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = XMLSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # 또는 "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -93,11 +93,14 @@ Depois de executar o aplicativo, você pode visualizar os traços na [Datadog LL
|
||||
|
||||
Ao clicar em um rastreamento, você verá os detalhes do rastreamento, incluindo o total de tokens usados, o número de chamadas de LLM, os modelos usados e o custo estimado. Ao clicar em um span específico, esses detalhes serão restritos a ele, e serão exibidos a entrada, a saída e os metadados relacionados.
|
||||
|
||||

|
||||
|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-1.png" alt="Visualização de rastreamento no Datadog LLM Observability" />
|
||||
</Frame>
|
||||
Além disso, você pode ver o grafo de execução do rastreamento, que mostra o fluxo de controle e de dados e se expande com agentes maiores para mostrar transferências (handoffs) e relacionamentos entre chamadas de LLM, chamadas de ferramentas e interações entre agentes.
|
||||
|
||||

|
||||
<Frame>
|
||||
<img src="/images/datadog-llm-observability-2.png" alt="Visualização do fluxo de execução do agente no Datadog LLM Observability" />
|
||||
</Frame>
|
||||
|
||||
## Referências
|
||||
|
||||
|
||||
@@ -23,13 +23,15 @@ Veja um exemplo mínimo de como utilizar a ferramenta:
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai_tools import QdrantVectorSearchTool
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# Inicialize a ferramenta
|
||||
# Inicialize a ferramenta com QdrantConfig
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_qdrant_url",
|
||||
qdrant_api_key="your_qdrant_api_key",
|
||||
collection_name="your_collection"
|
||||
)
|
||||
)
|
||||
|
||||
# Crie um agente que utiliza a ferramenta
|
||||
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
|
||||
def get_openai_embedding(text):
|
||||
response = client.embeddings.create(
|
||||
input=text,
|
||||
model="text-embedding-3-small"
|
||||
model="text-embedding-3-large"
|
||||
)
|
||||
return response.data[0].embedding
|
||||
|
||||
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
|
||||
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
||||
# Extrair texto do PDF
|
||||
text_chunks = extract_text_from_pdf(pdf_path)
|
||||
|
||||
|
||||
# Criar coleção no Qdrant
|
||||
if qdrant.collection_exists(collection_name):
|
||||
qdrant.delete_collection(collection_name)
|
||||
qdrant.create_collection(
|
||||
collection_name=collection_name,
|
||||
vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
|
||||
vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
|
||||
)
|
||||
|
||||
# Armazenar embeddings
|
||||
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
|
||||
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
||||
|
||||
# Inicializar ferramenta de busca Qdrant
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||
collection_name=collection_name,
|
||||
limit=3,
|
||||
score_threshold=0.35
|
||||
)
|
||||
)
|
||||
|
||||
# Criar agentes CrewAI
|
||||
search_agent = Agent(
|
||||
role="Senior Semantic Search Agent",
|
||||
goal="Find and analyze documents based on semantic search",
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
backstory="""You are an expert research assistant who can find relevant
|
||||
information using semantic search in a Qdrant database.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -141,7 +147,7 @@ search_agent = Agent(
|
||||
answer_agent = Agent(
|
||||
role="Senior Answer Assistant",
|
||||
goal="Generate answers to questions based on the context provided",
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
backstory="""You are an expert answer assistant who can generate
|
||||
answers to questions based on the context provided.""",
|
||||
tools=[qdrant_tool],
|
||||
verbose=True
|
||||
@@ -180,21 +186,82 @@ print(result)
|
||||
## Parâmetros da Ferramenta
|
||||
|
||||
### Parâmetros Obrigatórios
|
||||
- `qdrant_url` (str): URL do seu servidor Qdrant
|
||||
- `qdrant_api_key` (str): Chave de API para autenticação com o Qdrant
|
||||
- `collection_name` (str): Nome da coleção Qdrant a ser pesquisada
|
||||
- `qdrant_config` (QdrantConfig): Objeto de configuração contendo todas as configurações do Qdrant
|
||||
|
||||
### Parâmetros Opcionais
|
||||
### Parâmetros do QdrantConfig
|
||||
- `qdrant_url` (str): URL do seu servidor Qdrant
|
||||
- `qdrant_api_key` (str, opcional): Chave de API para autenticação com o Qdrant
|
||||
- `collection_name` (str): Nome da coleção Qdrant a ser pesquisada
|
||||
- `limit` (int): Número máximo de resultados a serem retornados (padrão: 3)
|
||||
- `score_threshold` (float): Limite mínimo de similaridade (padrão: 0.35)
|
||||
- `filter` (Any, opcional): Instância de Filter do Qdrant para filtragem avançada (padrão: None)
|
||||
|
||||
### Parâmetros Opcionais da Ferramenta
|
||||
- `custom_embedding_fn` (Callable[[str], list[float]]): Função personalizada para vetorização de textos
|
||||
- `qdrant_package` (str): Caminho base do pacote Qdrant (padrão: "qdrant_client")
|
||||
- `client` (Any): Cliente Qdrant pré-inicializado (opcional)
|
||||
|
||||
## Filtragem Avançada
|
||||
|
||||
A ferramenta QdrantVectorSearchTool oferece recursos poderosos de filtragem para refinar os resultados da busca:
|
||||
|
||||
### Filtragem Dinâmica
|
||||
Use os parâmetros `filter_by` e `filter_value` na sua busca para filtrar resultados dinamicamente:
|
||||
|
||||
```python
|
||||
# O agente usará esses parâmetros ao chamar a ferramenta
|
||||
# O schema da ferramenta aceita filter_by e filter_value
|
||||
# Exemplo: busca com filtro de categoria
|
||||
# Os resultados serão filtrados onde categoria == "tecnologia"
|
||||
```
|
||||
|
||||
### Filtros Pré-definidos com QdrantConfig
|
||||
Para filtragens complexas, use instâncias de Filter do Qdrant na sua configuração:
|
||||
|
||||
```python
|
||||
from qdrant_client.http import models as qmodels
|
||||
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||
|
||||
# Criar um filtro para condições específicas
|
||||
preset_filter = qmodels.Filter(
|
||||
must=[
|
||||
qmodels.FieldCondition(
|
||||
key="categoria",
|
||||
match=qmodels.MatchValue(value="pesquisa")
|
||||
),
|
||||
qmodels.FieldCondition(
|
||||
key="ano",
|
||||
match=qmodels.MatchValue(value=2024)
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# Inicializar ferramenta com filtro pré-definido
|
||||
qdrant_tool = QdrantVectorSearchTool(
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
filter=preset_filter # Filtro pré-definido aplicado a todas as buscas
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### Combinando Filtros
|
||||
A ferramenta combina automaticamente os filtros pré-definidos do `QdrantConfig` com os filtros dinâmicos de `filter_by` e `filter_value`:
|
||||
|
||||
```python
|
||||
# Se QdrantConfig tem um filtro pré-definido para categoria="pesquisa"
|
||||
# E a busca usa filter_by="ano", filter_value=2024
|
||||
# Ambos os filtros serão combinados (lógica AND)
|
||||
```
|
||||
|
||||
## Parâmetros de Busca
|
||||
|
||||
A ferramenta aceita estes parâmetros em seu schema:
|
||||
- `query` (str): Consulta de busca para encontrar documentos similares
|
||||
- `filter_by` (str, opcional): Campo de metadado para filtrar
|
||||
- `filter_value` (str, opcional): Valor para filtrar
|
||||
- `filter_value` (Any, opcional): Valor para filtrar
|
||||
|
||||
## Formato de Retorno
|
||||
|
||||
@@ -214,7 +281,7 @@ A ferramenta retorna resultados no formato JSON:
|
||||
|
||||
## Embedding Padrão
|
||||
|
||||
Por padrão, a ferramenta utiliza o modelo `text-embedding-3-small` da OpenAI para vetorização. Isso requer:
|
||||
Por padrão, a ferramenta utiliza o modelo `text-embedding-3-large` da OpenAI para vetorização. Isso requer:
|
||||
- Chave de API da OpenAI definida na variável de ambiente: `OPENAI_API_KEY`
|
||||
|
||||
## Embeddings Personalizados
|
||||
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
|
||||
# Tokenizar e obter saídas do modelo
|
||||
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
||||
outputs = model(**inputs)
|
||||
|
||||
|
||||
# Usar mean pooling para obter o embedding do texto
|
||||
embeddings = outputs.last_hidden_state.mean(dim=1)
|
||||
|
||||
|
||||
# Converter para lista de floats e retornar
|
||||
return embeddings[0].tolist()
|
||||
|
||||
# Usar embeddings personalizados com a ferramenta
|
||||
from crewai_tools import QdrantConfig
|
||||
|
||||
tool = QdrantVectorSearchTool(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection",
|
||||
qdrant_config=QdrantConfig(
|
||||
qdrant_url="your_url",
|
||||
qdrant_api_key="your_key",
|
||||
collection_name="your_collection"
|
||||
),
|
||||
custom_embedding_fn=custom_embeddings # Passe sua função personalizada
|
||||
)
|
||||
```
|
||||
@@ -270,4 +341,4 @@ Variáveis de ambiente obrigatórias:
|
||||
export QDRANT_URL="your_qdrant_url" # Se não for informado no construtor
|
||||
export QDRANT_API_KEY="your_api_key" # Se não for informado no construtor
|
||||
export OPENAI_API_KEY="your_openai_key" # Se estiver usando embeddings padrão
|
||||
```
|
||||
```
|
||||
|
||||
@@ -46,23 +46,25 @@ tool = DirectorySearchTool(directory='/path/to/directory')
|
||||
O DirectorySearchTool utiliza OpenAI para embeddings e sumarização por padrão. As opções de personalização dessas configurações incluem a alteração do provedor de modelo e configurações, ampliando a flexibilidade para usuários avançados.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DirectorySearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # As opções incluem ollama, google, anthropic, llama2 e mais
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Configurações adicionais aqui
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -56,25 +56,25 @@ Os seguintes parâmetros podem ser usados para customizar o comportamento da `DO
|
||||
Por padrão, a ferramenta utiliza o OpenAI tanto para embeddings quanto para sumarização. Para customizar o modelo, você pode usar um dicionário de configuração como no exemplo:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DOCXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # ou google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -48,27 +48,25 @@ tool = MDXSearchTool(mdx='path/to/your/document.mdx')
|
||||
A ferramenta utiliza, por padrão, o OpenAI para embeddings e sumarização. Para personalizar, utilize um dicionário de configuração conforme exemplo abaixo:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = MDXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # As opções incluem google, openai, anthropic, llama2, etc.
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Parâmetros opcionais podem ser incluídos aqui.
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# Um título opcional para os embeddings pode ser adicionado aqui.
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -45,28 +45,60 @@ tool = PDFSearchTool(pdf='path/to/your/document.pdf')
|
||||
|
||||
## Modelo e embeddings personalizados
|
||||
|
||||
Por padrão, a ferramenta utiliza OpenAI tanto para embeddings quanto para sumarização. Para personalizar o modelo, você pode usar um dicionário de configuração como no exemplo abaixo:
|
||||
Por padrão, a ferramenta utiliza OpenAI para embeddings e sumarização. Para personalizar, use um dicionário de configuração conforme abaixo. Observação: um banco vetorial (vectordb) é necessário, pois os embeddings gerados precisam ser armazenados e consultados.
|
||||
|
||||
```python Code
|
||||
from crewai_tools import PDFSearchTool
|
||||
from chromadb.config import Settings # Persistência no Chroma
|
||||
|
||||
tool = PDFSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # ou google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# Obrigatório: provedor de embeddings + configuração
|
||||
"embedding_model": {
|
||||
# Provedores suportados: "openai", "azure", "google-generativeai", "google-vertex",
|
||||
# "voyageai", "cohere", "huggingface", "jina", "sentence-transformer",
|
||||
# "text2vec", "ollama", "openclip", "instructor", "onnx", "roboflow", "watsonx", "custom"
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
# "model" é mapeado internamente para "model_name".
|
||||
"model": "text-embedding-3-small",
|
||||
# Opcional: chave da API (se ausente, usa variáveis de ambiente do provedor)
|
||||
# "api_key": "sk-...",
|
||||
|
||||
# Exemplos específicos por provedor
|
||||
# --- Google ---
|
||||
# (defina provider="google-generativeai")
|
||||
# "model": "models/embedding-001",
|
||||
# "task_type": "retrieval_document",
|
||||
|
||||
# --- Cohere ---
|
||||
# (defina provider="cohere")
|
||||
# "model": "embed-english-v3.0",
|
||||
|
||||
# --- Ollama (local) ---
|
||||
# (defina provider="ollama")
|
||||
# "model": "nomic-embed-text",
|
||||
},
|
||||
},
|
||||
|
||||
# Obrigatório: configuração do banco vetorial
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# Exemplo Chroma:
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# Exemplo Qdrant:
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# Observação: o nome da coleção é controlado pela ferramenta (padrão: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -57,25 +57,39 @@ Por padrão, a ferramenta utiliza o OpenAI tanto para embeddings quanto para sum
|
||||
Para personalizar o modelo, você pode usar um dicionário de configuração como o exemplo a seguir:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = TXTSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # ou google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# Obrigatório: provedor de embeddings + configuração
|
||||
"embedding_model": {
|
||||
"provider": "openai", # ou google-generativeai, cohere, ollama, ...
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...", # opcional se variável de ambiente estiver definida
|
||||
# Exemplos por provedor:
|
||||
# Google → model: "models/embedding-001", task_type: "retrieval_document"
|
||||
},
|
||||
},
|
||||
|
||||
# Obrigatório: configuração do banco vetorial
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# Configurações do Chroma (persistência opcional)
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# Exemplo de parâmetros de vetor do Qdrant:
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# Observação: o nome da coleção é controlado pela ferramenta (padrão: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -54,25 +54,25 @@ Este parâmetro é opcional durante a inicialização da ferramenta, mas deve se
|
||||
Por padrão, a ferramenta utiliza a OpenAI tanto para embeddings quanto para sumarização. Para personalizar o modelo, você pode usar um dicionário de configuração conforme o exemplo a seguir:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = XMLSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # ou google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # ou openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # ou "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -22,22 +22,23 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel):
|
||||
|
||||
|
||||
class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
"""Tool for crawling websites using Firecrawl. To run this tool, you need to have a Firecrawl API key.
|
||||
"""Tool for crawling websites using Firecrawl v2 API. To run this tool, you need to have a Firecrawl API key.
|
||||
|
||||
Args:
|
||||
api_key (str): Your Firecrawl API key.
|
||||
config (dict): Optional. It contains Firecrawl API parameters.
|
||||
config (dict): Optional. It contains Firecrawl v2 API parameters.
|
||||
|
||||
Default configuration options:
|
||||
max_depth (int): Maximum depth to crawl. Default: 2
|
||||
Default configuration options (Firecrawl v2 API):
|
||||
max_discovery_depth (int): Maximum depth for discovering pages. Default: 2
|
||||
ignore_sitemap (bool): Whether to ignore sitemap. Default: True
|
||||
limit (int): Maximum number of pages to crawl. Default: 100
|
||||
allow_backward_links (bool): Allow crawling backward links. Default: False
|
||||
limit (int): Maximum number of pages to crawl. Default: 10
|
||||
allow_external_links (bool): Allow crawling external links. Default: False
|
||||
scrape_options (ScrapeOptions): Options for scraping content
|
||||
- formats (list[str]): Content formats to return. Default: ["markdown", "screenshot", "links"]
|
||||
allow_subdomains (bool): Allow crawling subdomains. Default: False
|
||||
delay (int): Delay between requests in milliseconds. Default: None
|
||||
scrape_options (dict): Options for scraping content
|
||||
- formats (list[str]): Content formats to return. Default: ["markdown"]
|
||||
- only_main_content (bool): Only return main content. Default: True
|
||||
- timeout (int): Timeout in milliseconds. Default: 30000
|
||||
- timeout (int): Timeout in milliseconds. Default: 10000
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(
|
||||
@@ -49,14 +50,15 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
api_key: str | None = None
|
||||
config: dict[str, Any] | None = Field(
|
||||
default_factory=lambda: {
|
||||
"maxDepth": 2,
|
||||
"ignoreSitemap": True,
|
||||
"max_discovery_depth": 2,
|
||||
"ignore_sitemap": True,
|
||||
"limit": 10,
|
||||
"allowBackwardLinks": False,
|
||||
"allowExternalLinks": False,
|
||||
"scrapeOptions": {
|
||||
"formats": ["markdown", "screenshot", "links"],
|
||||
"onlyMainContent": True,
|
||||
"allow_external_links": False,
|
||||
"allow_subdomains": False,
|
||||
"delay": None,
|
||||
"scrape_options": {
|
||||
"formats": ["markdown"],
|
||||
"only_main_content": True,
|
||||
"timeout": 10000,
|
||||
},
|
||||
}
|
||||
@@ -107,7 +109,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
if not self._firecrawl:
|
||||
raise RuntimeError("FirecrawlApp not properly initialized")
|
||||
|
||||
return self._firecrawl.crawl_url(url, poll_interval=2, params=self.config)
|
||||
return self._firecrawl.crawl(url=url, poll_interval=2, **self.config)
|
||||
|
||||
|
||||
try:
|
||||
|
||||
@@ -22,20 +22,27 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel):
|
||||
|
||||
|
||||
class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||
"""Tool for scraping webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.
|
||||
"""Tool for scraping webpages using Firecrawl v2 API. To run this tool, you need to have a Firecrawl API key.
|
||||
|
||||
Args:
|
||||
api_key (str): Your Firecrawl API key.
|
||||
config (dict): Optional. It contains Firecrawl API parameters.
|
||||
config (dict): Optional. It contains Firecrawl v2 API parameters.
|
||||
|
||||
Default configuration options:
|
||||
Default configuration options (Firecrawl v2 API):
|
||||
formats (list[str]): Content formats to return. Default: ["markdown"]
|
||||
onlyMainContent (bool): Only return main content. Default: True
|
||||
includeTags (list[str]): Tags to include. Default: []
|
||||
excludeTags (list[str]): Tags to exclude. Default: []
|
||||
headers (dict): Headers to include. Default: {}
|
||||
waitFor (int): Time to wait for page to load in ms. Default: 0
|
||||
json_options (dict): Options for JSON extraction. Default: None
|
||||
only_main_content (bool): Only return main content excluding headers, navs, footers, etc. Default: True
|
||||
include_tags (list[str]): Tags to include in the output. Default: []
|
||||
exclude_tags (list[str]): Tags to exclude from the output. Default: []
|
||||
max_age (int): Returns cached version if younger than this age in milliseconds. Default: 172800000 (2 days)
|
||||
headers (dict): Headers to send with the request (e.g., cookies, user-agent). Default: {}
|
||||
wait_for (int): Delay in milliseconds before fetching content. Default: 0
|
||||
mobile (bool): Emulate scraping from a mobile device. Default: False
|
||||
skip_tls_verification (bool): Skip TLS certificate verification. Default: True
|
||||
timeout (int): Request timeout in milliseconds. Default: None
|
||||
remove_base64_images (bool): Remove base64 images from output. Default: True
|
||||
block_ads (bool): Enable ad-blocking and cookie popup blocking. Default: True
|
||||
proxy (str): Proxy type ("basic", "stealth", "auto"). Default: "auto"
|
||||
store_in_cache (bool): Store page in Firecrawl index and cache. Default: True
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(
|
||||
@@ -48,11 +55,18 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||
config: dict[str, Any] = Field(
|
||||
default_factory=lambda: {
|
||||
"formats": ["markdown"],
|
||||
"onlyMainContent": True,
|
||||
"includeTags": [],
|
||||
"excludeTags": [],
|
||||
"only_main_content": True,
|
||||
"include_tags": [],
|
||||
"exclude_tags": [],
|
||||
"max_age": 172800000, # 2 days cache
|
||||
"headers": {},
|
||||
"waitFor": 0,
|
||||
"wait_for": 0,
|
||||
"mobile": False,
|
||||
"skip_tls_verification": True,
|
||||
"remove_base64_images": True,
|
||||
"block_ads": True,
|
||||
"proxy": "auto",
|
||||
"store_in_cache": True,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -95,7 +109,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||
if not self._firecrawl:
|
||||
raise RuntimeError("FirecrawlApp not properly initialized")
|
||||
|
||||
return self._firecrawl.scrape_url(url, params=self.config)
|
||||
return self._firecrawl.scrape(url=url, **self.config)
|
||||
|
||||
|
||||
try:
|
||||
|
||||
@@ -23,19 +23,24 @@ class FirecrawlSearchToolSchema(BaseModel):
|
||||
|
||||
|
||||
class FirecrawlSearchTool(BaseTool):
|
||||
"""Tool for searching webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.
|
||||
"""Tool for searching webpages using Firecrawl v2 API. To run this tool, you need to have a Firecrawl API key.
|
||||
|
||||
Args:
|
||||
api_key (str): Your Firecrawl API key.
|
||||
config (dict): Optional. It contains Firecrawl API parameters.
|
||||
config (dict): Optional. It contains Firecrawl v2 API parameters.
|
||||
|
||||
Default configuration options:
|
||||
limit (int): Maximum number of pages to crawl. Default: 5
|
||||
tbs (str): Time before search. Default: None
|
||||
lang (str): Language. Default: "en"
|
||||
country (str): Country. Default: "us"
|
||||
location (str): Location. Default: None
|
||||
timeout (int): Timeout in milliseconds. Default: 60000
|
||||
Default configuration options (Firecrawl v2 API):
|
||||
limit (int): Maximum number of search results to return. Default: 5
|
||||
tbs (str): Time-based search filter (e.g., "qdr:d" for past day). Default: None
|
||||
location (str): Location for search results. Default: None
|
||||
timeout (int): Request timeout in milliseconds. Default: None
|
||||
scrape_options (dict): Options for scraping the search results. Default: {"formats": ["markdown"]}
|
||||
- formats (list[str]): Content formats to return. Default: ["markdown"]
|
||||
- only_main_content (bool): Only return main content. Default: True
|
||||
- include_tags (list[str]): Tags to include. Default: []
|
||||
- exclude_tags (list[str]): Tags to exclude. Default: []
|
||||
- wait_for (int): Delay before fetching content in ms. Default: 0
|
||||
- timeout (int): Request timeout in milliseconds. Default: None
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(
|
||||
@@ -49,10 +54,15 @@ class FirecrawlSearchTool(BaseTool):
|
||||
default_factory=lambda: {
|
||||
"limit": 5,
|
||||
"tbs": None,
|
||||
"lang": "en",
|
||||
"country": "us",
|
||||
"location": None,
|
||||
"timeout": 60000,
|
||||
"timeout": None,
|
||||
"scrape_options": {
|
||||
"formats": ["markdown"],
|
||||
"only_main_content": True,
|
||||
"include_tags": [],
|
||||
"exclude_tags": [],
|
||||
"wait_for": 0,
|
||||
},
|
||||
}
|
||||
)
|
||||
_firecrawl: FirecrawlApp | None = PrivateAttr(None)
|
||||
@@ -106,7 +116,7 @@ class FirecrawlSearchTool(BaseTool):
|
||||
|
||||
return self._firecrawl.search(
|
||||
query=query,
|
||||
params=self.config,
|
||||
**self.config,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
@@ -12,9 +12,13 @@ from pydantic.types import ImportString
|
||||
|
||||
|
||||
class QdrantToolSchema(BaseModel):
|
||||
query: str = Field(..., description="Query to search in Qdrant DB.")
|
||||
filter_by: str | None = None
|
||||
filter_value: str | None = None
|
||||
query: str = Field(..., description="Query to search in Qdrant DB")
|
||||
filter_by: str | None = Field(
|
||||
default=None, description="Parameter to filter the search by."
|
||||
)
|
||||
filter_value: Any | None = Field(
|
||||
default=None, description="Value to filter the search by."
|
||||
)
|
||||
|
||||
|
||||
class QdrantConfig(BaseModel):
|
||||
@@ -25,7 +29,9 @@ class QdrantConfig(BaseModel):
|
||||
collection_name: str
|
||||
limit: int = 3
|
||||
score_threshold: float = 0.35
|
||||
filter_conditions: list[tuple[str, Any]] = Field(default_factory=list)
|
||||
filter: Any | None = Field(
|
||||
default=None, description="Qdrant Filter instance for advanced filtering."
|
||||
)
|
||||
|
||||
|
||||
class QdrantVectorSearchTool(BaseTool):
|
||||
@@ -76,23 +82,26 @@ class QdrantVectorSearchTool(BaseTool):
|
||||
filter_value: Any | None = None,
|
||||
) -> str:
|
||||
"""Perform vector similarity search."""
|
||||
filter_ = self.qdrant_package.http.models.Filter
|
||||
field_condition = self.qdrant_package.http.models.FieldCondition
|
||||
match_value = self.qdrant_package.http.models.MatchValue
|
||||
conditions = self.qdrant_config.filter_conditions.copy()
|
||||
if filter_by and filter_value is not None:
|
||||
conditions.append((filter_by, filter_value))
|
||||
|
||||
search_filter = (
|
||||
filter_(
|
||||
must=[
|
||||
field_condition(key=k, match=match_value(value=v))
|
||||
for k, v in conditions
|
||||
]
|
||||
)
|
||||
if conditions
|
||||
else None
|
||||
self.qdrant_config.filter.model_copy()
|
||||
if self.qdrant_config.filter is not None
|
||||
else self.qdrant_package.http.models.Filter(must=[])
|
||||
)
|
||||
if filter_by and filter_value is not None:
|
||||
if not hasattr(search_filter, "must") or not isinstance(
|
||||
search_filter.must, list
|
||||
):
|
||||
search_filter.must = []
|
||||
search_filter.must.append(
|
||||
self.qdrant_package.http.models.FieldCondition(
|
||||
key=filter_by,
|
||||
match=self.qdrant_package.http.models.MatchValue(
|
||||
value=filter_value
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
query_vector = (
|
||||
self.custom_embedding_fn(query)
|
||||
if self.custom_embedding_fn
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,289 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"url": "https://firecrawl.dev", "includeTags": [], "excludeTags": [],
|
||||
"onlyMainContent": true, "waitFor": 0, "skipTlsVerification": true, "removeBase64Images":
|
||||
true, "fastMode": false, "blockAds": true, "storeInCache": true, "maxAge": 172800000,
|
||||
"formats": ["markdown"], "headers": {}, "mobile": false, "proxy": "auto", "origin":
|
||||
"python-sdk@4.5.0"}'
|
||||
headers:
|
||||
Accept:
|
||||
- '*/*'
|
||||
Accept-Encoding:
|
||||
- gzip, deflate, zstd
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Length:
|
||||
- '350'
|
||||
Content-Type:
|
||||
- application/json
|
||||
User-Agent:
|
||||
- python-requests/2.32.5
|
||||
method: POST
|
||||
uri: https://api.firecrawl.dev/v2/scrape
|
||||
response:
|
||||
body:
|
||||
string: "{\"success\":true,\"data\":{\"markdown\":\"We just raised our Series
|
||||
A and shipped Firecrawl /v2 \U0001F389. [Read the blog.](https://www.firecrawl.dev/blog/firecrawl-v2-series-a-announcement)\\n\\n[2
|
||||
Months Free \u2014 Annually](https://www.firecrawl.dev/pricing)\\n\\n# Turn
|
||||
websites into LLM-ready data\\n\\nPower your AI apps with clean web data\\n\\nfrom
|
||||
any website. [It's also open source.](https://github.com/firecrawl/firecrawl)\\n\\nScrape\\n\\nSearch\\nNew\\n\\nMap\\n\\nCrawl\\n\\nScrape\\n\\nLogo\\n\\nNavigation\\n\\nButton\\n\\nH1
|
||||
Title\\n\\nDescription\\n\\nCTA Button\\n\\n\\\\[ .JSON \\\\]\\n\\n```json\\n1[\\\\\\n2
|
||||
\ {\\\\\\n3 \\\"url\\\": \\\"https://example.com\\\",\\\\\\n4 \\\"markdown\\\":
|
||||
\\\"# Getting Started...\\\",\\\\\\n5 \\\"json\\\": { \\\"title\\\": \\\"Guide\\\",
|
||||
\\\"docs\\\": \\\"...\\\" },\\\\\\n6 \\\"screenshot\\\": \\\"https://example.com/hero.png\\\"\\\\\\n7
|
||||
\ }\\\\\\n8]\\n```\\n\\nScrape Completed\\n\\nTrusted by5000+\\n\\ncompaniesof
|
||||
all sizes\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\\\[01/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nMain Features\\n\\n//\\n\\nDeveloper First\\n\\n//\\n\\n##
|
||||
Startscraping today\\n\\nEnhance your apps with industry leading web scraping
|
||||
and crawling capabilities.\\n\\nScrape\\n\\nGet llm-ready data from websites.
|
||||
Markdown, JSON, screenshot, etc.\\n\\nSearch\\n\\nNew\\n\\nSearch the web
|
||||
and get full content from results.\\n\\nCrawl\\n\\nCrawl all the pages on
|
||||
a website and get data for each page.\\n\\nPython\\n\\nNode.js\\n\\nCurl\\n\\nCopy
|
||||
code\\n\\n```python\\n1# pip install firecrawl-py\\n2from firecrawl import
|
||||
Firecrawl\\n3\\n4app = Firecrawl(api_key=\\\"fc-YOUR_API_KEY\\\")\\n5\\n6#
|
||||
Scrape a website:\\n7app.scrape('firecrawl.dev')\\n8\\n9\\n10\\n```\\n\\n\\\\[
|
||||
.MD \\\\]\\n\\n```markdown\\n1# Firecrawl\\n2\\n3Firecrawl is a powerful web
|
||||
scraping\\n4library that makes it easy to extract\\n5data from websites.\\n6\\n7##
|
||||
Installation\\n8\\n9To install Firecrawl, run:\\n10\\n11\\n```\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nIntegrations\\n\\n###
|
||||
Use well-known tools\\n\\nAlready fully integrated with the greatest existing
|
||||
tools and workflows.\\n\\n[See all integrations](https://www.firecrawl.dev/app)\\n\\n\\n\\nmendableai/firecrawl\\n\\nPublic\\n\\nStar\\n\\n65.3K\\n\\n\\\\[python-SDK\\\\]
|
||||
improvs/async\\n\\n#1337\\n\\n\xB7\\n\\nApr 18, 2025\\n\\n\xB7\\n\\n\\n\\nrafaelsideguide\\n\\nfeat(extract):
|
||||
cost limit\\n\\n#1473\\n\\n\xB7\\n\\nApr 17, 2025\\n\\n\xB7\\n\\n\\n\\nmogery\\n\\nfeat(scrape):
|
||||
get job result from GCS, avoid Redis\\n\\n#1461\\n\\n\xB7\\n\\nApr 15, 2025\\n\\n\xB7\\n\\n\\n\\nmogery\\n\\nExtract
|
||||
v2/rerank improvs\\n\\n#1437\\n\\n\xB7\\n\\nApr 11, 2025\\n\\n\xB7\\n\\n\\n\\nrafaelsideguide\\n\\n\\n\\n\\n\\n+90\\n\\nOpen
|
||||
Source\\n\\n### Code you can trust\\n\\nDeveloped transparently and collaboratively.
|
||||
Join our community of contributors.\\n\\n[Check out our repo](https://github.com/firecrawl/firecrawl)\\n\\n\\\\[02/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nCore\\n\\n//\\n\\nBuilt to outperform\\n\\n//\\n\\n##
|
||||
Core principles, provenperformance\\n\\nBuilt from the ground up to outperform
|
||||
traditional scrapers.\\n\\nNo proxy headaches\\n\\nReliable.Covers 96% of
|
||||
the web,\\n\\nincluding JS-heavy and protected pages. No proxies, no puppets,
|
||||
just clean data.\\n\\nFirecrawl\\n\\n96%\\n\\n\\n\\nPuppeteer\\n\\n79%\\n\\ncURL\\n\\n75%\\n\\nSpeed
|
||||
that feels invisible\\n\\nBlazingly fast.Delivers results in less than 1 second,
|
||||
fast for real-time agents\\n\\nand dynamic apps.\\n\\nURL\\n\\nCrawl\\n\\nScrape\\n\\nfirecrawl.dev/docs\\n\\n50ms\\n\\n51ms\\n\\nfirecrawl.dev/templates\\n\\n52ms\\n\\n50ms\\n\\nfirecrawl.dev/changelog\\n\\n49ms\\n\\n52ms\\n\\nfirecrawl.dev/about\\n\\n52ms\\n\\n50ms\\n\\nfirecrawl.dev/changelog\\n\\n50ms\\n\\n52ms\\n\\nfirecrawl.dev/playground\\n\\n51ms\\n\\n49ms\\n\\n\\\\[
|
||||
CTA \\\\]\\n\\n\\\\[ CRAWL \\\\]\\n\\n\\\\[ SCRAPE \\\\]\\n\\n\\\\[ CTA \\\\]\\n\\n//\\n\\nGet
|
||||
started\\n\\n//\\n\\nReady to build?\\n\\nStart getting Web Data for free
|
||||
and scale seamlessly as your project expands. No credit card needed.\\n\\n[Start
|
||||
for free](https://www.firecrawl.dev/signin) [See our plans](https://www.firecrawl.dev/pricing)\\n\\n\\\\[03/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nFeatures\\n\\n//\\n\\nZero configuration\\n\\n//\\n\\n##
|
||||
We handle the hard stuff\\n\\nRotating proxies, orchestration, rate limits,
|
||||
js-blocked content and more.\\n\\nDocs to data\\n\\nMedia parsing.Firecrawl
|
||||
can parse and output content from web hosted pdfs, docx, and more.\\n\\nhttps://example.com/docs/report.pdf\\n\\nhttps://example.com/files/brief.docx\\n\\nhttps://example.com/docs/guide.html\\n\\ndocx\\n\\nParsing...\\n\\nKnows
|
||||
the moment\\n\\nSmart wait.Firecrawl intelligently waits for content to load,
|
||||
making scraping faster and more reliable.\\n\\nhttps://example-spa.com\\n\\nRequest
|
||||
Sent\\n\\nScrapes the real thing\\n\\nCached, when you need it.Selective caching,
|
||||
you choose your caching patterns, growing web index.\\n\\n\\n\\nUser\\n\\nFirecrawl\\n\\nCache\\n\\nInvisible
|
||||
access\\n\\nStealth mode.Crawls the web without\\n\\nbeing blocked, mimics
|
||||
real users to access protected or dynamic content.\\n\\nInteractive scraping\\n\\nActions.Click,
|
||||
scroll, write, wait, press and more before extracting content.\\n\\nhttps://example.com\\n\\nNavigate\\n\\nClick\\n\\nType\\n\\nWait\\n\\nScroll\\n\\nPress\\n\\nScreenshot\\n\\nScrape\\n\\n\\\\[04/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nPricing\\n\\n//\\n\\nTransparent\\n\\n//\\n\\n## Flexible
|
||||
pricing\\n\\nExplore transparent pricing built for real-world scraping. Start
|
||||
for free, then scale as you grow.\\n\\n\U0001F1FA\U0001F1F8USD\\n\\nFree Plan\\n\\nA
|
||||
lightweight way to try scraping.\\n\\nNo cost, no card, no hassle.\\n\\n500
|
||||
credits\\n\\n$0123456789\\n\\none-time\\n\\nGet started\\n\\nScrape 500 pages\\n\\n2
|
||||
concurrent requests\\n\\nLow rate limits\\n\\nHobby\\n\\nGreat for side projects
|
||||
and small tools.\\n\\nFast, simple, no overkill.\\n\\n3,000 credits\\n\\n$01234567890123456789\\n\\n/monthly\\n\\nBilled
|
||||
yearly\\n\\n2 months free\\n\\nSubscribe\\n\\nScrape 3,000 pages\\n\\n5 concurrent
|
||||
requests\\n\\nBasic support\\n\\n$9 per extra 1k credits\\n\\nStandard\\n\\nMost
|
||||
popular\\n\\nPerfect for scaling with less effort.\\n\\nSimple, solid, dependable.\\n\\n100,000
|
||||
credits\\n\\n$01234567890123456789\\n\\n/monthly\\n\\nBilled yearly\\n\\n2
|
||||
months free\\n\\nSubscribe\\n\\nScrape 100,000 pages\\n\\n50 concurrent requests\\n\\nStandard
|
||||
support\\n\\n$47 per extra 35k credits\\n\\nGrowth\\n\\nBuilt for high volume
|
||||
and speed.\\n\\nFirecrawl at full force.\\n\\n500,000 credits\\n\\n$012345678901234567890123456789\\n\\n/monthly\\n\\nBilled
|
||||
yearly\\n\\n2 months free\\n\\nSubscribe\\n\\nScrape 500,000 pages\\n\\n100
|
||||
concurrent requests\\n\\nPriority support\\n\\n$177 per extra 175k credits\\n\\nExtra
|
||||
credits are available via auto-recharge packs. [Enable](https://www.firecrawl.dev/signin/signup)\\n\\nEnterprise\\n\\nPower
|
||||
at your pace\\n\\nUnlimited credits. Custom RPMs.\\n\\n[Contact sales](https://fk4bvu0n5qp.typeform.com/to/Ej6oydlg)
|
||||
[More details](https://www.firecrawl.dev/enterprise)\\n\\nBulk discounts\\n\\nTop
|
||||
priority support\\n\\nCustom concurrency limits\\n\\nImproved stealth proxies\\n\\nSLAs\\n\\nAdvanced
|
||||
security & controls\\n\\n\\\\[05/ 07 \\\\]\\n\\n\xB7\\n\\nTestimonials\\n\\n//\\n\\nCommunity\\n\\n//\\n\\n##
|
||||
People love building withFirecrawl\\n\\nDiscover why developers choose
|
||||
Firecrawl every day.\\n\\n[Morgan
|
||||
Linton@morganlinton\\\"If you're coding with AI, and haven't discovered @firecrawl\\\\_dev
|
||||
yet, prepare to have your mind blown \U0001F92F\\\"](https://x.com/morganlinton/status/1839454165703204955)
|
||||
[Chris
|
||||
DeWeese@chrisdeweese\\\\_\\\"Started using @firecrawl\\\\_dev for a project,
|
||||
I wish I used this sooner.\\\"](https://x.com/chrisdeweese_/status/1853587120406876601)
|
||||
[Alex
|
||||
Reibman@AlexReibman\\\"Moved our internal agent's web scraping tool from Apify
|
||||
to Firecrawl because it benchmarked 50x faster with AgentOps.\\\"](https://x.com/AlexReibman/status/1780299595484131836)
|
||||
[Tom
|
||||
- Morpho@TomReppelin\\\"I found gold today. Thank you @firecrawl\\\\_dev\\\"](https://x.com/TomReppelin/status/1844382491014201613)\\n\\n[Morgan
|
||||
Linton@morganlinton\\\"If you're coding with AI, and haven't discovered @firecrawl\\\\_dev
|
||||
yet, prepare to have your mind blown \U0001F92F\\\"](https://x.com/morganlinton/status/1839454165703204955)
|
||||
[Chris
|
||||
DeWeese@chrisdeweese\\\\_\\\"Started using @firecrawl\\\\_dev for a project,
|
||||
I wish I used this sooner.\\\"](https://x.com/chrisdeweese_/status/1853587120406876601)
|
||||
[Alex
|
||||
Reibman@AlexReibman\\\"Moved our internal agent's web scraping tool from Apify
|
||||
to Firecrawl because it benchmarked 50x faster with AgentOps.\\\"](https://x.com/AlexReibman/status/1780299595484131836)
|
||||
[Tom
|
||||
- Morpho@TomReppelin\\\"I found gold today. Thank you @firecrawl\\\\_dev\\\"](https://x.com/TomReppelin/status/1844382491014201613)\\n\\n[Bardia@thepericulum\\\"The
|
||||
Firecrawl team ships. I wanted types for their node SDK, and less than an
|
||||
hour later, I got them.\\\"](https://x.com/thepericulum/status/1781397799487078874)
|
||||
[Matt
|
||||
Busigin@mbusigin\\\"Firecrawl is dope. Congrats guys \U0001F44F\\\"](https://x.com/mbusigin/status/1836065372010656069)
|
||||
[Sumanth@Sumanth\\\\_077\\\"Web
|
||||
scraping will never be the same!\\\\\\\\\\n\\\\\\\\\\nFirecrawl is an open-source
|
||||
framework that takes a URL, crawls it, and conver...\\\"](https://x.com/Sumanth_077/status/1940049003074478511)
|
||||
[Steven
|
||||
Tey@steventey\\\"Open-source Clay alternative just dropped\\\\\\\\\\n\\\\\\\\\\nUpload
|
||||
a CSV of emails and...\\\"](https://x.com/steventey/status/1932945651761098889)\\n\\n[Bardia@thepericulum\\\"The
|
||||
Firecrawl team ships. I wanted types for their node SDK, and less than an
|
||||
hour later, I got them.\\\"](https://x.com/thepericulum/status/1781397799487078874)
|
||||
[Matt
|
||||
Busigin@mbusigin\\\"Firecrawl is dope. Congrats guys \U0001F44F\\\"](https://x.com/mbusigin/status/1836065372010656069)
|
||||
[Sumanth@Sumanth\\\\_077\\\"Web
|
||||
scraping will never be the same!\\\\\\\\\\n\\\\\\\\\\nFirecrawl is an open-source
|
||||
framework that takes a URL, crawls it, and conver...\\\"](https://x.com/Sumanth_077/status/1940049003074478511)
|
||||
[Steven
|
||||
Tey@steventey\\\"Open-source Clay alternative just dropped\\\\\\\\\\n\\\\\\\\\\nUpload
|
||||
a CSV of emails and...\\\"](https://x.com/steventey/status/1932945651761098889)\\n\\n\\\\[06/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nUse Cases\\n\\n//\\n\\nUse cases\\n\\n//\\n\\n## Transform
|
||||
\ web data into AI-powered solutions\\n\\nDiscover how Firecrawl customers
|
||||
are getting the most out of our API.\\n\\n[View all use cases](https://docs.firecrawl.dev/use-cases/overview)\\n\\nChat
|
||||
with context\\n\\nSmarter AI chats\\n\\nPower your AI assistants with real-time,
|
||||
accurate web content.\\n\\n[View docs](https://docs.firecrawl.dev/introduction)\\n\\n\\n\\nAI Assistant\\n\\nwithFirecrawl\\n\\nReal-time\xB7Updated
|
||||
2 min ago\\n\\nAsk anything...\\n\\nKnow your leads\\n\\nLead enrichment\\n\\nEnhance
|
||||
your sales data with\\n\\nweb information.\\n\\n[Check out Extract](https://www.firecrawl.dev/extract)\\n\\nExtracting
|
||||
leads from directory...\\n\\nTech startups\\n\\nWith contact info\\n\\nDecision
|
||||
makers\\n\\nFunding stage\\n\\nReady to engage\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nKnow your leads\\n\\nMCPs\\n\\nAdd
|
||||
powerful scraping to your\\n\\ncode editors.\\n\\n[Get started](https://docs.firecrawl.dev/mcp-server)\\n\\n\\n\\nClaude Code\\n\\n\\n\\nCursor\\n\\n\\n\\nWindsurf\\n\\n\u273B\\n\\nWelcome
|
||||
to Claude Code!\\n\\n/help for help, /status for your current setup\\n\\n>Try
|
||||
\\\"how do I log an error?\\\"\\n\\nBuild with context\\n\\nAI platforms\\n\\nLet
|
||||
your customers build AI apps\\n\\nwith web data.\\n\\n[Check out Map](https://docs.firecrawl.dev/features/map)\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nExtracting
|
||||
text...\\n\\nNo insight missed\\n\\nDeep research\\n\\nExtract comprehensive
|
||||
information for\\n\\nin-depth research.\\n\\n[Build your own with Search](https://docs.firecrawl.dev/features/search)\\n\\nDeep
|
||||
research in progress...\\n\\nAcademic papers\\n\\n0 found\\n\\nNews articles\\n\\n0
|
||||
found\\n\\nExpert opinions\\n\\n0 found\\n\\nResearch reports\\n\\n0 found\\n\\nIndustry
|
||||
data\\n\\n0 found\\n\\nAsk anything...\\n\\n\\\\[ CTA \\\\]\\n\\n\\\\[ CRAWL
|
||||
\\\\]\\n\\n\\\\[ SCRAPE \\\\]\\n\\n\\\\[ CTA \\\\]\\n\\n//\\n\\nGet started\\n\\n//\\n\\nReady
|
||||
to build?\\n\\nStart getting Web Data for free and scale seamlessly as your
|
||||
project expands. No credit card needed.\\n\\n[Start for free](https://www.firecrawl.dev/signin)
|
||||
[See our plans](https://www.firecrawl.dev/pricing)\\n\\n\\\\[07/ 07 \\\\]\\n\\n\xB7\\n\\nFAQ\\n\\n//\\n\\nFAQ\\n\\n//\\n\\n##
|
||||
Frequently askedquestions\\n\\nEverything you need to know about Firecrawl.\\n\\nGeneral\\n\\nWhat
|
||||
is Firecrawl?\\n\\nWhat sites work?\\n\\nWho can benefit from using Firecrawl?\\n\\nIs
|
||||
Firecrawl open-source?\\n\\nWhat is the difference between Firecrawl and other
|
||||
web scrapers?\\n\\nWhat is the difference between the open-source version
|
||||
and the hosted version?\\n\\nScraping & Crawling\\n\\nHow does Firecrawl handle
|
||||
dynamic content on websites?\\n\\nWhy is it not crawling all the pages?\\n\\nCan
|
||||
Firecrawl crawl websites without a sitemap?\\n\\nWhat formats can Firecrawl
|
||||
convert web data into?\\n\\nHow does Firecrawl ensure the cleanliness of the
|
||||
data?\\n\\nIs Firecrawl suitable for large-scale data scraping projects?\\n\\nDoes
|
||||
it respect robots.txt?\\n\\nWhat measures does Firecrawl take to handle web
|
||||
scraping challenges like rate limits and caching?\\n\\nDoes Firecrawl handle
|
||||
captcha or authentication?\\n\\nAPI Related\\n\\nWhere can I find my API key?\\n\\nBilling\\n\\nIs
|
||||
Firecrawl free?\\n\\nIs there a pay-per-use plan instead of monthly?\\n\\nDo
|
||||
credits roll over to the next month?\\n\\nHow many credits do scraping and
|
||||
crawling cost?\\n\\nDo you charge for failed requests?\\n\\nWhat payment methods
|
||||
do you accept?\\n\\nFOOTER\\n\\nThe easiest way to extract\\n\\ndata from
|
||||
the web\\n\\nBacked by\\n\\nY Combinator\\n\\n[Linkedin](https://www.linkedin.com/company/firecrawl)
|
||||
[Github](https://github.com/firecrawl/firecrawl)\\n\\nSOC II \xB7 Type 2\\n\\nAICPA\\n\\nSOC
|
||||
2\\n\\n[X (Twitter)](https://x.com/firecrawl_dev) [Discord](https://discord.gg/gSmWdAkdwd)\\n\\nProducts\\n\\n[Playground](https://www.firecrawl.dev/playground)
|
||||
[Extract](https://www.firecrawl.dev/extract) [Pricing](https://www.firecrawl.dev/pricing)
|
||||
[Templates](https://www.firecrawl.dev/templates) [Changelog](https://www.firecrawl.dev/changelog)\\n\\nUse
|
||||
Cases\\n\\n[AI Platforms](https://docs.firecrawl.dev/use-cases/ai-platforms)
|
||||
[Lead Enrichment](https://docs.firecrawl.dev/use-cases/lead-enrichment) [SEO
|
||||
Platforms](https://docs.firecrawl.dev/use-cases/seo-platforms) [Deep Research](https://docs.firecrawl.dev/use-cases/deep-research)\\n\\nDocumentation\\n\\n[Getting
|
||||
started](https://docs.firecrawl.dev/introduction) [API Reference](https://docs.firecrawl.dev/api-reference/introduction)
|
||||
[Integrations](https://www.firecrawl.dev/app) [Examples](https://docs.firecrawl.dev/use-cases/overview)
|
||||
[SDKs](https://docs.firecrawl.dev/sdks/overview)\\n\\nCompany\\n\\n[Blog](https://www.firecrawl.dev/blog)
|
||||
[Careers](https://www.firecrawl.dev/careers) [Creator & OSS program](https://www.firecrawl.dev/creator-oss-program)
|
||||
[Student program](https://www.firecrawl.dev/student-program)\\n\\n\xA9 2025
|
||||
Firecrawl\\n\\n[Terms of Service](https://www.firecrawl.dev/terms-of-service)
|
||||
[Privacy Policy](https://www.firecrawl.dev/privacy-policy) [Report Abuse](mailto:help@firecrawl.com?subject=Issue:)\\n\\n[All
|
||||
systems normal](https://status.firecrawl.dev/)\\n\\nStripeM-Inner\",\"metadata\":{\"twitter:title\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"publisher\":\"Firecrawl\",\"ogUrl\":\"https://www.firecrawl.dev\",\"robots\":\"follow,
|
||||
index\",\"title\":\"Firecrawl - The Web Data API for AI\",\"ogDescription\":\"The
|
||||
web crawling, scraping, and search API for AI. Built for scale. Firecrawl
|
||||
delivers the entire internet to AI agents and builders. Clean, structured,
|
||||
and ready to reason with.\",\"ogImage\":\"https://www.firecrawl.dev/og.png\",\"viewport\":\"width=device-width,
|
||||
initial-scale=1, maximum-scale=1, user-scalable=no\",\"og:url\":\"https://www.firecrawl.dev\",\"og:site_name\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"og:type\":\"website\",\"twitter:image\":\"https://www.firecrawl.dev/og.png\",\"author\":\"Firecrawl\",\"og:title\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"favicon\":\"https://www.firecrawl.dev/favicon.png\",\"description\":\"The
|
||||
web crawling, scraping, and search API for AI. Built for scale. Firecrawl
|
||||
delivers the entire internet to AI agents and builders. Clean, structured,
|
||||
and ready to reason with.\",\"referrer\":\"origin-when-cross-origin\",\"twitter:site\":\"@Vercel\",\"ogSiteName\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"og:image\":\"https://www.firecrawl.dev/og.png\",\"twitter:card\":\"summary_large_image\",\"twitter:creator\":\"@Vercel\",\"twitter:description\":\"The
|
||||
web crawling, scraping, and search API for AI. Built for scale. Firecrawl
|
||||
delivers the entire internet to AI agents and builders. Clean, structured,
|
||||
and ready to reason with.\",\"language\":\"en\",\"keywords\":\"Firecrawl,Markdown,Data,Mendable,Langchain\",\"creator\":\"Firecrawl\",\"ogTitle\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"og:description\":\"The web crawling, scraping,
|
||||
and search API for AI. Built for scale. Firecrawl delivers the entire internet
|
||||
to AI agents and builders. Clean, structured, and ready to reason with.\",\"scrapeId\":\"e78d8060-d581-4e5e-b25a-90cfdad48530\",\"sourceURL\":\"https://firecrawl.dev\",\"url\":\"https://www.firecrawl.dev/\",\"statusCode\":200,\"contentType\":\"text/html;
|
||||
charset=utf-8\",\"proxyUsed\":\"basic\",\"cacheState\":\"hit\",\"cachedAt\":\"2025-10-29T13:09:07.713Z\",\"creditsUsed\":1}}}"
|
||||
headers:
|
||||
Access-Control-Allow-Origin:
|
||||
- '*'
|
||||
Alt-Svc:
|
||||
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
|
||||
Content-Length:
|
||||
- '24693'
|
||||
Content-Type:
|
||||
- application/json; charset=utf-8
|
||||
Date:
|
||||
- Wed, 29 Oct 2025 14:34:03 GMT
|
||||
ETag:
|
||||
- W/"6075-Q1W6uMv95JKEZARbtaiPYYMojlU"
|
||||
Via:
|
||||
- 1.1 google
|
||||
X-Powered-By:
|
||||
- Express
|
||||
X-Response-Time:
|
||||
- 4719.998ms
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -0,0 +1,937 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"query": "firecrawl", "limit": 5, "scrapeOptions": {"includeTags": [],
|
||||
"excludeTags": [], "onlyMainContent": true, "waitFor": 0, "skipTlsVerification":
|
||||
true, "removeBase64Images": true, "fastMode": false, "blockAds": true, "storeInCache":
|
||||
true, "maxAge": 14400000, "formats": ["markdown"], "mobile": false}, "origin":
|
||||
"python-sdk@4.5.0"}'
|
||||
headers:
|
||||
Accept:
|
||||
- '*/*'
|
||||
Accept-Encoding:
|
||||
- gzip, deflate, zstd
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Length:
|
||||
- '338'
|
||||
Content-Type:
|
||||
- application/json
|
||||
User-Agent:
|
||||
- python-requests/2.32.5
|
||||
method: POST
|
||||
uri: https://api.firecrawl.dev/v2/search
|
||||
response:
|
||||
body:
|
||||
string: "{\"success\":true,\"data\":{\"web\":[{\"url\":\"https://www.firecrawl.dev/\",\"title\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"description\":\"The web crawling, scraping,
|
||||
and search API for AI. Built for scale. Firecrawl delivers the entire internet
|
||||
to AI agents and builders.\",\"position\":1,\"markdown\":\"We just raised
|
||||
our Series A and shipped Firecrawl /v2 \U0001F389. [Read the blog.](https://www.firecrawl.dev/blog/firecrawl-v2-series-a-announcement)\\n\\n[2
|
||||
Months Free \u2014 Annually](https://www.firecrawl.dev/pricing)\\n\\n# Turn
|
||||
websites into LLM-ready data\\n\\nPower your AI apps with clean web data\\n\\nfrom
|
||||
any website. [It's also open source.](https://github.com/firecrawl/firecrawl)\\n\\nScrape\\n\\nSearch\\nNew\\n\\nMap\\n\\nCrawl\\n\\nScrape\\n\\nLogo\\n\\nNavigation\\n\\nButton\\n\\nH1
|
||||
Title\\n\\nDescription\\n\\nCTA Button\\n\\n\\\\[ .JSON \\\\]\\n\\n```json\\n1[\\\\\\n2
|
||||
\ {\\\\\\n3 \\\"url\\\": \\\"https://example.com\\\",\\\\\\n4 \\\"markdown\\\":
|
||||
\\\"# Getting Started...\\\",\\\\\\n5 \\\"json\\\": { \\\"title\\\": \\\"Guide\\\",
|
||||
\\\"docs\\\": \\\"...\\\" },\\\\\\n6 \\\"screenshot\\\": \\\"https://example.com/hero.png\\\"\\\\\\n7
|
||||
\ }\\\\\\n8]\\n```\\n\\nScrape Completed\\n\\nTrusted by5000+\\n\\ncompaniesof
|
||||
all sizes\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\\\[01/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nMain Features\\n\\n//\\n\\nDeveloper First\\n\\n//\\n\\n##
|
||||
Startscraping today\\n\\nEnhance your apps with industry leading web scraping
|
||||
and crawling capabilities.\\n\\nScrape\\n\\nGet llm-ready data from websites.
|
||||
Markdown, JSON, screenshot, etc.\\n\\nSearch\\n\\nNew\\n\\nSearch the web
|
||||
and get full content from results.\\n\\nCrawl\\n\\nCrawl all the pages on
|
||||
a website and get data for each page.\\n\\nPython\\n\\nNode.js\\n\\nCurl\\n\\nCopy
|
||||
code\\n\\n```python\\n1# pip install firecrawl-py\\n2from firecrawl import
|
||||
Firecrawl\\n3\\n4app = Firecrawl(api_key=\\\"fc-YOUR_API_KEY\\\")\\n5\\n6#
|
||||
Scrape a website:\\n7app.scrape('firecrawl.dev')\\n8\\n9\\n10\\n```\\n\\n\\\\[
|
||||
.MD \\\\]\\n\\n```markdown\\n1# Firecrawl\\n2\\n3Firecrawl is a powerful web
|
||||
scraping\\n4library that makes it easy to extract\\n5data from websites.\\n6\\n7##
|
||||
Installation\\n8\\n9To install Firecrawl, run:\\n10\\n11\\n```\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nIntegrations\\n\\n###
|
||||
Use well-known tools\\n\\nAlready fully integrated with the greatest existing
|
||||
tools and workflows.\\n\\n[See all integrations](https://www.firecrawl.dev/app)\\n\\n\\n\\nmendableai/firecrawl\\n\\nPublic\\n\\nStar\\n\\n65.3K\\n\\n\\\\[python-SDK\\\\]
|
||||
improvs/async\\n\\n#1337\\n\\n\xB7\\n\\nApr 18, 2025\\n\\n\xB7\\n\\n\\n\\nrafaelsideguide\\n\\nfeat(extract):
|
||||
cost limit\\n\\n#1473\\n\\n\xB7\\n\\nApr 17, 2025\\n\\n\xB7\\n\\n\\n\\nmogery\\n\\nfeat(scrape):
|
||||
get job result from GCS, avoid Redis\\n\\n#1461\\n\\n\xB7\\n\\nApr 15, 2025\\n\\n\xB7\\n\\n\\n\\nmogery\\n\\nExtract
|
||||
v2/rerank improvs\\n\\n#1437\\n\\n\xB7\\n\\nApr 11, 2025\\n\\n\xB7\\n\\n\\n\\nrafaelsideguide\\n\\n\\n\\n\\n\\n+90\\n\\nOpen
|
||||
Source\\n\\n### Code you can trust\\n\\nDeveloped transparently and collaboratively.
|
||||
Join our community of contributors.\\n\\n[Check out our repo](https://github.com/firecrawl/firecrawl)\\n\\n\\\\[02/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nCore\\n\\n//\\n\\nBuilt to outperform\\n\\n//\\n\\n##
|
||||
Core principles, provenperformance\\n\\nBuilt from the ground up to outperform
|
||||
traditional scrapers.\\n\\nNo proxy headaches\\n\\nReliable.Covers 96% of
|
||||
the web,\\n\\nincluding JS-heavy and protected pages. No proxies, no puppets,
|
||||
just clean data.\\n\\nFirecrawl\\n\\n96%\\n\\n\\n\\nPuppeteer\\n\\n79%\\n\\ncURL\\n\\n75%\\n\\nSpeed
|
||||
that feels invisible\\n\\nBlazingly fast.Delivers results in less than 1 second,
|
||||
fast for real-time agents\\n\\nand dynamic apps.\\n\\nURL\\n\\nCrawl\\n\\nScrape\\n\\nfirecrawl.dev/docs\\n\\n50ms\\n\\n51ms\\n\\nfirecrawl.dev/templates\\n\\n52ms\\n\\n50ms\\n\\nfirecrawl.dev/changelog\\n\\n49ms\\n\\n52ms\\n\\nfirecrawl.dev/about\\n\\n52ms\\n\\n50ms\\n\\nfirecrawl.dev/changelog\\n\\n50ms\\n\\n52ms\\n\\nfirecrawl.dev/playground\\n\\n51ms\\n\\n49ms\\n\\n\\\\[
|
||||
CTA \\\\]\\n\\n\\\\[ CRAWL \\\\]\\n\\n\\\\[ SCRAPE \\\\]\\n\\n\\\\[ CTA \\\\]\\n\\n//\\n\\nGet
|
||||
started\\n\\n//\\n\\nReady to build?\\n\\nStart getting Web Data for free
|
||||
and scale seamlessly as your project expands. No credit card needed.\\n\\n[Start
|
||||
for free](https://www.firecrawl.dev/signin) [See our plans](https://www.firecrawl.dev/pricing)\\n\\n\\\\[03/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nFeatures\\n\\n//\\n\\nZero configuration\\n\\n//\\n\\n##
|
||||
We handle the hard stuff\\n\\nRotating proxies, orchestration, rate limits,
|
||||
js-blocked content and more.\\n\\nDocs to data\\n\\nMedia parsing.Firecrawl
|
||||
can parse and output content from web hosted pdfs, docx, and more.\\n\\nhttps://example.com/docs/report.pdf\\n\\nhttps://example.com/files/brief.docx\\n\\nhttps://example.com/docs/guide.html\\n\\ndocx\\n\\nParsing...\\n\\nKnows
|
||||
the moment\\n\\nSmart wait.Firecrawl intelligently waits for content to load,
|
||||
making scraping faster and more reliable.\\n\\nhttps://example-spa.com\\n\\nRequest
|
||||
Sent\\n\\nScrapes the real thing\\n\\nCached, when you need it.Selective caching,
|
||||
you choose your caching patterns, growing web index.\\n\\n\\n\\nUser\\n\\nFirecrawl\\n\\nCache\\n\\nInvisible
|
||||
access\\n\\nStealth mode.Crawls the web without\\n\\nbeing blocked, mimics
|
||||
real users to access protected or dynamic content.\\n\\nInteractive scraping\\n\\nActions.Click,
|
||||
scroll, write, wait, press and more before extracting content.\\n\\nhttps://example.com\\n\\nNavigate\\n\\nClick\\n\\nType\\n\\nWait\\n\\nScroll\\n\\nPress\\n\\nScreenshot\\n\\nScrape\\n\\n\\\\[04/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nPricing\\n\\n//\\n\\nTransparent\\n\\n//\\n\\n## Flexible
|
||||
pricing\\n\\nExplore transparent pricing built for real-world scraping. Start
|
||||
for free, then scale as you grow.\\n\\n\U0001F1FA\U0001F1F8USD\\n\\nFree Plan\\n\\nA
|
||||
lightweight way to try scraping.\\n\\nNo cost, no card, no hassle.\\n\\n500
|
||||
credits\\n\\n$0123456789\\n\\none-time\\n\\nGet started\\n\\nScrape 500 pages\\n\\n2
|
||||
concurrent requests\\n\\nLow rate limits\\n\\nHobby\\n\\nGreat for side projects
|
||||
and small tools.\\n\\nFast, simple, no overkill.\\n\\n3,000 credits\\n\\n$01234567890123456789\\n\\n/monthly\\n\\nBilled
|
||||
yearly\\n\\n2 months free\\n\\nSubscribe\\n\\nScrape 3,000 pages\\n\\n5 concurrent
|
||||
requests\\n\\nBasic support\\n\\n$9 per extra 1k credits\\n\\nStandard\\n\\nMost
|
||||
popular\\n\\nPerfect for scaling with less effort.\\n\\nSimple, solid, dependable.\\n\\n100,000
|
||||
credits\\n\\n$01234567890123456789\\n\\n/monthly\\n\\nBilled yearly\\n\\n2
|
||||
months free\\n\\nSubscribe\\n\\nScrape 100,000 pages\\n\\n50 concurrent requests\\n\\nStandard
|
||||
support\\n\\n$47 per extra 35k credits\\n\\nGrowth\\n\\nBuilt for high volume
|
||||
and speed.\\n\\nFirecrawl at full force.\\n\\n500,000 credits\\n\\n$012345678901234567890123456789\\n\\n/monthly\\n\\nBilled
|
||||
yearly\\n\\n2 months free\\n\\nSubscribe\\n\\nScrape 500,000 pages\\n\\n100
|
||||
concurrent requests\\n\\nPriority support\\n\\n$177 per extra 175k credits\\n\\nExtra
|
||||
credits are available via auto-recharge packs. [Enable](https://www.firecrawl.dev/signin/signup)\\n\\nEnterprise\\n\\nPower
|
||||
at your pace\\n\\nUnlimited credits. Custom RPMs.\\n\\n[Contact sales](https://fk4bvu0n5qp.typeform.com/to/Ej6oydlg)
|
||||
[More details](https://www.firecrawl.dev/enterprise)\\n\\nBulk discounts\\n\\nTop
|
||||
priority support\\n\\nCustom concurrency limits\\n\\nImproved stealth proxies\\n\\nSLAs\\n\\nAdvanced
|
||||
security & controls\\n\\n\\\\[05/ 07 \\\\]\\n\\n\xB7\\n\\nTestimonials\\n\\n//\\n\\nCommunity\\n\\n//\\n\\n##
|
||||
People love building withFirecrawl\\n\\nDiscover why developers choose
|
||||
Firecrawl every day.\\n\\n[Morgan
|
||||
Linton@morganlinton\\\"If you're coding with AI, and haven't discovered @firecrawl\\\\_dev
|
||||
yet, prepare to have your mind blown \U0001F92F\\\"](https://x.com/morganlinton/status/1839454165703204955)
|
||||
[Chris
|
||||
DeWeese@chrisdeweese\\\\_\\\"Started using @firecrawl\\\\_dev for a project,
|
||||
I wish I used this sooner.\\\"](https://x.com/chrisdeweese_/status/1853587120406876601)
|
||||
[Alex
|
||||
Reibman@AlexReibman\\\"Moved our internal agent's web scraping tool from Apify
|
||||
to Firecrawl because it benchmarked 50x faster with AgentOps.\\\"](https://x.com/AlexReibman/status/1780299595484131836)
|
||||
[Tom
|
||||
- Morpho@TomReppelin\\\"I found gold today. Thank you @firecrawl\\\\_dev\\\"](https://x.com/TomReppelin/status/1844382491014201613)\\n\\n[Morgan
|
||||
Linton@morganlinton\\\"If you're coding with AI, and haven't discovered @firecrawl\\\\_dev
|
||||
yet, prepare to have your mind blown \U0001F92F\\\"](https://x.com/morganlinton/status/1839454165703204955)
|
||||
[Chris
|
||||
DeWeese@chrisdeweese\\\\_\\\"Started using @firecrawl\\\\_dev for a project,
|
||||
I wish I used this sooner.\\\"](https://x.com/chrisdeweese_/status/1853587120406876601)
|
||||
[Alex
|
||||
Reibman@AlexReibman\\\"Moved our internal agent's web scraping tool from Apify
|
||||
to Firecrawl because it benchmarked 50x faster with AgentOps.\\\"](https://x.com/AlexReibman/status/1780299595484131836)
|
||||
[Tom
|
||||
- Morpho@TomReppelin\\\"I found gold today. Thank you @firecrawl\\\\_dev\\\"](https://x.com/TomReppelin/status/1844382491014201613)\\n\\n[Bardia@thepericulum\\\"The
|
||||
Firecrawl team ships. I wanted types for their node SDK, and less than an
|
||||
hour later, I got them.\\\"](https://x.com/thepericulum/status/1781397799487078874)
|
||||
[Matt
|
||||
Busigin@mbusigin\\\"Firecrawl is dope. Congrats guys \U0001F44F\\\"](https://x.com/mbusigin/status/1836065372010656069)
|
||||
[Sumanth@Sumanth\\\\_077\\\"Web
|
||||
scraping will never be the same!\\\\\\\\\\n\\\\\\\\\\nFirecrawl is an open-source
|
||||
framework that takes a URL, crawls it, and conver...\\\"](https://x.com/Sumanth_077/status/1940049003074478511)
|
||||
[Steven
|
||||
Tey@steventey\\\"Open-source Clay alternative just dropped\\\\\\\\\\n\\\\\\\\\\nUpload
|
||||
a CSV of emails and...\\\"](https://x.com/steventey/status/1932945651761098889)\\n\\n[Bardia@thepericulum\\\"The
|
||||
Firecrawl team ships. I wanted types for their node SDK, and less than an
|
||||
hour later, I got them.\\\"](https://x.com/thepericulum/status/1781397799487078874)
|
||||
[Matt
|
||||
Busigin@mbusigin\\\"Firecrawl is dope. Congrats guys \U0001F44F\\\"](https://x.com/mbusigin/status/1836065372010656069)
|
||||
[Sumanth@Sumanth\\\\_077\\\"Web
|
||||
scraping will never be the same!\\\\\\\\\\n\\\\\\\\\\nFirecrawl is an open-source
|
||||
framework that takes a URL, crawls it, and conver...\\\"](https://x.com/Sumanth_077/status/1940049003074478511)
|
||||
[Steven
|
||||
Tey@steventey\\\"Open-source Clay alternative just dropped\\\\\\\\\\n\\\\\\\\\\nUpload
|
||||
a CSV of emails and...\\\"](https://x.com/steventey/status/1932945651761098889)\\n\\n\\\\[06/
|
||||
07 \\\\]\\n\\n\xB7\\n\\nUse Cases\\n\\n//\\n\\nUse cases\\n\\n//\\n\\n## Transform
|
||||
\ web data into AI-powered solutions\\n\\nDiscover how Firecrawl customers
|
||||
are getting the most out of our API.\\n\\n[View all use cases](https://docs.firecrawl.dev/use-cases/overview)\\n\\nChat
|
||||
with context\\n\\nSmarter AI chats\\n\\nPower your AI assistants with real-time,
|
||||
accurate web content.\\n\\n[View docs](https://docs.firecrawl.dev/introduction)\\n\\n\\n\\nAI Assistant\\n\\nwithFirecrawl\\n\\nReal-time\xB7Updated
|
||||
2 min ago\\n\\nAsk anything...\\n\\nKnow your leads\\n\\nLead enrichment\\n\\nEnhance
|
||||
your sales data with\\n\\nweb information.\\n\\n[Check out Extract](https://www.firecrawl.dev/extract)\\n\\nExtracting
|
||||
leads from directory...\\n\\nTech startups\\n\\nWith contact info\\n\\nDecision
|
||||
makers\\n\\nFunding stage\\n\\nReady to engage\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nKnow your leads\\n\\nMCPs\\n\\nAdd
|
||||
powerful scraping to your\\n\\ncode editors.\\n\\n[Get started](https://docs.firecrawl.dev/mcp-server)\\n\\n\\n\\nClaude Code\\n\\n\\n\\nCursor\\n\\n\\n\\nWindsurf\\n\\n\u273B\\n\\nWelcome
|
||||
to Claude Code!\\n\\n/help for help, /status for your current setup\\n\\n>Try
|
||||
\\\"how do I log an error?\\\"\\n\\nBuild with context\\n\\nAI platforms\\n\\nLet
|
||||
your customers build AI apps\\n\\nwith web data.\\n\\n[Check out Map](https://docs.firecrawl.dev/features/map)\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nExtracting
|
||||
text...\\n\\nNo insight missed\\n\\nDeep research\\n\\nExtract comprehensive
|
||||
information for\\n\\nin-depth research.\\n\\n[Build your own with Search](https://docs.firecrawl.dev/features/search)\\n\\nDeep
|
||||
research in progress...\\n\\nAcademic papers\\n\\n0 found\\n\\nNews articles\\n\\n0
|
||||
found\\n\\nExpert opinions\\n\\n0 found\\n\\nResearch reports\\n\\n0 found\\n\\nIndustry
|
||||
data\\n\\n0 found\\n\\nAsk anything...\\n\\n\\\\[ CTA \\\\]\\n\\n\\\\[ CRAWL
|
||||
\\\\]\\n\\n\\\\[ SCRAPE \\\\]\\n\\n\\\\[ CTA \\\\]\\n\\n//\\n\\nGet started\\n\\n//\\n\\nReady
|
||||
to build?\\n\\nStart getting Web Data for free and scale seamlessly as your
|
||||
project expands. No credit card needed.\\n\\n[Start for free](https://www.firecrawl.dev/signin)
|
||||
[See our plans](https://www.firecrawl.dev/pricing)\\n\\n\\\\[07/ 07 \\\\]\\n\\n\xB7\\n\\nFAQ\\n\\n//\\n\\nFAQ\\n\\n//\\n\\n##
|
||||
Frequently askedquestions\\n\\nEverything you need to know about Firecrawl.\\n\\nGeneral\\n\\nWhat
|
||||
is Firecrawl?\\n\\nWhat sites work?\\n\\nWho can benefit from using Firecrawl?\\n\\nIs
|
||||
Firecrawl open-source?\\n\\nWhat is the difference between Firecrawl and other
|
||||
web scrapers?\\n\\nWhat is the difference between the open-source version
|
||||
and the hosted version?\\n\\nScraping & Crawling\\n\\nHow does Firecrawl handle
|
||||
dynamic content on websites?\\n\\nWhy is it not crawling all the pages?\\n\\nCan
|
||||
Firecrawl crawl websites without a sitemap?\\n\\nWhat formats can Firecrawl
|
||||
convert web data into?\\n\\nHow does Firecrawl ensure the cleanliness of the
|
||||
data?\\n\\nIs Firecrawl suitable for large-scale data scraping projects?\\n\\nDoes
|
||||
it respect robots.txt?\\n\\nWhat measures does Firecrawl take to handle web
|
||||
scraping challenges like rate limits and caching?\\n\\nDoes Firecrawl handle
|
||||
captcha or authentication?\\n\\nAPI Related\\n\\nWhere can I find my API key?\\n\\nBilling\\n\\nIs
|
||||
Firecrawl free?\\n\\nIs there a pay-per-use plan instead of monthly?\\n\\nDo
|
||||
credits roll over to the next month?\\n\\nHow many credits do scraping and
|
||||
crawling cost?\\n\\nDo you charge for failed requests?\\n\\nWhat payment methods
|
||||
do you accept?\\n\\nFOOTER\\n\\nThe easiest way to extract\\n\\ndata from
|
||||
the web\\n\\nBacked by\\n\\nY Combinator\\n\\n[Linkedin](https://www.linkedin.com/company/firecrawl)
|
||||
[Github](https://github.com/firecrawl/firecrawl)\\n\\nSOC II \xB7 Type 2\\n\\nAICPA\\n\\nSOC
|
||||
2\\n\\n[X (Twitter)](https://x.com/firecrawl_dev) [Discord](https://discord.gg/gSmWdAkdwd)\\n\\nProducts\\n\\n[Playground](https://www.firecrawl.dev/playground)
|
||||
[Extract](https://www.firecrawl.dev/extract) [Pricing](https://www.firecrawl.dev/pricing)
|
||||
[Templates](https://www.firecrawl.dev/templates) [Changelog](https://www.firecrawl.dev/changelog)\\n\\nUse
|
||||
Cases\\n\\n[AI Platforms](https://docs.firecrawl.dev/use-cases/ai-platforms)
|
||||
[Lead Enrichment](https://docs.firecrawl.dev/use-cases/lead-enrichment) [SEO
|
||||
Platforms](https://docs.firecrawl.dev/use-cases/seo-platforms) [Deep Research](https://docs.firecrawl.dev/use-cases/deep-research)\\n\\nDocumentation\\n\\n[Getting
|
||||
started](https://docs.firecrawl.dev/introduction) [API Reference](https://docs.firecrawl.dev/api-reference/introduction)
|
||||
[Integrations](https://www.firecrawl.dev/app) [Examples](https://docs.firecrawl.dev/use-cases/overview)
|
||||
[SDKs](https://docs.firecrawl.dev/sdks/overview)\\n\\nCompany\\n\\n[Blog](https://www.firecrawl.dev/blog)
|
||||
[Careers](https://www.firecrawl.dev/careers) [Creator & OSS program](https://www.firecrawl.dev/creator-oss-program)
|
||||
[Student program](https://www.firecrawl.dev/student-program)\\n\\n\xA9 2025
|
||||
Firecrawl\\n\\n[Terms of Service](https://www.firecrawl.dev/terms-of-service)
|
||||
[Privacy Policy](https://www.firecrawl.dev/privacy-policy) [Report Abuse](mailto:help@firecrawl.com?subject=Issue:)\\n\\n[All
|
||||
systems normal](https://status.firecrawl.dev/)\\n\\nStripeM-Inner\",\"metadata\":{\"favicon\":\"https://www.firecrawl.dev/favicon.png\",\"ogUrl\":\"https://www.firecrawl.dev\",\"ogImage\":\"https://www.firecrawl.dev/og.png\",\"referrer\":\"origin-when-cross-origin\",\"ogDescription\":\"The
|
||||
web crawling, scraping, and search API for AI. Built for scale. Firecrawl
|
||||
delivers the entire internet to AI agents and builders. Clean, structured,
|
||||
and ready to reason with.\",\"robots\":\"follow, index\",\"twitter:card\":\"summary_large_image\",\"og:site_name\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"twitter:title\":\"Firecrawl - The Web Data API
|
||||
for AI\",\"og:image\":\"https://www.firecrawl.dev/og.png\",\"title\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"og:description\":\"The web crawling, scraping,
|
||||
and search API for AI. Built for scale. Firecrawl delivers the entire internet
|
||||
to AI agents and builders. Clean, structured, and ready to reason with.\",\"twitter:image\":\"https://www.firecrawl.dev/og.png\",\"viewport\":\"width=device-width,
|
||||
initial-scale=1, maximum-scale=1, user-scalable=no\",\"ogSiteName\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"keywords\":\"Firecrawl,Markdown,Data,Mendable,Langchain\",\"author\":\"Firecrawl\",\"og:title\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"twitter:description\":\"The web crawling, scraping,
|
||||
and search API for AI. Built for scale. Firecrawl delivers the entire internet
|
||||
to AI agents and builders. Clean, structured, and ready to reason with.\",\"description\":\"The
|
||||
web crawling, scraping, and search API for AI. Built for scale. Firecrawl
|
||||
delivers the entire internet to AI agents and builders. Clean, structured,
|
||||
and ready to reason with.\",\"twitter:site\":\"@Vercel\",\"og:url\":\"https://www.firecrawl.dev\",\"og:type\":\"website\",\"ogTitle\":\"Firecrawl
|
||||
- The Web Data API for AI\",\"language\":\"en\",\"creator\":\"Firecrawl\",\"publisher\":\"Firecrawl\",\"twitter:creator\":\"@Vercel\",\"scrapeId\":\"57b0586f-36e8-4923-aaa2-88ff58c03999\",\"sourceURL\":\"https://www.firecrawl.dev/\",\"url\":\"https://www.firecrawl.dev/\",\"statusCode\":200,\"contentType\":\"text/html;
|
||||
charset=utf-8\",\"proxyUsed\":\"basic\",\"cacheState\":\"hit\",\"cachedAt\":\"2025-10-29T13:09:07.713Z\"}},{\"url\":\"https://github.com/firecrawl/firecrawl\",\"title\":\"firecrawl/firecrawl:
|
||||
The Web Data API for AI - Turn entire ... - GitHub\",\"description\":\"Firecrawl
|
||||
is an API service that takes a URL, crawls it, and converts it into clean
|
||||
markdown or structured data. We crawl all accessible subpages and give you
|
||||
...\",\"position\":2,\"category\":\"github\",\"markdown\":\"[Skip to content](https://github.com/firecrawl/firecrawl#start-of-content)\\n\\nYou
|
||||
signed in with another tab or window. [Reload](https://github.com/firecrawl/firecrawl)
|
||||
to refresh your session.You signed out in another tab or window. [Reload](https://github.com/firecrawl/firecrawl)
|
||||
to refresh your session.You switched accounts on another tab or window. [Reload](https://github.com/firecrawl/firecrawl)
|
||||
to refresh your session.Dismiss alert\\n\\n{{ message }}\\n\\n[firecrawl](https://github.com/firecrawl)/
|
||||
**[firecrawl](https://github.com/firecrawl/firecrawl)** Public\\n\\n- Couldn't
|
||||
load subscription status.\\nRetry\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n### Uh
|
||||
oh!\\n\\n\\n\\n\\n\\n\\n\\nThere was an error while loading. [Please reload
|
||||
this page](https://github.com/firecrawl/firecrawl).\\n\\n- [Fork\\\\\\\\\\n5.1k](https://github.com/login?return_to=%2Ffirecrawl%2Ffirecrawl)\\n-
|
||||
[Star\\\\\\\\\\n65.2k](https://github.com/login?return_to=%2Ffirecrawl%2Ffirecrawl)\\n\\n\\n\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data\\n\\n\\n[firecrawl.dev](https://firecrawl.dev/ \\\"https://firecrawl.dev\\\")\\n\\n###
|
||||
License\\n\\n[AGPL-3.0 license](https://github.com/firecrawl/firecrawl/blob/main/LICENSE)\\n\\n[65.2k\\\\\\\\\\nstars](https://github.com/firecrawl/firecrawl/stargazers)
|
||||
[5.1k\\\\\\\\\\nforks](https://github.com/firecrawl/firecrawl/forks) [Branches](https://github.com/firecrawl/firecrawl/branches)
|
||||
[Tags](https://github.com/firecrawl/firecrawl/tags) [Activity](https://github.com/firecrawl/firecrawl/activity)\\n\\n[Star](https://github.com/login?return_to=%2Ffirecrawl%2Ffirecrawl)\\n\\nCouldn't
|
||||
load subscription status.\\nRetry\\n\\n### Uh oh!\\n\\nThere was an error
|
||||
while loading. [Please reload this page](https://github.com/firecrawl/firecrawl).\\n\\n#
|
||||
firecrawl/firecrawl\\n\\nmain\\n\\n[**887** Branches](https://github.com/firecrawl/firecrawl/branches)
|
||||
[**28** Tags](https://github.com/firecrawl/firecrawl/tags)\\n\\n[Go to Branches
|
||||
page](https://github.com/firecrawl/firecrawl/branches)[Go to Tags page](https://github.com/firecrawl/firecrawl/tags)\\n\\nGo
|
||||
to file\\n\\nCode\\n\\nOpen more actions menu\\n\\n## Folders and files\\n\\n|
|
||||
Name | Name | Last commit message | Last commit date |\\n| --- | --- | ---
|
||||
| --- |\\n| ## Latest commit<br>[](https://github.com/amplitudesxd)[amplitudesxd](https://github.com/firecrawl/firecrawl/commits?author=amplitudesxd)<br>[chore:
|
||||
update last scrape rpc (](https://github.com/firecrawl/firecrawl/commit/37de2877fab4bae2de297e37bad3c9bcd49a64bc)
|
||||
[#2339](https://github.com/firecrawl/firecrawl/pull/2339) [)](https://github.com/firecrawl/firecrawl/commit/37de2877fab4bae2de297e37bad3c9bcd49a64bc)<br>success<br>20
|
||||
hours agoOct 27, 2025<br>[37de287](https://github.com/firecrawl/firecrawl/commit/37de2877fab4bae2de297e37bad3c9bcd49a64bc)\_\xB7\_20
|
||||
hours agoOct 27, 2025<br>## History<br>[4,487 Commits](https://github.com/firecrawl/firecrawl/commits/main/)
|
||||
<br>Open commit details<br>[View commit history for this file.](https://github.com/firecrawl/firecrawl/commits/main/)
|
||||
|\\n| [.github](https://github.com/firecrawl/firecrawl/tree/main/.github \\\".github\\\")
|
||||
| [.github](https://github.com/firecrawl/firecrawl/tree/main/.github \\\".github\\\")
|
||||
| [fix(ci): temp disabled prod env tests](https://github.com/firecrawl/firecrawl/commit/42fc149c1ab738da0e15e772817774aa35273f8e
|
||||
\\\"fix(ci): temp disabled prod env tests\\\") | 5 days agoOct 23, 2025 |\\n|
|
||||
[apps](https://github.com/firecrawl/firecrawl/tree/main/apps \\\"apps\\\")
|
||||
| [apps](https://github.com/firecrawl/firecrawl/tree/main/apps \\\"apps\\\")
|
||||
| [chore: update last scrape rpc (](https://github.com/firecrawl/firecrawl/commit/37de2877fab4bae2de297e37bad3c9bcd49a64bc
|
||||
\\\"chore: update last scrape rpc (#2339)\\\") [#2339](https://github.com/firecrawl/firecrawl/pull/2339)
|
||||
[)](https://github.com/firecrawl/firecrawl/commit/37de2877fab4bae2de297e37bad3c9bcd49a64bc
|
||||
\\\"chore: update last scrape rpc (#2339)\\\") | 20 hours agoOct 27, 2025
|
||||
|\\n| [examples](https://github.com/firecrawl/firecrawl/tree/main/examples
|
||||
\\\"examples\\\") | [examples](https://github.com/firecrawl/firecrawl/tree/main/examples
|
||||
\\\"examples\\\") | [Merge pull request](https://github.com/firecrawl/firecrawl/commit/7ad57003b4ad8b230ba8252129e52bafa62dfae9
|
||||
\\\"Merge pull request #2172 from MAVRICK-1/firecrawl-gemini-screenshot-editor
|
||||
\ feat: Add Firecrawl + Gemini 2.5 Flash Image CLI Editor\\\") [#2172](https://github.com/firecrawl/firecrawl/pull/2172)
|
||||
[from MAVRICK-1/firecrawl-gemini-screenshot-e\u2026](https://github.com/firecrawl/firecrawl/commit/7ad57003b4ad8b230ba8252129e52bafa62dfae9
|
||||
\\\"Merge pull request #2172 from MAVRICK-1/firecrawl-gemini-screenshot-editor
|
||||
\ feat: Add Firecrawl + Gemini 2.5 Flash Image CLI Editor\\\") | last monthSep
|
||||
23, 2025 |\\n| [img](https://github.com/firecrawl/firecrawl/tree/main/img
|
||||
\\\"img\\\") | [img](https://github.com/firecrawl/firecrawl/tree/main/img
|
||||
\\\"img\\\") | [updated readme](https://github.com/firecrawl/firecrawl/commit/4f904e774831dc598681d3e998d0e5e15abcec27
|
||||
\\\"updated readme\\\") | 2 months agoAug 18, 2025 |\\n| [.gitattributes](https://github.com/firecrawl/firecrawl/blob/main/.gitattributes
|
||||
\\\".gitattributes\\\") | [.gitattributes](https://github.com/firecrawl/firecrawl/blob/main/.gitattributes
|
||||
\\\".gitattributes\\\") | [Initial commit](https://github.com/firecrawl/firecrawl/commit/a6c2a878119321a196f720cce4195e086f1c6b46
|
||||
\\\"Initial commit\\\") | last yearApr 15, 2024 |\\n| [.gitignore](https://github.com/firecrawl/firecrawl/blob/main/.gitignore
|
||||
\\\".gitignore\\\") | [.gitignore](https://github.com/firecrawl/firecrawl/blob/main/.gitignore
|
||||
\\\".gitignore\\\") | [Nick: init](https://github.com/firecrawl/firecrawl/commit/ab3fa4838458c8303a67dd30fdd75a16b89cc20b
|
||||
\\\"Nick: init\\\") | 3 weeks agoOct 10, 2025 |\\n| [.gitmodules](https://github.com/firecrawl/firecrawl/blob/main/.gitmodules
|
||||
\\\".gitmodules\\\") | [.gitmodules](https://github.com/firecrawl/firecrawl/blob/main/.gitmodules
|
||||
\\\".gitmodules\\\") | [mendableai -> firecrawl](https://github.com/firecrawl/firecrawl/commit/2f3bc4e7a7b1a67a29c06df629f79402ee1aad1b
|
||||
\\\"mendableai -> firecrawl\\\") | 2 months agoAug 18, 2025 |\\n| [CLAUDE.md](https://github.com/firecrawl/firecrawl/blob/main/CLAUDE.md
|
||||
\\\"CLAUDE.md\\\") | [CLAUDE.md](https://github.com/firecrawl/firecrawl/blob/main/CLAUDE.md
|
||||
\\\"CLAUDE.md\\\") | [add claude file](https://github.com/firecrawl/firecrawl/commit/3f0873c788823258a7d9f55d1c8772aed4e1a8de
|
||||
\\\"add claude file\\\") | 2 months agoAug 6, 2025 |\\n| [CONTRIBUTING.md](https://github.com/firecrawl/firecrawl/blob/main/CONTRIBUTING.md
|
||||
\\\"CONTRIBUTING.md\\\") | [CONTRIBUTING.md](https://github.com/firecrawl/firecrawl/blob/main/CONTRIBUTING.md
|
||||
\\\"CONTRIBUTING.md\\\") | [Add Rust to CONTRIBUTING (](https://github.com/firecrawl/firecrawl/commit/f396cb20b54c3c2d7e64882642c5df6310a01002
|
||||
\\\"Add Rust to CONTRIBUTING (#2180)\\\") [#2180](https://github.com/firecrawl/firecrawl/pull/2180)
|
||||
[)](https://github.com/firecrawl/firecrawl/commit/f396cb20b54c3c2d7e64882642c5df6310a01002
|
||||
\\\"Add Rust to CONTRIBUTING (#2180)\\\") | last monthSep 18, 2025 |\\n| [LICENSE](https://github.com/firecrawl/firecrawl/blob/main/LICENSE
|
||||
\\\"LICENSE\\\") | [LICENSE](https://github.com/firecrawl/firecrawl/blob/main/LICENSE
|
||||
\\\"LICENSE\\\") | [Update SDKs to MIT license](https://github.com/firecrawl/firecrawl/commit/afb49e21e7cff595ebad9ce0b7aba13b88f39cf8
|
||||
\\\"Update SDKs to MIT license\\\") | last yearJul 8, 2024 |\\n| [README.md](https://github.com/firecrawl/firecrawl/blob/main/README.md
|
||||
\\\"README.md\\\") | [README.md](https://github.com/firecrawl/firecrawl/blob/main/README.md
|
||||
\\\"README.md\\\") | [Update README.md](https://github.com/firecrawl/firecrawl/commit/a21430e97818d95099bb365be711d9227bd75590
|
||||
\\\"Update README.md\\\") | 3 weeks agoOct 6, 2025 |\\n| [SELF\\\\_HOST.md](https://github.com/firecrawl/firecrawl/blob/main/SELF_HOST.md
|
||||
\\\"SELF_HOST.md\\\") | [SELF\\\\_HOST.md](https://github.com/firecrawl/firecrawl/blob/main/SELF_HOST.md
|
||||
\\\"SELF_HOST.md\\\") | [Allow self-hosted webhook delivery to private IP
|
||||
addresses (](https://github.com/firecrawl/firecrawl/commit/5756b834884d481382ce1f5674836a56b7fee33d
|
||||
\\\"Allow self-hosted webhook delivery to private IP addresses (#2232)\\\")
|
||||
[#2232](https://github.com/firecrawl/firecrawl/pull/2232) [)](https://github.com/firecrawl/firecrawl/commit/5756b834884d481382ce1f5674836a56b7fee33d
|
||||
\\\"Allow self-hosted webhook delivery to private IP addresses (#2232)\\\")
|
||||
| 27 days agoOct 1, 2025 |\\n| [docker-compose.yaml](https://github.com/firecrawl/firecrawl/blob/main/docker-compose.yaml
|
||||
\\\"docker-compose.yaml\\\") | [docker-compose.yaml](https://github.com/firecrawl/firecrawl/blob/main/docker-compose.yaml
|
||||
\\\"docker-compose.yaml\\\") | [Fix a self-hosted docker-compose.yaml bug
|
||||
caused by a recent firecraw\u2026](https://github.com/firecrawl/firecrawl/commit/7d4100b274889977fa1ba26344532d9d8747494c
|
||||
\\\"Fix a self-hosted docker-compose.yaml bug caused by a recent firecrawl
|
||||
change (#2252) Add EXTRACT_WORKER_PORT to docker-compose environment\\\")
|
||||
| 3 weeks agoOct 4, 2025 |\\n| View all files |\\n\\n## Repository files navigation\\n\\n###
|
||||
[](https://raw.githubusercontent.com/firecrawl/firecrawl/main/img/firecrawl_logo.png)\\n\\n[Permalink:
|
||||
](https://github.com/firecrawl/firecrawl#----)\\n\\n[](https://github.com/firecrawl/firecrawl/blob/main/LICENSE)[](https://pepy.tech/project/firecrawl-py)[](https://github.com/firecrawl/firecrawl/graphs/contributors)[](https://firecrawl.dev/)\\n\\n[](https://twitter.com/firecrawl_dev)[](https://www.linkedin.com/company/104100957)[](https://discord.com/invite/gSmWdAkdwd)\\n\\n#
|
||||
\U0001F525 Firecrawl\\n\\n[Permalink: \U0001F525 Firecrawl](https://github.com/firecrawl/firecrawl#-firecrawl)\\n\\nEmpower
|
||||
your AI apps with clean data from any website. Featuring advanced scraping,
|
||||
crawling, and data extraction capabilities.\\n\\n_This repository is in development,
|
||||
and we\u2019re still integrating custom modules into the mono repo. It's not
|
||||
fully ready for self-hosted deployment yet, but you can run it locally._\\n\\n##
|
||||
What is Firecrawl?\\n\\n[Permalink: What is Firecrawl?](https://github.com/firecrawl/firecrawl#what-is-firecrawl)\\n\\n[Firecrawl](https://firecrawl.dev/?ref=github)
|
||||
is an API service that takes a URL, crawls it, and converts it into clean
|
||||
markdown or structured data. We crawl all accessible subpages and give you
|
||||
clean data for each. No sitemap required. Check out our [documentation](https://docs.firecrawl.dev/).\\n\\nLooking
|
||||
for our MCP? Check out the [repo here](https://github.com/firecrawl/firecrawl-mcp-server).\\n\\n_Pst.
|
||||
hey, you, join our stargazers :)_\\n\\n[](https://github.com/firecrawl/firecrawl)\\n\\n##
|
||||
How to use it?\\n\\n[Permalink: How to use it?](https://github.com/firecrawl/firecrawl#how-to-use-it)\\n\\nWe
|
||||
provide an easy to use API with our hosted version. You can find the playground
|
||||
and documentation [here](https://firecrawl.dev/playground). You can also self
|
||||
host the backend if you'd like.\\n\\nCheck out the following resources to
|
||||
get started:\\n\\n- [x] **API**: [Documentation](https://docs.firecrawl.dev/api-reference/introduction)\\n-
|
||||
[x] **SDKs**: [Python](https://docs.firecrawl.dev/sdks/python), [Node](https://docs.firecrawl.dev/sdks/node)\\n-
|
||||
[x] **LLM Frameworks**: [Langchain (python)](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/),
|
||||
[Langchain (js)](https://js.langchain.com/docs/integrations/document_loaders/web_loaders/firecrawl),
|
||||
[Llama Index](https://docs.llamaindex.ai/en/latest/examples/data_connectors/WebPageDemo/#using-firecrawl-reader),
|
||||
[Crew.ai](https://docs.crewai.com/), [Composio](https://composio.dev/tools/firecrawl/all),
|
||||
[PraisonAI](https://docs.praison.ai/firecrawl/), [Superinterface](https://superinterface.ai/docs/assistants/functions/firecrawl),
|
||||
[Vectorize](https://docs.vectorize.io/integrations/source-connectors/firecrawl)\\n-
|
||||
[x] **Low-code Frameworks**: [Dify](https://dify.ai/blog/dify-ai-blog-integrated-with-firecrawl),
|
||||
[Langflow](https://docs.langflow.org/), [Flowise AI](https://docs.flowiseai.com/integrations/langchain/document-loaders/firecrawl),
|
||||
[Cargo](https://docs.getcargo.io/integration/firecrawl), [Pipedream](https://pipedream.com/apps/firecrawl/)\\n-
|
||||
[x] **Community SDKs**: [Go](https://docs.firecrawl.dev/sdks/go), [Rust](https://docs.firecrawl.dev/sdks/rust)\\n-
|
||||
[x] **Others**: [Zapier](https://zapier.com/apps/firecrawl/integrations),
|
||||
[Pabbly Connect](https://www.pabbly.com/connect/integrations/firecrawl/)\\n-
|
||||
[ ] Want an SDK or Integration? Let us know by opening an issue.\\n\\nTo
|
||||
run locally, refer to guide [here](https://github.com/firecrawl/firecrawl/blob/main/CONTRIBUTING.md).\\n\\n###
|
||||
API Key\\n\\n[Permalink: API Key](https://github.com/firecrawl/firecrawl#api-key)\\n\\nTo
|
||||
use the API, you need to sign up on [Firecrawl](https://firecrawl.dev/) and
|
||||
get an API key.\\n\\n### Features\\n\\n[Permalink: Features](https://github.com/firecrawl/firecrawl#features)\\n\\n-
|
||||
[**Scrape**](https://github.com/firecrawl/firecrawl#scraping): scrapes a URL
|
||||
and get its content in LLM-ready format (markdown, structured data via [LLM
|
||||
Extract](https://github.com/firecrawl/firecrawl#llm-extraction-beta), screenshot,
|
||||
html)\\n- [**Crawl**](https://github.com/firecrawl/firecrawl#crawling): scrapes
|
||||
all the URLs of a web page and return content in LLM-ready format\\n- [**Map**](https://github.com/firecrawl/firecrawl#map):
|
||||
input a website and get all the website urls - extremely fast\\n- [**Search**](https://github.com/firecrawl/firecrawl#search):
|
||||
search the web and get full content from results\\n- [**Extract**](https://github.com/firecrawl/firecrawl#extract):
|
||||
get structured data from single page, multiple pages or entire websites with
|
||||
AI.\\n\\n### Powerful Capabilities\\n\\n[Permalink: Powerful Capabilities](https://github.com/firecrawl/firecrawl#powerful-capabilities)\\n\\n-
|
||||
**LLM-ready formats**: markdown, structured data, screenshot, HTML, links,
|
||||
metadata\\n- **The hard stuff**: proxies, anti-bot mechanisms, dynamic content
|
||||
(js-rendered), output parsing, orchestration\\n- **Customizability**: exclude
|
||||
tags, crawl behind auth walls with custom headers, max crawl depth, etc...\\n-
|
||||
**Media parsing**: pdfs, docx, images\\n- **Reliability first**: designed
|
||||
to get the data you need - no matter how hard it is\\n- **Actions**: click,
|
||||
scroll, input, wait and more before extracting data\\n- **Batching**: scrape
|
||||
thousands of URLs at the same time with a new async endpoint\\n- **Change
|
||||
Tracking**: monitor and detect changes in website content over time\\n\\nYou
|
||||
can find all of Firecrawl's capabilities and how to use them in our [documentation](https://docs.firecrawl.dev/)\\n\\n###
|
||||
Crawling\\n\\n[Permalink: Crawling](https://github.com/firecrawl/firecrawl#crawling)\\n\\nUsed
|
||||
to crawl a URL and all accessible subpages. This submits a crawl job and returns
|
||||
a job ID to check the status of the crawl.\\n\\n```\\ncurl -X POST https://api.firecrawl.dev/v2/crawl
|
||||
\\\\\\n -H 'Content-Type: application/json' \\\\\\n -H 'Authorization:
|
||||
Bearer fc-YOUR_API_KEY' \\\\\\n -d '{\\n \\\"url\\\": \\\"https://docs.firecrawl.dev\\\",\\n
|
||||
\ \\\"limit\\\": 10,\\n \\\"scrapeOptions\\\": {\\n \\\"formats\\\":
|
||||
[\\\"markdown\\\", \\\"html\\\"]\\n }\\n }'\\n```\\n\\nReturns a crawl
|
||||
job id and the url to check the status of the crawl.\\n\\n```\\n{\\n \\\"success\\\":
|
||||
true,\\n \\\"id\\\": \\\"123-456-789\\\",\\n \\\"url\\\": \\\"https://api.firecrawl.dev/v2/crawl/123-456-789\\\"\\n}\\n```\\n\\n###
|
||||
Check Crawl Job\\n\\n[Permalink: Check Crawl Job](https://github.com/firecrawl/firecrawl#check-crawl-job)\\n\\nUsed
|
||||
to check the status of a crawl job and get its result.\\n\\n```\\ncurl -X
|
||||
GET https://api.firecrawl.dev/v2/crawl/123-456-789 \\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\n -H 'Authorization: Bearer YOUR_API_KEY'\\n```\\n\\n```\\n{\\n
|
||||
\ \\\"status\\\": \\\"completed\\\",\\n \\\"total\\\": 36,\\n \\\"creditsUsed\\\":
|
||||
36,\\n \\\"expiresAt\\\": \\\"2024-00-00T00:00:00.000Z\\\",\\n \\\"data\\\":
|
||||
[\\\\\\n {\\\\\\n \\\"markdown\\\": \\\"[Firecrawl Docs home page!...\\\",\\\\\\n
|
||||
\ \\\"html\\\": \\\"<!DOCTYPE html><html lang=\\\\\\\"en\\\\\\\" class=\\\\\\\"js-focus-visible
|
||||
lg:[--scroll-mt:9.5rem]\\\\\\\" data-js-focus-visible=\\\\\\\"\\\\\\\">...\\\",\\\\\\n
|
||||
\ \\\"metadata\\\": {\\\\\\n \\\"title\\\": \\\"Build a 'Chat with
|
||||
website' using Groq Llama 3 | Firecrawl\\\",\\\\\\n \\\"language\\\":
|
||||
\\\"en\\\",\\\\\\n \\\"sourceURL\\\": \\\"https://docs.firecrawl.dev/learn/rag-llama3\\\",\\\\\\n
|
||||
\ \\\"description\\\": \\\"Learn how to use Firecrawl, Groq Llama 3,
|
||||
and Langchain to build a 'Chat with your website' bot.\\\",\\\\\\n \\\"ogLocaleAlternate\\\":
|
||||
[],\\\\\\n \\\"statusCode\\\": 200\\\\\\n }\\\\\\n }\\\\\\n
|
||||
\ ]\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n### Scraping\\\\\\n\\\\\\n[Permalink: Scraping](https://github.com/firecrawl/firecrawl#scraping)\\\\\\n\\\\\\nUsed
|
||||
to scrape a URL and get its content in the specified formats.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/scrape \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"url\\\": \\\"https://docs.firecrawl.dev\\\",\\\\\\n
|
||||
\ \\\"formats\\\" : [\\\"markdown\\\", \\\"html\\\"]\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\nResponse:\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"data\\\": {\\\\\\n \\\"markdown\\\":
|
||||
\\\"Launch Week I is here! [See our Day 2 Release \U0001F680](https://www.firecrawl.dev/blog/launch-week-i-day-2-doubled-rate-limits)[\U0001F4A5
|
||||
Get 2 months free...\\\",\\\\\\n \\\"html\\\": \\\"<!DOCTYPE html><html
|
||||
lang=\\\\\\\"en\\\\\\\" class=\\\\\\\"light\\\\\\\" style=\\\\\\\"color-scheme:
|
||||
light;\\\\\\\"><body class=\\\\\\\"__variable_36bd41 __variable_d7dc5d font-inter
|
||||
...\\\",\\\\\\n \\\"metadata\\\": {\\\\\\n \\\"title\\\": \\\"Home
|
||||
- Firecrawl\\\",\\\\\\n \\\"description\\\": \\\"Firecrawl crawls and
|
||||
converts any website into clean markdown.\\\",\\\\\\n \\\"language\\\":
|
||||
\\\"en\\\",\\\\\\n \\\"keywords\\\": \\\"Firecrawl,Markdown,Data,Mendable,Langchain\\\",\\\\\\n
|
||||
\ \\\"robots\\\": \\\"follow, index\\\",\\\\\\n \\\"ogTitle\\\":
|
||||
\\\"Firecrawl\\\",\\\\\\n \\\"ogDescription\\\": \\\"Turn any website
|
||||
into LLM-ready data.\\\",\\\\\\n \\\"ogUrl\\\": \\\"https://www.firecrawl.dev/\\\",\\\\\\n
|
||||
\ \\\"ogImage\\\": \\\"https://www.firecrawl.dev/og.png?123\\\",\\\\\\n
|
||||
\ \\\"ogLocaleAlternate\\\": [],\\\\\\n \\\"ogSiteName\\\": \\\"Firecrawl\\\",\\\\\\n
|
||||
\ \\\"sourceURL\\\": \\\"https://firecrawl.dev\\\",\\\\\\n \\\"statusCode\\\":
|
||||
200\\\\\\n }\\\\\\n }\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n### Map\\\\\\n\\\\\\n[Permalink:
|
||||
Map](https://github.com/firecrawl/firecrawl#map)\\\\\\n\\\\\\nUsed to map
|
||||
a URL and get urls of the website. This returns most links present on the
|
||||
website.\\\\\\n\\\\\\n```\\\\\\ncurl -X POST https://api.firecrawl.dev/v2/map
|
||||
\\\\\\\\\\n -H 'Content-Type: application/json' \\\\\\\\\\n -H 'Authorization:
|
||||
Bearer YOUR_API_KEY' \\\\\\\\\\n -d '{\\\\\\n \\\"url\\\": \\\"https://firecrawl.dev\\\"\\\\\\n
|
||||
\ }'\\\\\\n```\\\\\\n\\\\\\nResponse:\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n \\\"success\\\":
|
||||
true,\\\\\\n \\\"links\\\": [\\\\\\n { \\\"url\\\": \\\"https://firecrawl.dev\\\",
|
||||
\\\"title\\\": \\\"Firecrawl\\\", \\\"description\\\": \\\"Firecrawl is a
|
||||
tool that allows you to crawl a website and get the data you need.\\\" },\\\\\\n
|
||||
\ { \\\"url\\\": \\\"https://www.firecrawl.dev/pricing\\\", \\\"title\\\":
|
||||
\\\"Firecrawl Pricing\\\", \\\"description\\\": \\\"Firecrawl Pricing\\\"
|
||||
},\\\\\\n { \\\"url\\\": \\\"https://www.firecrawl.dev/blog\\\", \\\"title\\\":
|
||||
\\\"Firecrawl Blog\\\", \\\"description\\\": \\\"Firecrawl Blog\\\" },\\\\\\n
|
||||
\ { \\\"url\\\": \\\"https://www.firecrawl.dev/playground\\\", \\\"title\\\":
|
||||
\\\"Firecrawl Playground\\\", \\\"description\\\": \\\"Firecrawl Playground\\\"
|
||||
},\\\\\\n { \\\"url\\\": \\\"https://www.firecrawl.dev/smart-crawl\\\",
|
||||
\\\"title\\\": \\\"Firecrawl Smart Crawl\\\", \\\"description\\\": \\\"Firecrawl
|
||||
Smart Crawl\\\" }\\\\\\n ]\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n#### Map with search\\\\\\n\\\\\\n[Permalink:
|
||||
Map with search](https://github.com/firecrawl/firecrawl#map-with-search)\\\\\\n\\\\\\nMap
|
||||
with `search` param allows you to search for specific urls inside a website.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/map \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"url\\\": \\\"https://firecrawl.dev\\\",\\\\\\n \\\"search\\\":
|
||||
\\\"docs\\\"\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\nResponse will be an ordered
|
||||
list from the most relevant to the least relevant.\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"links\\\": [\\\\\\n { \\\"url\\\":
|
||||
\\\"https://docs.firecrawl.dev\\\", \\\"title\\\": \\\"Firecrawl Docs\\\",
|
||||
\\\"description\\\": \\\"Firecrawl Docs\\\" },\\\\\\n { \\\"url\\\": \\\"https://docs.firecrawl.dev/sdks/python\\\",
|
||||
\\\"title\\\": \\\"Firecrawl Python SDK\\\", \\\"description\\\": \\\"Firecrawl
|
||||
Python SDK\\\" },\\\\\\n { \\\"url\\\": \\\"https://docs.firecrawl.dev/learn/rag-llama3\\\",
|
||||
\\\"title\\\": \\\"Firecrawl RAG Llama 3\\\", \\\"description\\\": \\\"Firecrawl
|
||||
RAG Llama 3\\\" }\\\\\\n ]\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n### Search\\\\\\n\\\\\\n[Permalink:
|
||||
Search](https://github.com/firecrawl/firecrawl#search)\\\\\\n\\\\\\nSearch
|
||||
the web and get full content from results\\\\\\n\\\\\\nFirecrawl\u2019s search
|
||||
API allows you to perform web searches and optionally scrape the search results
|
||||
in one operation.\\\\\\n\\\\\\n- Choose specific output formats (markdown,
|
||||
HTML, links, screenshots)\\\\\\n- Search the web with customizable parameters
|
||||
(language, country, etc.)\\\\\\n- Optionally retrieve content from search
|
||||
results in various formats\\\\\\n- Control the number of results and set timeouts\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/search \\\\\\\\\\n -H \\\"Content-Type:
|
||||
application/json\\\" \\\\\\\\\\n -H \\\"Authorization: Bearer fc-YOUR_API_KEY\\\"
|
||||
\\\\\\\\\\n -d '{\\\\\\n \\\"query\\\": \\\"what is firecrawl?\\\",\\\\\\n
|
||||
\ \\\"limit\\\": 5\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n#### Response\\\\\\n\\\\\\n[Permalink:
|
||||
Response](https://github.com/firecrawl/firecrawl#response)\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"data\\\": [\\\\\\n {\\\\\\n \\\"url\\\":
|
||||
\\\"https://firecrawl.dev\\\",\\\\\\n \\\"title\\\": \\\"Firecrawl |
|
||||
Home Page\\\",\\\\\\n \\\"description\\\": \\\"Turn websites into LLM-ready
|
||||
data with Firecrawl\\\"\\\\\\n },\\\\\\n {\\\\\\n \\\"url\\\":
|
||||
\\\"https://docs.firecrawl.dev\\\",\\\\\\n \\\"title\\\": \\\"Documentation
|
||||
| Firecrawl\\\",\\\\\\n \\\"description\\\": \\\"Learn how to use Firecrawl
|
||||
in your own applications\\\"\\\\\\n }\\\\\\n ]\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n####
|
||||
With content scraping\\\\\\n\\\\\\n[Permalink: With content scraping](https://github.com/firecrawl/firecrawl#with-content-scraping)\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/search \\\\\\\\\\n -H \\\"Content-Type:
|
||||
application/json\\\" \\\\\\\\\\n -H \\\"Authorization: Bearer fc-YOUR_API_KEY\\\"
|
||||
\\\\\\\\\\n -d '{\\\\\\n \\\"query\\\": \\\"what is firecrawl?\\\",\\\\\\n
|
||||
\ \\\"limit\\\": 5,\\\\\\n \\\"scrapeOptions\\\": {\\\\\\n \\\"formats\\\":
|
||||
[\\\"markdown\\\", \\\"links\\\"]\\\\\\n }\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n###
|
||||
Extract (Beta)\\\\\\n\\\\\\n[Permalink: Extract (Beta)](https://github.com/firecrawl/firecrawl#extract-beta)\\\\\\n\\\\\\nGet
|
||||
structured data from entire websites with a prompt and/or a schema.\\\\\\n\\\\\\nYou
|
||||
can extract structured data from one or multiple URLs, including wildcards:\\\\\\n\\\\\\nSingle
|
||||
Page:\\\\\\nExample: [https://firecrawl.dev/some-page](https://firecrawl.dev/some-page)\\\\\\n\\\\\\nMultiple
|
||||
Pages / Full Domain\\\\\\nExample: [https://firecrawl.dev/](https://firecrawl.dev/)\\\\*\\\\\\n\\\\\\nWhen
|
||||
you use /\\\\*, Firecrawl will automatically crawl and parse all URLs it can
|
||||
discover in that domain, then extract the requested data.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/extract \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"urls\\\": [\\\\\\n \\\"https://firecrawl.dev/*\\\",\\\\\\n
|
||||
\ \\\"https://docs.firecrawl.dev/\\\",\\\\\\n \\\"https://www.ycombinator.com/companies\\\"\\\\\\n
|
||||
\ ],\\\\\\n \\\"prompt\\\": \\\"Extract the company mission, whether
|
||||
it is open source, and whether it is in Y Combinator from the page.\\\",\\\\\\n
|
||||
\ \\\"schema\\\": {\\\\\\n \\\"type\\\": \\\"object\\\",\\\\\\n
|
||||
\ \\\"properties\\\": {\\\\\\n \\\"company_mission\\\": {\\\\\\n
|
||||
\ \\\"type\\\": \\\"string\\\"\\\\\\n },\\\\\\n \\\"is_open_source\\\":
|
||||
{\\\\\\n \\\"type\\\": \\\"boolean\\\"\\\\\\n },\\\\\\n
|
||||
\ \\\"is_in_yc\\\": {\\\\\\n \\\"type\\\": \\\"boolean\\\"\\\\\\n
|
||||
\ }\\\\\\n },\\\\\\n \\\"required\\\": [\\\\\\n \\\"company_mission\\\",\\\\\\n
|
||||
\ \\\"is_open_source\\\",\\\\\\n \\\"is_in_yc\\\"\\\\\\n
|
||||
\ ]\\\\\\n }\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"id\\\": \\\"44aa536d-f1cb-4706-ab87-ed0386685740\\\",\\\\\\n
|
||||
\ \\\"urlTrace\\\": []\\\\\\n}\\\\\\n```\\\\\\n\\\\\\nIf you are using the
|
||||
sdks, it will auto pull the response for you:\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"data\\\": {\\\\\\n \\\"company_mission\\\":
|
||||
\\\"Firecrawl is the easiest way to extract data from the web. Developers
|
||||
use us to reliably convert URLs into LLM-ready markdown or structured data
|
||||
with a single API call.\\\",\\\\\\n \\\"supports_sso\\\": false,\\\\\\n
|
||||
\ \\\"is_open_source\\\": true,\\\\\\n \\\"is_in_yc\\\": true\\\\\\n
|
||||
\ }\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n### LLM Extraction (Beta)\\\\\\n\\\\\\n[Permalink:
|
||||
LLM Extraction (Beta)](https://github.com/firecrawl/firecrawl#llm-extraction-beta)\\\\\\n\\\\\\nUsed
|
||||
to extract structured data from scraped pages.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/scrape \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"url\\\": \\\"https://www.mendable.ai/\\\",\\\\\\n \\\"formats\\\":
|
||||
[\\\\\\n {\\\\\\n \\\"type\\\": \\\"json\\\",\\\\\\n \\\"schema\\\":
|
||||
{\\\\\\n \\\"type\\\": \\\"object\\\",\\\\\\n \\\"properties\\\":
|
||||
{\\\\\\n \\\"company_mission\\\": { \\\"type\\\": \\\"string\\\"
|
||||
},\\\\\\n \\\"supports_sso\\\": { \\\"type\\\": \\\"boolean\\\"
|
||||
},\\\\\\n \\\"is_open_source\\\": { \\\"type\\\": \\\"boolean\\\"
|
||||
},\\\\\\n \\\"is_in_yc\\\": { \\\"type\\\": \\\"boolean\\\" }\\\\\\n
|
||||
\ }\\\\\\n }\\\\\\n }\\\\\\n ]\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n```\\\\\\n{\\\\\\n
|
||||
\ \\\"success\\\": true,\\\\\\n \\\"data\\\": {\\\\\\n \\\"content\\\":
|
||||
\\\"Raw Content\\\",\\\\\\n \\\"metadata\\\": {\\\\\\n \\\"title\\\":
|
||||
\\\"Mendable\\\",\\\\\\n \\\"description\\\": \\\"Mendable allows you
|
||||
to easily build AI chat applications. Ingest, customize, then deploy with
|
||||
one line of code anywhere you want. Brought to you by SideGuide\\\",\\\\\\n
|
||||
\ \\\"robots\\\": \\\"follow, index\\\",\\\\\\n \\\"ogTitle\\\":
|
||||
\\\"Mendable\\\",\\\\\\n \\\"ogDescription\\\": \\\"Mendable allows you
|
||||
to easily build AI chat applications. Ingest, customize, then deploy with
|
||||
one line of code anywhere you want. Brought to you by SideGuide\\\",\\\\\\n
|
||||
\ \\\"ogUrl\\\": \\\"https://mendable.ai/\\\",\\\\\\n \\\"ogImage\\\":
|
||||
\\\"https://mendable.ai/mendable_new_og1.png\\\",\\\\\\n \\\"ogLocaleAlternate\\\":
|
||||
[],\\\\\\n \\\"ogSiteName\\\": \\\"Mendable\\\",\\\\\\n \\\"sourceURL\\\":
|
||||
\\\"https://mendable.ai/\\\"\\\\\\n },\\\\\\n \\\"json\\\": {\\\\\\n
|
||||
\ \\\"company_mission\\\": \\\"Train a secure AI on your technical resources
|
||||
that answers customer and employee questions so your team doesn't have to\\\",\\\\\\n
|
||||
\ \\\"supports_sso\\\": true,\\\\\\n \\\"is_open_source\\\": false,\\\\\\n
|
||||
\ \\\"is_in_yc\\\": true\\\\\\n }\\\\\\n }\\\\\\n}\\\\\\n```\\\\\\n\\\\\\n###
|
||||
Extracting without a schema (New)\\\\\\n\\\\\\n[Permalink: Extracting without
|
||||
a schema (New)](https://github.com/firecrawl/firecrawl#extracting-without-a-schema-new)\\\\\\n\\\\\\nYou
|
||||
can now extract without a schema by just passing a `prompt` to the endpoint.
|
||||
The llm chooses the structure of the data.\\\\\\n\\\\\\n```\\\\\\ncurl -X
|
||||
POST https://api.firecrawl.dev/v2/scrape \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"url\\\": \\\"https://docs.firecrawl.dev/\\\",\\\\\\n
|
||||
\ \\\"formats\\\": [\\\\\\n {\\\\\\n \\\"type\\\": \\\"json\\\",\\\\\\n
|
||||
\ \\\"prompt\\\": \\\"Extract the company mission from the page.\\\"\\\\\\n
|
||||
\ }\\\\\\n ]\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n### Interacting
|
||||
with the page with Actions (Cloud-only)\\\\\\n\\\\\\n[Permalink: Interacting
|
||||
with the page with Actions (Cloud-only)](https://github.com/firecrawl/firecrawl#interacting-with-the-page-with-actions-cloud-only)\\\\\\n\\\\\\nFirecrawl
|
||||
allows you to perform various actions on a web page before scraping its content.
|
||||
This is particularly useful for interacting with dynamic content, navigating
|
||||
through pages, or accessing content that requires user interaction.\\\\\\n\\\\\\nHere
|
||||
is an example of how to use actions to navigate to google.com, search for
|
||||
Firecrawl, click on the first result, and take a screenshot.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/scrape \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"url\\\": \\\"google.com\\\",\\\\\\n \\\"formats\\\":
|
||||
[\\\"markdown\\\"],\\\\\\n \\\"actions\\\": [\\\\\\n {\\\"type\\\":
|
||||
\\\"wait\\\", \\\"milliseconds\\\": 2000},\\\\\\n {\\\"type\\\":
|
||||
\\\"click\\\", \\\"selector\\\": \\\"textarea[title=\\\\\\\"Search\\\\\\\"]\\\"},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"wait\\\", \\\"milliseconds\\\": 2000},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"write\\\", \\\"text\\\": \\\"firecrawl\\\"},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"wait\\\", \\\"milliseconds\\\": 2000},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"press\\\", \\\"key\\\": \\\"ENTER\\\"},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"wait\\\", \\\"milliseconds\\\": 3000},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"click\\\", \\\"selector\\\": \\\"h3\\\"},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"wait\\\", \\\"milliseconds\\\": 3000},\\\\\\n
|
||||
\ {\\\"type\\\": \\\"screenshot\\\"}\\\\\\n ]\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n###
|
||||
Batch Scraping Multiple URLs (New)\\\\\\n\\\\\\n[Permalink: Batch Scraping
|
||||
Multiple URLs (New)](https://github.com/firecrawl/firecrawl#batch-scraping-multiple-urls-new)\\\\\\n\\\\\\nYou
|
||||
can now batch scrape multiple URLs at the same time. It is very similar to
|
||||
how the /crawl endpoint works. It submits a batch scrape job and returns a
|
||||
job ID to check the status of the batch scrape.\\\\\\n\\\\\\n```\\\\\\ncurl
|
||||
-X POST https://api.firecrawl.dev/v2/batch/scrape \\\\\\\\\\n -H 'Content-Type:
|
||||
application/json' \\\\\\\\\\n -H 'Authorization: Bearer YOUR_API_KEY' \\\\\\\\\\n
|
||||
\ -d '{\\\\\\n \\\"urls\\\": [\\\"https://docs.firecrawl.dev\\\", \\\"https://docs.firecrawl.dev/sdks/overview\\\"],\\\\\\n
|
||||
\ \\\"formats\\\" : [\\\"markdown\\\", \\\"html\\\"]\\\\\\n }'\\\\\\n```\\\\\\n\\\\\\n##
|
||||
Using Python SDK\\\\\\n\\\\\\n[Permalink: Using Python SDK](https://github.com/firecrawl/firecrawl#using-python-sdk)\\\\\\n\\\\\\n###
|
||||
Installing Python SDK\\\\\\n\\\\\\n[Permalink: Installing Python SDK](https://github.com/firecrawl/firecrawl#installing-python-sdk)\\\\\\n\\\\\\n```\\\\\\npip
|
||||
install firecrawl-py\\\\\\n```\\\\\\n\\\\\\n### Crawl a website\\\\\\n\\\\\\n[Permalink:
|
||||
Crawl a website](https://github.com/firecrawl/firecrawl#crawl-a-website)\\\\\\n\\\\\\n```\\\\\\nfrom
|
||||
firecrawl import Firecrawl\\\\\\n\\\\\\nfirecrawl = Firecrawl(api_key=\\\"fc-YOUR_API_KEY\\\")\\\\\\n\\\\\\n#
|
||||
Scrape a website (returns a Document)\\\\\\ndoc = firecrawl.scrape(\\\\\\n
|
||||
\ \\\"https://firecrawl.dev\\\",\\\\\\n formats=[\\\"markdown\\\", \\\"html\\\"],\\\\\\n)\\\\\\nprint(doc.markdown)\\\\\\n\\\\\\n#
|
||||
Crawl a website\\\\\\nresponse = firecrawl.crawl(\\\\\\n \\\"https://firecrawl.dev\\\",\\\\\\n
|
||||
\ limit=100,\\\\\\n scrape_options={\\\"formats\\\": [\\\"markdown\\\",
|
||||
\\\"html\\\"]},\\\\\\n poll_interval=30,\\\\\\n)\\\\\\nprint(response)\\\\\\n```\\\\\\n\\\\\\n###
|
||||
Extracting structured data from a URL\\\\\\n\\\\\\n[Permalink: Extracting
|
||||
structured data from a URL](https://github.com/firecrawl/firecrawl#extracting-structured-data-from-a-url)\\\\\\n\\\\\\nWith
|
||||
LLM extraction, you can easily extract structured data from any URL. We support
|
||||
pydantic schemas to make it easier for you too. Here is how you to use it:\\\\\\n\\\\\\n```\\\\\\nfrom
|
||||
pydantic import BaseModel, Field\\\\\\nfrom typing import List\\\\\\n\\\\\\nclass
|
||||
Article(BaseModel):\\\\\\n title: str\\\\\\n points: int\\\\\\n by:
|
||||
str\\\\\\n commentsURL: str\\\\\\n\\\\\\nclass TopArticles(BaseModel):\\\\\\n
|
||||
\ top: List[Article] = Field(..., description=\\\"Top 5 stories\\\")\\\\\\n\\\\\\n#
|
||||
Use JSON format with a Pydantic schema\\\\\\ndoc = firecrawl.scrape(\\\\\\n
|
||||
\ \\\"https://news.ycombinator.com\\\",\\\\\\n formats=[{\\\"type\\\":
|
||||
\\\"json\\\", \\\"schema\\\": TopArticles}],\\\\\\n)\\\\\\nprint(doc.json)\\\\\\n```\\\\\\n\\\\\\n##
|
||||
Using the Node SDK\\\\\\n\\\\\\n[Permalink: Using the Node SDK](https://github.com/firecrawl/firecrawl#using-the-node-sdk)\\\\\\n\\\\\\n###
|
||||
Installation\\\\\\n\\\\\\n[Permalink: Installation](https://github.com/firecrawl/firecrawl#installation)\\\\\\n\\\\\\nTo
|
||||
install the Firecrawl Node SDK, you can use npm:\\\\\\n\\\\\\n```\\\\\\nnpm
|
||||
install @mendable/firecrawl-js\\\\\\n```\\\\\\n\\\\\\n### Usage\\\\\\n\\\\\\n[Permalink:
|
||||
Usage](https://github.com/firecrawl/firecrawl#usage)\\\\\\n\\\\\\n1. Get an
|
||||
API key from [firecrawl.dev](https://firecrawl.dev/)\\\\\\n2. Set the API
|
||||
key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter
|
||||
to the `Firecrawl` class.\\\\\\n\\\\\\n```\\\\\\nimport Firecrawl from '@mendable/firecrawl-js';\\\\\\n\\\\\\nconst
|
||||
firecrawl = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });\\\\\\n\\\\\\n//
|
||||
Scrape a website\\\\\\nconst doc = await firecrawl.scrape('https://firecrawl.dev',
|
||||
{\\\\\\n formats: ['markdown', 'html'],\\\\\\n});\\\\\\nconsole.log(doc);\\\\\\n\\\\\\n//
|
||||
Crawl a website\\\\\\nconst response = await firecrawl.crawl('https://firecrawl.dev',
|
||||
{\\\\\\n limit: 100,\\\\\\n scrapeOptions: { formats: ['markdown', 'html']
|
||||
},\\\\\\n});\\\\\\nconsole.log(response);\\\\\\n```\\\\\\n\\\\\\n### Extracting
|
||||
structured data from a URL\\\\\\n\\\\\\n[Permalink: Extracting structured
|
||||
data from a URL](https://github.com/firecrawl/firecrawl#extracting-structured-data-from-a-url-1)\\\\\\n\\\\\\nWith
|
||||
LLM extraction, you can easily extract structured data from any URL. We support
|
||||
zod schema to make it easier for you too. Here is how to use it:\\\\\\n\\\\\\n```\\\\\\nimport
|
||||
Firecrawl from '@mendable/firecrawl-js';\\\\\\nimport { z } from 'zod';\\\\\\n\\\\\\nconst
|
||||
firecrawl = new Firecrawl({ apiKey: 'fc-YOUR_API_KEY' });\\\\\\n\\\\\\n//
|
||||
Define schema to extract contents into\\\\\\nconst schema = z.object({\\\\\\n
|
||||
\ top: z\\\\\\n .array(\\\\\\n z.object({\\\\\\n title: z.string(),\\\\\\n
|
||||
\ points: z.number(),\\\\\\n by: z.string(),\\\\\\n commentsURL:
|
||||
z.string(),\\\\\\n })\\\\\\n )\\\\\\n .length(5)\\\\\\n .describe('Top
|
||||
5 stories on Hacker News'),\\\\\\n});\\\\\\n\\\\\\n// Use the v2 extract API
|
||||
with direct Zod schema support\\\\\\nconst extractRes = await firecrawl.extract({\\\\\\n
|
||||
\ urls: ['https://news.ycombinator.com'],\\\\\\n schema,\\\\\\n prompt:
|
||||
'Extract the top 5 stories',\\\\\\n});\\\\\\n\\\\\\nconsole.log(extractRes);\\\\\\n```\\\\\\n\\\\\\n##
|
||||
Open Source vs Cloud Offering\\\\\\n\\\\\\n[Permalink: Open Source vs Cloud
|
||||
Offering](https://github.com/firecrawl/firecrawl#open-source-vs-cloud-offering)\\\\\\n\\\\\\nFirecrawl
|
||||
is open source available under the AGPL-3.0 license.\\\\\\n\\\\\\nTo deliver
|
||||
the best possible product, we offer a hosted version of Firecrawl alongside
|
||||
our open-source offering. The cloud solution allows us to continuously innovate
|
||||
and maintain a high-quality, sustainable service for all users.\\\\\\n\\\\\\nFirecrawl
|
||||
Cloud is available at [firecrawl.dev](https://firecrawl.dev/) and offers a
|
||||
range of features that are not available in the open source version:\\\\\\n\\\\\\n[](https://raw.githubusercontent.com/firecrawl/firecrawl/main/img/open-source-cloud.png)\\\\\\n\\\\\\n##
|
||||
Contributing\\\\\\n\\\\\\n[Permalink: Contributing](https://github.com/firecrawl/firecrawl#contributing)\\\\\\n\\\\\\nWe
|
||||
love contributions! Please read our [contributing guide](https://github.com/firecrawl/firecrawl/blob/main/CONTRIBUTING.md)
|
||||
before submitting a pull request. If you'd like to self-host, refer to the
|
||||
[self-hosting guide](https://github.com/firecrawl/firecrawl/blob/main/SELF_HOST.md).\\\\\\n\\\\\\n_It
|
||||
is the sole responsibility of the end users to respect websites' policies
|
||||
when scraping, searching and crawling with Firecrawl. Users are advised to
|
||||
adhere to the applicable privacy policies and terms of use of the websites
|
||||
prior to initiating any scraping activities. By default, Firecrawl respects
|
||||
the directives specified in the websites' robots.txt files when crawling.
|
||||
By utilizing Firecrawl, you expressly agree to comply with these conditions._\\\\\\n\\\\\\n##
|
||||
Contributors\\\\\\n\\\\\\n[Permalink: Contributors](https://github.com/firecrawl/firecrawl#contributors)\\\\\\n\\\\\\n[](https://github.com/firecrawl/firecrawl/graphs/contributors)\\\\\\n\\\\\\n##
|
||||
License Disclaimer\\\\\\n\\\\\\n[Permalink: License Disclaimer](https://github.com/firecrawl/firecrawl#license-disclaimer)\\\\\\n\\\\\\nThis
|
||||
project is primarily licensed under the GNU Affero General Public License
|
||||
v3.0 (AGPL-3.0), as specified in the LICENSE file in the root directory of
|
||||
this repository. However, certain components of this project are licensed
|
||||
under the MIT License. Refer to the LICENSE files in these specific directories
|
||||
for details.\\\\\\n\\\\\\nPlease note:\\\\\\n\\\\\\n- The AGPL-3.0 license
|
||||
applies to all parts of the project unless otherwise specified.\\\\\\n- The
|
||||
SDKs and some UI components are licensed under the MIT License. Refer to the
|
||||
LICENSE files in these specific directories for details.\\\\\\n- When using
|
||||
or contributing to this project, ensure you comply with the appropriate license
|
||||
terms for the specific component you are working with.\\\\\\n\\\\\\nFor more
|
||||
details on the licensing of specific components, please refer to the LICENSE
|
||||
files in the respective directories or contact the project maintainers.\\\\\\n\\\\\\n[\u2191
|
||||
Back to Top \u2191](https://github.com/firecrawl/firecrawl#readme-top)\\\\\\n\\\\\\n##
|
||||
About\\\\\\n\\\\\\n\U0001F525 The Web Data API for AI - Turn entire websites
|
||||
into LLM-ready markdown or structured data\\\\\\n\\\\\\n\\\\\\n[firecrawl.dev](https://firecrawl.dev/
|
||||
\\\"https://firecrawl.dev\\\")\\\\\\n\\\\\\n### Topics\\\\\\n\\\\\\n[markdown](https://github.com/topics/markdown
|
||||
\\\"Topic: markdown\\\") [crawler](https://github.com/topics/crawler \\\"Topic:
|
||||
crawler\\\") [scraper](https://github.com/topics/scraper \\\"Topic: scraper\\\")
|
||||
[ai](https://github.com/topics/ai \\\"Topic: ai\\\") [html-to-markdown](https://github.com/topics/html-to-markdown
|
||||
\\\"Topic: html-to-markdown\\\") [web-crawler](https://github.com/topics/web-crawler
|
||||
\\\"Topic: web-crawler\\\") [scraping](https://github.com/topics/scraping
|
||||
\\\"Topic: scraping\\\") [web-scraper](https://github.com/topics/web-scraper
|
||||
\\\"Topic: web-scraper\\\") [web-scraping](https://github.com/topics/web-scraping
|
||||
\\\"Topic: web-scraping\\\") [data-extraction](https://github.com/topics/data-extraction
|
||||
\\\"Topic: data-extraction\\\") [webscraping](https://github.com/topics/webscraping
|
||||
\\\"Topic: webscraping\\\") [web-data-extraction](https://github.com/topics/web-data-extraction
|
||||
\\\"Topic: web-data-extraction\\\") [ai-agents](https://github.com/topics/ai-agents
|
||||
\\\"Topic: ai-agents\\\") [web-search](https://github.com/topics/web-search
|
||||
\\\"Topic: web-search\\\") [ai-search](https://github.com/topics/ai-search
|
||||
\\\"Topic: ai-search\\\") [web-data](https://github.com/topics/web-data \\\"Topic:
|
||||
web-data\\\") [llm](https://github.com/topics/llm \\\"Topic: llm\\\") [ai-crawler](https://github.com/topics/ai-crawler
|
||||
\\\"Topic: ai-crawler\\\") [ai-scraping](https://github.com/topics/ai-scraping
|
||||
\\\"Topic: ai-scraping\\\")\\\\\\n\\\\\\n### Resources\\\\\\n\\\\\\n[Readme](https://github.com/firecrawl/firecrawl#readme-ov-file)\\\\\\n\\\\\\n###
|
||||
License\\\\\\n\\\\\\n[AGPL-3.0 license](https://github.com/firecrawl/firecrawl#AGPL-3.0-1-ov-file)\\\\\\n\\\\\\n###
|
||||
Contributing\\\\\\n\\\\\\n[Contributing](https://github.com/firecrawl/firecrawl#contributing-ov-file)\\\\\\n\\\\\\n###
|
||||
Uh oh!\\\\\\n\\\\\\nThere was an error while loading. [Please reload this
|
||||
page](https://github.com/firecrawl/firecrawl).\\\\\\n\\\\\\n[Activity](https://github.com/firecrawl/firecrawl/activity)\\\\\\n\\\\\\n[Custom
|
||||
properties](https://github.com/firecrawl/firecrawl/custom-properties)\\\\\\n\\\\\\n###
|
||||
Stars\\\\\\n\\\\\\n[**65.2k**\\\\\\\\\\nstars](https://github.com/firecrawl/firecrawl/stargazers)\\\\\\n\\\\\\n###
|
||||
Watchers\\\\\\n\\\\\\n[**256**\\\\\\\\\\nwatching](https://github.com/firecrawl/firecrawl/watchers)\\\\\\n\\\\\\n###
|
||||
Forks\\\\\\n\\\\\\n[**5.1k**\\\\\\\\\\nforks](https://github.com/firecrawl/firecrawl/forks)\\\\\\n\\\\\\n[Report
|
||||
repository](https://github.com/contact/report-content?content_url=https%3A%2F%2Fgithub.com%2Ffirecrawl%2Ffirecrawl&report=firecrawl+%28user%29)\\\\\\n\\\\\\n##
|
||||
[Releases\\\\ 28](https://github.com/firecrawl/firecrawl/releases)\\\\\\n\\\\\\n[v2.4.0\\\\\\\\\\nLatest\\\\\\\\\\n\\\\\\\\\\n2
|
||||
weeks agoOct 13, 2025](https://github.com/firecrawl/firecrawl/releases/tag/v2.4.0)\\\\\\n\\\\\\n[\\\\+
|
||||
27 releases](https://github.com/firecrawl/firecrawl/releases)\\\\\\n\\\\\\n##
|
||||
[Packages\\\\ 3](https://github.com/orgs/firecrawl/packages?repo_name=firecrawl)\\\\\\n\\\\\\n-
|
||||
[firecrawl](https://github.com/orgs/firecrawl/packages/container/package/firecrawl)\\\\\\n-
|
||||
[playwright-service](https://github.com/orgs/firecrawl/packages/container/package/playwright-service)\\\\\\n-
|
||||
[nuq-postgres](https://github.com/orgs/firecrawl/packages/container/package/nuq-postgres)\\\\\\n\\\\\\n##
|
||||
[Contributors\\\\ 121](https://github.com/firecrawl/firecrawl/graphs/contributors)\\\\\\n\\\\\\n[\\\\+
|
||||
107 contributors](https://github.com/firecrawl/firecrawl/graphs/contributors)\\\\\\n\\\\\\n##
|
||||
Languages\\\\\\n\\\\\\n- [TypeScript73.5%](https://github.com/firecrawl/firecrawl/search?l=typescript)\\\\\\n-
|
||||
[Python18.9%](https://github.com/firecrawl/firecrawl/search?l=python)\\\\\\n-
|
||||
[Rust6.0%](https://github.com/firecrawl/firecrawl/search?l=rust)\\\\\\n- [Astro0.6%](https://github.com/firecrawl/firecrawl/search?l=astro)\\\\\\n-
|
||||
[JavaScript0.3%](https://github.com/firecrawl/firecrawl/search?l=javascript)\\\\\\n-
|
||||
[Jupyter Notebook0.2%](https://github.com/firecrawl/firecrawl/search?l=jupyter-notebook)\\\\\\n-
|
||||
Other0.5%\",\"metadata\":{\"octolytics-dimension-repository_network_root_id\":\"787076358\",\"visitor-hmac\":\"163b2538b2335f7d4000a770785477e15881e1101708ca5ff1e8c021095f6ca1\",\"og:type\":\"object\",\"language\":\"en\",\"route-action\":\"disambiguate\",\"og:title\":\"GitHub
|
||||
- firecrawl/firecrawl: \U0001F525 The Web Data API for AI - Turn entire websites
|
||||
into LLM-ready markdown or structured data\",\"octolytics-dimension-repository_public\":\"true\",\"octolytics-dimension-repository_network_root_nwo\":\"firecrawl/firecrawl\",\"browser-errors-url\":\"https://api.github.com/_private/browser/errors\",\"browser-stats-url\":\"https://api.github.com/_private/browser/stats\",\"twitter:title\":\"GitHub
|
||||
- firecrawl/firecrawl: \U0001F525 The Web Data API for AI - Turn entire websites
|
||||
into LLM-ready markdown or structured data\",\"ui-target\":\"full\",\"og:image\":\"https://repository-images.githubusercontent.com/787076358/f9616c09-3701-41ef-b5a6-fdf912ffb15b\",\"google-site-verification\":\"Apib7-x98H0j5cPqHWwSMm6dNU4GmODRoqxLiDzdx9I\",\"ogSiteName\":\"GitHub\",\"route-pattern\":\"/:user_id/:repository\",\"visitor-payload\":\"eyJyZWZlcnJlciI6IiIsInJlcXVlc3RfaWQiOiJBMTVGOjE3OUI0RDo2MzdFQUREOjg3MzEwOTM6NjkwMTE4MjUiLCJ2aXNpdG9yX2lkIjoiNDkyNzk2MzExNjg5OTIxMTMwMSIsInJlZ2lvbl9lZGdlIjoiaWFkIiwicmVnaW9uX3JlbmRlciI6ImlhZCJ9\",\"og:description\":\"\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data - firecrawl/firecrawl\",\"expected-hostname\":\"github.com\",\"release\":\"66136a30a16cc69206f1249b6ba072daa2174535\",\"title\":\"GitHub
|
||||
- firecrawl/firecrawl: \U0001F525 The Web Data API for AI - Turn entire websites
|
||||
into LLM-ready markdown or structured data\",\"ogDescription\":\"\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data - firecrawl/firecrawl\",\"twitter:card\":\"summary_large_image\",\"fb:app_id\":\"1401488693436528\",\"color-scheme\":\"light
|
||||
dark\",\"twitter:description\":\"\U0001F525 The Web Data API for AI - Turn
|
||||
entire websites into LLM-ready markdown or structured data - firecrawl/firecrawl\",\"favicon\":\"https://github.githubassets.com/favicons/favicon.svg\",\"viewport\":\"width=device-width\",\"twitter:image\":\"https://repository-images.githubusercontent.com/787076358/f9616c09-3701-41ef-b5a6-fdf912ffb15b\",\"user-login\":\"\",\"description\":\"\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data - firecrawl/firecrawl\",\"octolytics-dimension-repository_nwo\":\"firecrawl/firecrawl\",\"octolytics-dimension-user_id\":\"135057108\",\"twitter:site\":\"@github\",\"og:url\":\"https://github.com/firecrawl/firecrawl\",\"octolytics-dimension-user_login\":\"firecrawl\",\"hostname\":\"github.com\",\"current-catalog-service-hash\":\"f3abb0cc802f3d7b95fc8762b94bdcb13bf39634c40c357301c4aa1d67a256fb\",\"html-safe-nonce\":\"e5653da3800db7de2c8b4a64ff6367242043a9e452608e9a6941a1c9e8346cfc\",\"apple-itunes-app\":\"app-id=1477376905,
|
||||
app-argument=https://github.com/firecrawl/firecrawl\",\"turbo-cache-control\":\"no-preview\",\"og:site_name\":\"GitHub\",\"request-id\":\"A15F:179B4D:637EADD:8731093:69011825\",\"octolytics-url\":\"https://collector.github.com/github/collect\",\"octolytics-dimension-repository_is_fork\":\"false\",\"fetch-nonce\":\"v2:bc80e85b-edaf-e746-9f2c-ffdc75854b07\",\"og:image:alt\":\"\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data - firecrawl/firecrawl\",\"github-keyboard-shortcuts\":\"repository,copilot\",\"ogTitle\":\"GitHub
|
||||
- firecrawl/firecrawl: \U0001F525 The Web Data API for AI - Turn entire websites
|
||||
into LLM-ready markdown or structured data\",\"ogImage\":\"https://repository-images.githubusercontent.com/787076358/f9616c09-3701-41ef-b5a6-fdf912ffb15b\",\"analytics-location\":\"/<user-name>/<repo-name>\",\"route-controller\":\"files\",\"octolytics-dimension-repository_id\":\"787076358\",\"ogUrl\":\"https://github.com/firecrawl/firecrawl\",\"go-import\":\"github.com/firecrawl/firecrawl
|
||||
git https://github.com/firecrawl/firecrawl.git\",\"hovercard-subject-tag\":\"repository:787076358\",\"theme-color\":\"#1e2327\",\"turbo-body-classes\":\"logged-out
|
||||
env-production page-responsive\",\"scrapeId\":\"ec4d99a0-4c4f-4d1a-9fd2-08b8f891f883\",\"sourceURL\":\"https://github.com/firecrawl/firecrawl\",\"url\":\"https://github.com/firecrawl/firecrawl\",\"statusCode\":200,\"contentType\":\"text/html;
|
||||
charset=utf-8\",\"proxyUsed\":\"basic\",\"cacheState\":\"hit\",\"cachedAt\":\"2025-10-28T19:23:20.106Z\"}},{\"url\":\"https://x.com/firecrawl_dev?lang=en\",\"title\":\"Firecrawl
|
||||
(@firecrawl_dev) / Posts / X\",\"description\":\"Firecrawl (@firecrawl_dev)
|
||||
- Posts - Turn websites into LLM-ready data. Built by @mendableai team Open
|
||||
source: | X (formerly Twitter)\",\"position\":3},{\"url\":\"https://github.com/firecrawl\",\"title\":\"Firecrawl
|
||||
- GitHub\",\"description\":\"Building AI applications? You need clean, structured
|
||||
data from the web. Firecrawl handles the complexity of modern web scraping
|
||||
so you can focus on building ...\",\"position\":4,\"category\":\"github\",\"markdown\":\"[Skip
|
||||
to content](https://github.com/firecrawl#start-of-content)\\n\\nYou signed
|
||||
in with another tab or window. [Reload](https://github.com/firecrawl) to refresh
|
||||
your session.You signed out in another tab or window. [Reload](https://github.com/firecrawl)
|
||||
to refresh your session.You switched accounts on another tab or window. [Reload](https://github.com/firecrawl)
|
||||
to refresh your session.Dismiss alert\\n\\n{{ message }}\\n\\n[README.md](https://github.com/firecrawl/.github/tree/main/profile/README.md)\\n\\n#
|
||||
\U0001F525 Firecrawl\\n\\n[Permalink: \U0001F525 Firecrawl](https://github.com/firecrawl#-firecrawl)\\n\\n[](https://raw.githubusercontent.com/mendableai/firecrawl/main/img/firecrawl_logo.png)\\n\\n###
|
||||
Transform any website into LLM-ready data\\n\\n[Permalink: Transform any website
|
||||
into LLM-ready data](https://github.com/firecrawl#transform-any-website-into-llm-ready-data)\\n\\nAdvanced
|
||||
web scraping, crawling, and data extraction infrastructure for AI applications\\n\\n[](https://firecrawl.dev/)
|
||||
[](https://docs.firecrawl.dev/)
|
||||
[](https://discord.com/invite/gSmWdAkdwd)\\n\\n[](https://github.com/mendableai/firecrawl/blob/main/LICENSE)[](https://github.com/mendableai/firecrawl/stargazers)[](https://pepy.tech/project/firecrawl-py)[](https://x.com/firecrawl_dev)\\n\\n*
|
||||
* *\\n\\n## Why Firecrawl?\\n\\n[Permalink: Why Firecrawl?](https://github.com/firecrawl#why-firecrawl)\\n\\n**Building
|
||||
AI applications?** You need clean, structured data from the web. Firecrawl
|
||||
handles the complexity of modern web scraping so you can focus on building
|
||||
great products.\\n\\n## Our Core Ecosystem\\n\\n[Permalink: Our Core Ecosystem](https://github.com/firecrawl#our-core-ecosystem)\\n\\n###
|
||||
Main Repository\\n\\n[Permalink: Main Repository](https://github.com/firecrawl#main-repository)\\n\\n[](https://github.com/mendableai/firecrawl)\\n\\n**[firecrawl](https://github.com/mendableai/firecrawl)**
|
||||
\\\\- Core API & SDK\\n\\nTurn entire websites into LLM-ready markdown or
|
||||
structured data. Our flagship product with 40k+ stars.\\n\\n### Cloud API\\n\\n[Permalink:
|
||||
Cloud API](https://github.com/firecrawl#cloud-api)\\n\\n[](https://firecrawl.dev/)\\n\\n**[Firecrawl](https://firecrawl.dev/)**
|
||||
\\\\- Hosted API Service\\n\\nProduction-ready web scraping without infrastructure
|
||||
management. Get your API key and start scraping in minutes with our reliable,
|
||||
scalable cloud service.\\n\\n### MCP Integration\\n\\n[Permalink: MCP Integration](https://github.com/firecrawl#mcp-integration)\\n\\n[](https://github.com/mendableai/firecrawl-mcp-server)\\n\\n**[firecrawl-mcp-server](https://github.com/mendableai/firecrawl-mcp-server)**
|
||||
\\\\- Model Context Protocol Server\\n\\nAdd powerful web scraping capabilities
|
||||
to Claude, Cursor, and any MCP-compatible LLM client.\\n\\n## Community &
|
||||
Support\\n\\n[Permalink: Community & Support](https://github.com/firecrawl#community--support)\\n\\n[](https://discord.com/invite/gSmWdAkdwd)[](https://x.com/firecrawl_dev)[](https://www.linkedin.com/company/104100957/)[](https://github.com/mendableai/firecrawl/discussions)[](https://docs.firecrawl.dev/)\\n\\n##
|
||||
Built By Mendable\\n\\n[Permalink: Built By Mendable](https://github.com/firecrawl#built-by-mendable)\\n\\nWe're
|
||||
the team behind [Mendable.ai](https://mendable.ai/), passionate about making
|
||||
web data accessible for AI applications. Firecrawl powers thousands of AI
|
||||
products worldwide.\\n\\n* * *\\n\\n**Ready to build something amazing?**\\n\\n[Get
|
||||
your API key](https://firecrawl.dev/) and start scraping in minutes\\n\\n\\n[Star
|
||||
our main repo](https://github.com/mendableai/firecrawl) \u2022\\n[Try the
|
||||
playground](https://firecrawl.dev/playground) \u2022\\n[Read the docs](https://docs.firecrawl.dev/)\\n\\n##
|
||||
Pinned Loading\\n\\n1. [firecrawl](https://github.com/firecrawl/firecrawl)
|
||||
firecrawlPublic\\n\\n\\n\\n\\n\\n\\n\U0001F525 The Web Data API for AI - Turn
|
||||
entire websites into LLM-ready markdown or structured data\\n\\n\\n\\n\\nTypeScript[64.9k](https://github.com/firecrawl/firecrawl/stargazers)
|
||||
[5.1k](https://github.com/firecrawl/firecrawl/forks)\\n\\n2. [mendable-nextjs-chatbot](https://github.com/firecrawl/mendable-nextjs-chatbot)
|
||||
mendable-nextjs-chatbotPublic template\\n\\n\\n\\n\\n\\n\\nNext.js Starter
|
||||
Template for building chatbots with Mendable\\n\\n\\n\\n\\nTypeScript[256](https://github.com/firecrawl/mendable-nextjs-chatbot/stargazers)
|
||||
[52](https://github.com/firecrawl/mendable-nextjs-chatbot/forks)\\n\\n3. [rag-arena](https://github.com/firecrawl/rag-arena)
|
||||
rag-arenaPublic\\n\\n\\n\\n\\n\\n\\nOpen-source RAG evaluation through users'
|
||||
feedback\\n\\n\\n\\n\\nTypeScript[206](https://github.com/firecrawl/rag-arena/stargazers)
|
||||
[32](https://github.com/firecrawl/rag-arena/forks)\\n\\n4. [QA\\\\_clustering](https://github.com/firecrawl/QA_clustering)
|
||||
QA\\\\_clusteringPublic\\n\\n\\n\\n\\n\\n\\nAnalyzing chat interactions w/
|
||||
LLMs to improve \U0001F99C\U0001F517 Langchain docs\\n\\n\\n\\n\\nJupyter
|
||||
Notebook[80](https://github.com/firecrawl/QA_clustering/stargazers) [12](https://github.com/firecrawl/QA_clustering/forks)\\n\\n5.
|
||||
[data-connectors](https://github.com/firecrawl/data-connectors) data-connectorsPublic\\n\\n\\n\\n\\n\\n\\nLLM-ready
|
||||
data connectors\\n\\n\\n\\n\\nTypeScript[95](https://github.com/firecrawl/data-connectors/stargazers)
|
||||
[23](https://github.com/firecrawl/data-connectors/forks)\\n\\n6. [mendable-py](https://github.com/firecrawl/mendable-py)
|
||||
mendable-pyPublic\\n\\n\\n\\n\\n\\n\\nBuild Production Ready LLM Chat Apps
|
||||
in Minutes\\n\\n\\n\\n\\nPython[33](https://github.com/firecrawl/mendable-py/stargazers)
|
||||
[7](https://github.com/firecrawl/mendable-py/forks)\\n\\n\\n### Repositories\\n\\nLoading\\n\\nType\\n\\nAllPublicSourcesForksArchivedMirrorsTemplates\\n\\nLanguage\\n\\nAllCSSGoJavaJavaScriptJupyter
|
||||
NotebookMDXPythonRustTypeScript\\n\\nSort\\n\\nLast updatedNameStars\\n\\nShowing
|
||||
10 of 61 repositories\\n\\n- [firecrawl](https://github.com/firecrawl/firecrawl)\\nPublic\\n\\n\\n\\n\U0001F525
|
||||
The Web Data API for AI - Turn entire websites into LLM-ready markdown or
|
||||
structured data\\n\\n\\n\\n\\n\\n\\nfirecrawl/firecrawl\u2019s past year of
|
||||
commit activity\\n\\n\\n\\nTypeScript[64,949](https://github.com/firecrawl/firecrawl/stargazers)AGPL-3.0\\n[5,132](https://github.com/firecrawl/firecrawl/forks)
|
||||
[27](https://github.com/firecrawl/firecrawl/issues) [(2 issues need help)](https://github.com/firecrawl/firecrawl/issues?q=label%3A%22good+first+issue%22+is%3Aissue+is%3Aopen)
|
||||
[85](https://github.com/firecrawl/firecrawl/pulls)\\nUpdated 2 hours agoOct
|
||||
27, 2025\\n\\n- [firecrawl-docs](https://github.com/firecrawl/firecrawl-docs)\\nPublic\\n\\n\\n\\nDocumentation
|
||||
for Firecrawl.\\n\\n\\n\\n\\n\\n\\nfirecrawl/firecrawl-docs\u2019s past year
|
||||
of commit activity\\n\\n\\n\\nMDX[17](https://github.com/firecrawl/firecrawl-docs/stargazers)
|
||||
[35](https://github.com/firecrawl/firecrawl-docs/forks) [10](https://github.com/firecrawl/firecrawl-docs/issues)
|
||||
[5](https://github.com/firecrawl/firecrawl-docs/pulls)\\nUpdated 20 hours
|
||||
agoOct 26, 2025\\n\\n- [open-agent-builder](https://github.com/firecrawl/open-agent-builder)\\nPublic\\n\\n\\n\\n\U0001F525
|
||||
Visual workflow builder for AI agents powered by Firecrawl - drag-and-drop
|
||||
web scraping pipelines with real-time execution\\n\\n\\n\\n\\n\\n\\nfirecrawl/open-agent-builder\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nTypeScript[1,673](https://github.com/firecrawl/open-agent-builder/stargazers)
|
||||
[274](https://github.com/firecrawl/open-agent-builder/forks) [4](https://github.com/firecrawl/open-agent-builder/issues)
|
||||
[2](https://github.com/firecrawl/open-agent-builder/pulls)\\nUpdated last
|
||||
weekOct 20, 2025\\n\\n- [firecrawl-mcp-server](https://github.com/firecrawl/firecrawl-mcp-server)\\nPublic\\n\\n\\n\\n\U0001F525
|
||||
Official Firecrawl MCP Server - Adds powerful web scraping and search to Cursor,
|
||||
Claude and any other LLM clients.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n[**Uh
|
||||
oh!**](https://github.com/firecrawl/firecrawl-mcp-server/graphs/commit-activity)\\n\\n[There
|
||||
was an error while loading.](https://github.com/firecrawl/firecrawl-mcp-server/graphs/commit-activity)
|
||||
[Please reload this page](https://github.com/firecrawl).\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nfirecrawl/firecrawl-mcp-server\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nJavaScript[4,794](https://github.com/firecrawl/firecrawl-mcp-server/stargazers)MIT\\n[519](https://github.com/firecrawl/firecrawl-mcp-server/forks)
|
||||
[44](https://github.com/firecrawl/firecrawl-mcp-server/issues) [17](https://github.com/firecrawl/firecrawl-mcp-server/pulls)\\nUpdated
|
||||
last weekOct 19, 2025\\n\\n- [n8n-nodes-firecrawl](https://github.com/firecrawl/n8n-nodes-firecrawl)\\nPublic\\n\\n\\n\\nn8n
|
||||
node to interact with Firecrawl\\n\\n\\n\\n\\n\\n\\nfirecrawl/n8n-nodes-firecrawl\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nTypeScript[21](https://github.com/firecrawl/n8n-nodes-firecrawl/stargazers)MIT\\n[13](https://github.com/firecrawl/n8n-nodes-firecrawl/forks)
|
||||
[3](https://github.com/firecrawl/n8n-nodes-firecrawl/issues) [0](https://github.com/firecrawl/n8n-nodes-firecrawl/pulls)\\nUpdated
|
||||
2 weeks agoOct 17, 2025\\n\\n- [.github](https://github.com/firecrawl/.github)\\nPublic\\n\\n\\n\\n\\nfirecrawl/.github\u2019s
|
||||
past year of commit activity\\n\\n\\n\\n0\\n[1](https://github.com/firecrawl/.github/forks)
|
||||
[0](https://github.com/firecrawl/.github/issues) [0](https://github.com/firecrawl/.github/pulls)\\nUpdated
|
||||
2 weeks agoOct 12, 2025\\n\\n- [fire-enrich](https://github.com/firecrawl/fire-enrich)\\nPublic\\n\\n\\n\\n\U0001F525
|
||||
AI-powered data enrichment tool that transforms emails into rich datasets
|
||||
with company profiles, funding data, tech stacks, and more using Firecrawl
|
||||
and multi-agent AI\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n[**Uh oh!**](https://github.com/firecrawl/fire-enrich/graphs/commit-activity)\\n\\n[There
|
||||
was an error while loading.](https://github.com/firecrawl/fire-enrich/graphs/commit-activity)
|
||||
[Please reload this page](https://github.com/firecrawl).\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nfirecrawl/fire-enrich\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nTypeScript[953](https://github.com/firecrawl/fire-enrich/stargazers)MIT\\n[239](https://github.com/firecrawl/fire-enrich/forks)
|
||||
[12](https://github.com/firecrawl/fire-enrich/issues) [3](https://github.com/firecrawl/fire-enrich/pulls)\\nUpdated
|
||||
3 weeks agoOct 8, 2025\\n\\n- [firecrawl-java-sdk](https://github.com/firecrawl/firecrawl-java-sdk)\\nPublic\\n\\n\\n\\n\\nfirecrawl/firecrawl-java-sdk\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nJava[11](https://github.com/firecrawl/firecrawl-java-sdk/stargazers)MIT\\n[4](https://github.com/firecrawl/firecrawl-java-sdk/forks)
|
||||
[0](https://github.com/firecrawl/firecrawl-java-sdk/issues) [0](https://github.com/firecrawl/firecrawl-java-sdk/pulls)\\nUpdated
|
||||
last monthSep 28, 2025\\n\\n- [open-lovable](https://github.com/firecrawl/open-lovable)\\nPublic\\n\\n\\n\\n\U0001F525
|
||||
Clone and recreate any website as a modern React app in seconds\\n\\n\\n\\n\\n\\n\\nfirecrawl/open-lovable\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nTypeScript[21,320](https://github.com/firecrawl/open-lovable/stargazers)MIT\\n[3,986](https://github.com/firecrawl/open-lovable/forks)
|
||||
[70](https://github.com/firecrawl/open-lovable/issues) [33](https://github.com/firecrawl/open-lovable/pulls)\\nUpdated
|
||||
last monthSep 27, 2025\\n\\n- [mineru-api](https://github.com/firecrawl/mineru-api)\\nPublic\\n\\n\\n\\n\\nfirecrawl/mineru-api\u2019s
|
||||
past year of commit activity\\n\\n\\n\\nPython[12](https://github.com/firecrawl/mineru-api/stargazers)AGPL-3.0\\n[2](https://github.com/firecrawl/mineru-api/forks)
|
||||
[1](https://github.com/firecrawl/mineru-api/issues) [1](https://github.com/firecrawl/mineru-api/pulls)\\nUpdated
|
||||
on Sep 26Sep 26, 2025\\n\\n\\n[View all repositories](https://github.com/orgs/firecrawl/repositories?type=all)\\n\\n[**People**](https://github.com/orgs/firecrawl/people)\\n\\n[](https://github.com/alexnucci)[](https://github.com/micahstairs)[](https://github.com/nickscamara)[](https://github.com/mogery)[](https://github.com/developersdigest)\\n\\n####
|
||||
Top languages\\n\\n[TypeScript](https://github.com/orgs/firecrawl/repositories?language=typescript&type=all)
|
||||
[Python](https://github.com/orgs/firecrawl/repositories?language=python&type=all)
|
||||
[JavaScript](https://github.com/orgs/firecrawl/repositories?language=javascript&type=all)
|
||||
[Go](https://github.com/orgs/firecrawl/repositories?language=go&type=all)
|
||||
[MDX](https://github.com/orgs/firecrawl/repositories?language=mdx&type=all)\\n\\n####
|
||||
Most used topics\\n\\n[ai](https://github.com/search?q=topic%3Aai+org%3Afirecrawl+fork%3Atrue&type=repositories
|
||||
\\\"Topic: ai\\\") [firecrawl](https://github.com/search?q=topic%3Afirecrawl+org%3Afirecrawl+fork%3Atrue&type=repositories
|
||||
\\\"Topic: firecrawl\\\") [llm](https://github.com/search?q=topic%3Allm+org%3Afirecrawl+fork%3Atrue&type=repositories
|
||||
\\\"Topic: llm\\\") [web-crawler](https://github.com/search?q=topic%3Aweb-crawler+org%3Afirecrawl+fork%3Atrue&type=repositories
|
||||
\\\"Topic: web-crawler\\\") [web-scraping](https://github.com/search?q=topic%3Aweb-scraping+org%3Afirecrawl+fork%3Atrue&type=repositories
|
||||
\\\"Topic: web-scraping\\\")\\n\\nYou can\u2019t perform that action at this
|
||||
time.\",\"metadata\":{\"analytics-location\":\"/<org-login>\",\"apple-itunes-app\":\"app-id=1477376905,
|
||||
app-argument=https://github.com/firecrawl\",\"twitter:card\":\"summary_large_image\",\"google-site-verification\":\"Apib7-x98H0j5cPqHWwSMm6dNU4GmODRoqxLiDzdx9I\",\"description\":\"Web
|
||||
data API for AI. Firecrawl has 61 repositories available. Follow their code
|
||||
on GitHub.\",\"og:image\":\"https://avatars.githubusercontent.com/u/135057108?s=280&v=4\",\"og:type\":\"profile\",\"visitor-payload\":\"eyJyZWZlcnJlciI6IiIsInJlcXVlc3RfaWQiOiI5REFEOjIzM0ExMzo4RDgzNEI6QzQ5OUNCOjY4RkYzODlGIiwidmlzaXRvcl9pZCI6IjQwMDQ0MTI5MTkxMDA5NDY1OTEiLCJyZWdpb25fZWRnZSI6ImlhZCIsInJlZ2lvbl9yZW5kZXIiOiJpYWQifQ==\",\"github-keyboard-shortcuts\":\"copilot\",\"user-login\":\"\",\"viewport\":\"width=device-width\",\"og:description\":\"Web
|
||||
data API for AI. Firecrawl has 61 repositories available. Follow their code
|
||||
on GitHub.\",\"turbo-cache-control\":\"no-preview\",\"fetch-nonce\":\"v2:35a2e032-0081-3f6b-595e-967e32025c6f\",\"og:url\":\"https://github.com/firecrawl\",\"title\":\"Firecrawl
|
||||
\xB7 GitHub\",\"route-pattern\":\"/:user_id(.:format)\",\"route-action\":\"show\",\"octolytics-url\":\"https://collector.github.com/github/collect\",\"og:site_name\":\"GitHub\",\"twitter:title\":\"Firecrawl\",\"request-id\":\"9DAD:233A13:8D834B:C499CB:68FF389F\",\"ogSiteName\":\"GitHub\",\"fb:app_id\":\"1401488693436528\",\"language\":\"en\",\"twitter:image\":\"https://avatars.githubusercontent.com/u/135057108?s=280&v=4\",\"ogImage\":\"https://avatars.githubusercontent.com/u/135057108?s=280&v=4\",\"release\":\"c44b7f7aa5c70f3296484971978c9f4b1b473352\",\"theme-color\":\"#1e2327\",\"color-scheme\":\"light
|
||||
dark\",\"html-safe-nonce\":\"2d932295da6aa360d861f16279839ab109dc4e977a51bc99204477969d7d12c6\",\"ogTitle\":\"Firecrawl\",\"hovercard-subject-tag\":\"organization:135057108\",\"twitter:description\":\"Web
|
||||
data API for AI. Firecrawl has 61 repositories available. Follow their code
|
||||
on GitHub.\",\"hostname\":\"github.com\",\"ogUrl\":\"https://github.com/firecrawl\",\"ogDescription\":\"Web
|
||||
data API for AI. Firecrawl has 61 repositories available. Follow their code
|
||||
on GitHub.\",\"route-controller\":\"profiles\",\"favicon\":\"https://github.githubassets.com/favicons/favicon.svg\",\"visitor-hmac\":\"43cf3bbb9c57a3bcf0b1857fbcabcc78c274e7082e0d28c76cd6d4651bc2a920\",\"twitter:site\":\"@github\",\"ui-target\":\"full\",\"og:title\":\"Firecrawl\",\"expected-hostname\":\"github.com\",\"og:image:alt\":\"Web
|
||||
data API for AI. Firecrawl has 61 repositories available. Follow their code
|
||||
on GitHub.\",\"profile:username\":\"firecrawl\",\"current-catalog-service-hash\":\"4a1c50a83cf6cc4b55b6b9c53e553e3f847c876b87fb333f71f5d05db8f1a7db\",\"turbo-body-classes\":\"logged-out
|
||||
env-production page-responsive\",\"browser-stats-url\":\"https://api.github.com/_private/browser/stats\",\"browser-errors-url\":\"https://api.github.com/_private/browser/errors\",\"scrapeId\":\"65cbc300-be11-4a1a-9d20-c114fb8473a7\",\"sourceURL\":\"https://github.com/firecrawl\",\"url\":\"https://github.com/firecrawl\",\"statusCode\":200,\"contentType\":\"text/html;
|
||||
charset=utf-8\",\"proxyUsed\":\"basic\",\"cacheState\":\"hit\",\"cachedAt\":\"2025-10-27T09:17:20.756Z\"}},{\"url\":\"https://www.linkedin.com/company/firecrawl\",\"title\":\"Firecrawl
|
||||
| LinkedIn\",\"description\":\"Our Dify integration now uses Firecrawl /v2
|
||||
endpoints Scraping is 10x faster thanks to intelligent caching, plus we've
|
||||
added semantic ...\",\"position\":5}]},\"creditsUsed\":3}"
|
||||
headers:
|
||||
Access-Control-Allow-Origin:
|
||||
- '*'
|
||||
Alt-Svc:
|
||||
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
|
||||
Content-Length:
|
||||
- '93428'
|
||||
Content-Type:
|
||||
- application/json; charset=utf-8
|
||||
Date:
|
||||
- Wed, 29 Oct 2025 14:37:39 GMT
|
||||
ETag:
|
||||
- W/"16cf4-kHwVbMu4CCVG2UIt6p1g/gz5M4M"
|
||||
Via:
|
||||
- 1.1 google
|
||||
X-Powered-By:
|
||||
- Express
|
||||
X-Response-Time:
|
||||
- 13172.495ms
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -0,0 +1,18 @@
|
||||
import pytest
|
||||
|
||||
from crewai_tools.tools.firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import (
|
||||
FirecrawlCrawlWebsiteTool,
|
||||
)
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_firecrawl_crawl_tool_integration():
|
||||
tool = FirecrawlCrawlWebsiteTool(config={
|
||||
"limit": 2,
|
||||
"max_discovery_depth": 1,
|
||||
"scrape_options": {"formats": ["markdown"]}
|
||||
})
|
||||
result = tool.run(url="https://firecrawl.dev")
|
||||
|
||||
assert result is not None
|
||||
assert hasattr(result, 'status')
|
||||
assert result.status in ["completed", "scraping"]
|
||||
@@ -0,0 +1,15 @@
|
||||
import pytest
|
||||
|
||||
from crewai_tools.tools.firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import (
|
||||
FirecrawlScrapeWebsiteTool,
|
||||
)
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_firecrawl_scrape_tool_integration():
|
||||
tool = FirecrawlScrapeWebsiteTool()
|
||||
result = tool.run(url="https://firecrawl.dev")
|
||||
|
||||
assert result is not None
|
||||
assert hasattr(result, 'markdown')
|
||||
assert len(result.markdown) > 0
|
||||
assert "Firecrawl" in result.markdown or "firecrawl" in result.markdown.lower()
|
||||
12
lib/crewai-tools/tests/tools/firecrawl_search_tool_test.py
Normal file
12
lib/crewai-tools/tests/tools/firecrawl_search_tool_test.py
Normal file
@@ -0,0 +1,12 @@
|
||||
import pytest
|
||||
|
||||
from crewai_tools.tools.firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_firecrawl_search_tool_integration():
|
||||
tool = FirecrawlSearchTool()
|
||||
result = tool.run(query="firecrawl")
|
||||
|
||||
assert result is not None
|
||||
assert hasattr(result, 'web') or hasattr(result, 'news') or hasattr(result, 'images')
|
||||
@@ -119,7 +119,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
self.tools_handler = tools_handler
|
||||
self.original_tools = original_tools or []
|
||||
self.step_callback = step_callback
|
||||
self.use_stop_words = self.llm.supports_stop_words()
|
||||
self.tools_description = tools_description
|
||||
self.function_calling_llm = function_calling_llm
|
||||
self.respect_context_window = respect_context_window
|
||||
@@ -128,14 +127,25 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
self.messages: list[LLMMessage] = []
|
||||
self.iterations = 0
|
||||
self.log_error_after = 3
|
||||
existing_stop = getattr(self.llm, "stop", [])
|
||||
self.llm.stop = list(
|
||||
set(
|
||||
existing_stop + self.stop
|
||||
if isinstance(existing_stop, list)
|
||||
else self.stop
|
||||
if self.llm:
|
||||
# This may be mutating the shared llm object and needs further evaluation
|
||||
existing_stop = getattr(self.llm, "stop", [])
|
||||
self.llm.stop = list(
|
||||
set(
|
||||
existing_stop + self.stop
|
||||
if isinstance(existing_stop, list)
|
||||
else self.stop
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@property
|
||||
def use_stop_words(self) -> bool:
|
||||
"""Check to determine if stop words are being used.
|
||||
|
||||
Returns:
|
||||
bool: True if tool should be used or not.
|
||||
"""
|
||||
return self.llm.supports_stop_words() if self.llm else False
|
||||
|
||||
def invoke(self, inputs: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Execute the agent with given inputs.
|
||||
|
||||
@@ -498,7 +498,9 @@ class LLM(BaseLLM):
|
||||
}
|
||||
|
||||
# Remove None values from params
|
||||
return {k: v for k, v in params.items() if v is not None}
|
||||
params = {k: v for k, v in params.items() if v is not None}
|
||||
|
||||
return self._apply_additional_drop_params(params)
|
||||
|
||||
def _handle_streaming_response(
|
||||
self,
|
||||
|
||||
@@ -223,6 +223,49 @@ class BaseLLM(ABC):
|
||||
|
||||
return content
|
||||
|
||||
def _apply_additional_drop_params(self, params: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Apply additional_drop_params filtering to remove unwanted parameters.
|
||||
|
||||
This method provides consistent parameter filtering across all LLM providers.
|
||||
It should be called after building the final params dict and before making
|
||||
the provider API call.
|
||||
|
||||
Args:
|
||||
params: The parameters dictionary to filter
|
||||
|
||||
Returns:
|
||||
Filtered parameters dictionary with specified params removed
|
||||
|
||||
Example:
|
||||
>>> llm = LLM(model="o1-mini", additional_drop_params=["stop"])
|
||||
>>> params = {"model": "o1-mini", "messages": [...], "stop": ["\\n"]}
|
||||
>>> filtered = llm._apply_additional_drop_params(params)
|
||||
>>> "stop" in filtered
|
||||
False
|
||||
"""
|
||||
drop_params = (
|
||||
self.additional_params.get("additional_drop_params")
|
||||
or self.additional_params.get("drop_additionnal_params")
|
||||
or []
|
||||
)
|
||||
|
||||
if not drop_params:
|
||||
return params
|
||||
|
||||
filtered_params = params.copy()
|
||||
|
||||
for param_name in drop_params:
|
||||
if param_name in filtered_params:
|
||||
logging.debug(
|
||||
f"Dropping parameter '{param_name}' as specified in additional_drop_params"
|
||||
)
|
||||
filtered_params.pop(param_name)
|
||||
|
||||
filtered_params.pop("additional_drop_params", None)
|
||||
filtered_params.pop("drop_additionnal_params", None)
|
||||
|
||||
return filtered_params
|
||||
|
||||
def get_context_window_size(self) -> int:
|
||||
"""Get the context window size for the LLM.
|
||||
|
||||
|
||||
@@ -201,7 +201,7 @@ class AnthropicCompletion(BaseLLM):
|
||||
if tools and self.supports_tools:
|
||||
params["tools"] = self._convert_tools_for_interference(tools)
|
||||
|
||||
return params
|
||||
return self._apply_additional_drop_params(params)
|
||||
|
||||
def _convert_tools_for_interference(self, tools: list[dict]) -> list[dict]:
|
||||
"""Convert CrewAI tool format to Anthropic tool use format."""
|
||||
|
||||
@@ -273,7 +273,7 @@ class AzureCompletion(BaseLLM):
|
||||
params["tools"] = self._convert_tools_for_interference(tools)
|
||||
params["tool_choice"] = "auto"
|
||||
|
||||
return params
|
||||
return self._apply_additional_drop_params(params)
|
||||
|
||||
def _convert_tools_for_interference(self, tools: list[dict]) -> list[dict]:
|
||||
"""Convert CrewAI tool format to Azure OpenAI function calling format."""
|
||||
|
||||
@@ -249,7 +249,9 @@ class OpenAICompletion(BaseLLM):
|
||||
"timeout",
|
||||
}
|
||||
|
||||
return {k: v for k, v in params.items() if k not in crewai_specific_params}
|
||||
params = {k: v for k, v in params.items() if k not in crewai_specific_params}
|
||||
|
||||
return self._apply_additional_drop_params(params)
|
||||
|
||||
def _convert_tools_for_interference(self, tools: list[dict]) -> list[dict]:
|
||||
"""Convert CrewAI tool format to OpenAI function calling format."""
|
||||
|
||||
@@ -725,3 +725,108 @@ def test_native_provider_falls_back_to_litellm_when_not_in_supported_list():
|
||||
# Should fall back to LiteLLM
|
||||
assert llm.is_litellm is True
|
||||
assert llm.model == "groq/llama-3.1-70b-versatile"
|
||||
|
||||
|
||||
def test_additional_drop_params_filters_parameters_in_litellm():
|
||||
"""Test that additional_drop_params correctly filters out specified parameters in LiteLLM path."""
|
||||
with patch("crewai.llm.LITELLM_AVAILABLE", True), patch("crewai.llm.litellm"):
|
||||
llm = LLM(
|
||||
model="o1-mini",
|
||||
stop=["stop_sequence"],
|
||||
additional_drop_params=["stop"],
|
||||
is_litellm=True,
|
||||
)
|
||||
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
params = llm._prepare_completion_params(messages)
|
||||
|
||||
assert "stop" not in params
|
||||
assert "additional_drop_params" not in params
|
||||
assert params["model"] == "o1-mini"
|
||||
|
||||
|
||||
def test_additional_drop_params_with_agent():
    """Check that additional_drop_params is honoured when the LLM runs inside a Crew.

    ``litellm`` is replaced with a mock whose ``completion`` returns a canned
    response; after the crew is kicked off, the keyword arguments recorded on
    that mock must contain neither ``stop`` nor ``additional_drop_params``.
    """
    from unittest.mock import MagicMock, patch

    from crewai import Agent, Crew, Task

    with patch("crewai.llm.LITELLM_AVAILABLE", True):
        with patch("crewai.llm.litellm") as mock_litellm:
            # Canned completion: one choice with plain text and no tool calls,
            # plus token usage figures.
            only_choice = MagicMock()
            only_choice.message.content = "Hello!"
            only_choice.message.tool_calls = None

            fake_completion = MagicMock()
            fake_completion.choices = [only_choice]
            fake_completion.usage = MagicMock()
            fake_completion.usage.prompt_tokens = 10
            fake_completion.usage.completion_tokens = 5
            fake_completion.usage.total_tokens = 15
            mock_litellm.completion.return_value = fake_completion

            configured_llm = LLM(
                model="o1-mini",
                stop=["stop_sequence"],
                additional_drop_params=["stop"],
                is_litellm=True,
            )
            worker = Agent(
                role="Test Agent",
                goal="Test the LLM response format functionality.",
                backstory="An AI developer testing LLM integrations.",
                llm=configured_llm,
            )
            greeting_task = Task(
                description="Say hello",
                expected_output="A greeting",
                agent=worker,
            )

            Crew(agents=[worker], tasks=[greeting_task], verbose=False).kickoff()

            # The mocked completion must have been invoked at least once.
            completion_mock = mock_litellm.completion
            assert completion_mock.called

            # Inspect the keyword arguments of the most recent call.
            sent_kwargs = completion_mock.call_args.kwargs
            for absent_key in ("stop", "additional_drop_params"):
                assert absent_key not in sent_kwargs
|
||||
|
||||
|
||||
def test_additional_drop_params_supports_misspelled_variant():
    """Check that the misspelled ``drop_additionnal_params`` option is accepted too.

    Kept for backwards compatibility: passing ``drop_additionnal_params=["stop"]``
    must strip ``stop`` from the prepared params, and the misspelled option
    name itself must not appear in them.
    """
    llm_kwargs = {
        "model": "o1-mini",
        "stop": ["stop_sequence"],
        "drop_additionnal_params": ["stop"],
        "is_litellm": True,
    }
    conversation = [{"role": "user", "content": "Hello"}]

    # Pretend LiteLLM is installed and replace the module with a mock.
    with patch("crewai.llm.LITELLM_AVAILABLE", True):
        with patch("crewai.llm.litellm"):
            client = LLM(**llm_kwargs)
            prepared = client._prepare_completion_params(conversation)

            for absent_key in ("stop", "drop_additionnal_params"):
                assert absent_key not in prepared
            assert prepared["model"] == "o1-mini"
|
||||
|
||||
|
||||
def test_additional_drop_params_filters_multiple_parameters():
    """Check that several names listed in additional_drop_params are all stripped.

    Both ``stop`` and ``temperature`` are set on the LLM and listed for
    dropping; neither may appear in the prepared completion params, nor may
    the ``additional_drop_params`` option itself. The model name is kept.
    """
    names_to_drop = ["stop", "temperature"]
    conversation = [{"role": "user", "content": "Hello"}]

    # Pretend LiteLLM is installed and replace the module with a mock.
    with patch("crewai.llm.LITELLM_AVAILABLE", True):
        with patch("crewai.llm.litellm"):
            client = LLM(
                model="o1-mini",
                stop=["stop_sequence"],
                temperature=0.7,
                additional_drop_params=names_to_drop,
                is_litellm=True,
            )
            prepared = client._prepare_completion_params(conversation)

            for absent_key in ("stop", "temperature", "additional_drop_params"):
                assert absent_key not in prepared
            assert prepared["model"] == "o1-mini"
|
||||
|
||||
Reference in New Issue
Block a user