mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
docs: migrate embedder→embedding_model and require vectordb across tool docs; add provider examples (en/ko/pt-BR) (#3804)
Some checks failed
Some checks failed
* docs(tools): migrate embedder->embedding_model, require vectordb; add Chroma/Qdrant examples across en/ko/pt-BR PDF/TXT/XML/MDX/DOCX/CSV/Directory docs * docs(observability): apply latest Datadog tweaks in ko and pt-BR
This commit is contained in:
@@ -54,25 +54,25 @@ The following parameters can be used to customize the `CSVSearchTool`'s behavior
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = CSVSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -46,23 +46,25 @@ tool = DirectorySearchTool(directory='/path/to/directory')
|
||||
The DirectorySearchTool uses OpenAI for embeddings and summarization by default. Customization options for these settings include changing the model provider and configuration, enhancing flexibility for advanced users.
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DirectorySearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # Options include ollama, google, anthropic, llama2, and more
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Additional configurations here
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -56,25 +56,25 @@ The following parameters can be used to customize the `DOCXSearchTool`'s behavio
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = DOCXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
@@ -48,27 +48,25 @@ tool = MDXSearchTool(mdx='path/to/your/document.mdx')
|
||||
The tool defaults to using OpenAI for embeddings and summarization. For customization, utilize a configuration dictionary as shown below:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = MDXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # Options include google, openai, anthropic, llama2, etc.
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# Optional parameters can be included here.
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# Optional title for the embeddings can be added here.
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -45,28 +45,64 @@ tool = PDFSearchTool(pdf='path/to/your/document.pdf')
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows. Note: a vector database is required because generated embeddings must be stored and queried from a vectordb.
|
||||
|
||||
```python Code
|
||||
from crewai_tools import PDFSearchTool
|
||||
|
||||
# - embedding_model (required): choose provider + provider-specific config
|
||||
# - vectordb (required): choose vector DB and pass its config
|
||||
|
||||
tool = PDFSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
# Supported providers: "openai", "azure", "google-generativeai", "google-vertex",
|
||||
# "voyageai", "cohere", "huggingface", "jina", "sentence-transformer",
|
||||
# "text2vec", "ollama", "openclip", "instructor", "onnx", "roboflow", "watsonx", "custom"
|
||||
"provider": "openai", # or: "google-generativeai", "cohere", "ollama", ...
|
||||
"config": {
|
||||
# Model identifier for the chosen provider. "model" will be auto-mapped to "model_name" internally.
|
||||
"model": "text-embedding-3-small",
|
||||
# Optional: API key. If omitted, the tool will use provider-specific env vars when available
|
||||
# (e.g., OPENAI_API_KEY for provider="openai").
|
||||
# "api_key": "sk-...",
|
||||
|
||||
# Provider-specific examples:
|
||||
# --- Google Generative AI ---
|
||||
# (Set provider="google-generativeai" above)
|
||||
# "model": "models/embedding-001",
|
||||
# "task_type": "retrieval_document",
|
||||
# "title": "Embeddings",
|
||||
|
||||
# --- Cohere ---
|
||||
# (Set provider="cohere" above)
|
||||
# "model": "embed-english-v3.0",
|
||||
|
||||
# --- Ollama (local) ---
|
||||
# (Set provider="ollama" above)
|
||||
# "model": "nomic-embed-text",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# For ChromaDB: pass "settings" (chromadb.config.Settings) or rely on defaults.
|
||||
# Example (uncomment and import):
|
||||
# from chromadb.config import Settings
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# For Qdrant: pass "vectors_config" (qdrant_client.models.VectorParams).
|
||||
# Example (uncomment and import):
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# Note: collection name is controlled by the tool (default: "rag_tool_collection"), not set here.
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -57,25 +57,41 @@ By default, the tool uses OpenAI for both embeddings and summarization.
|
||||
To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = TXTSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
# Required: embeddings provider + config
|
||||
"embedding_model": {
|
||||
"provider": "openai", # or google-generativeai, cohere, ollama, ...
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...", # optional if env var is set
|
||||
# Provider examples:
|
||||
# Google → model: "models/embedding-001", task_type: "retrieval_document"
|
||||
# Cohere → model: "embed-english-v3.0"
|
||||
# Ollama → model: "nomic-embed-text"
|
||||
},
|
||||
},
|
||||
|
||||
# Required: vector database config
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# Chroma settings (optional persistence)
|
||||
# "settings": Settings(
|
||||
# persist_directory="/content/chroma",
|
||||
# allow_reset=True,
|
||||
# is_persistent=True,
|
||||
# ),
|
||||
|
||||
# Qdrant vector params example:
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
|
||||
# Note: collection name is controlled by the tool (default: "rag_tool_collection").
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
@@ -54,25 +54,25 @@ It is an optional parameter during the tool's initialization but must be provide
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python Code
|
||||
from chromadb.config import Settings
|
||||
|
||||
tool = XMLSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google", # or openai, ollama, ...
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
config={
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-3-small",
|
||||
# "api_key": "sk-...",
|
||||
},
|
||||
},
|
||||
"vectordb": {
|
||||
"provider": "chromadb", # or "qdrant"
|
||||
"config": {
|
||||
# "settings": Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True),
|
||||
# from qdrant_client.models import VectorParams, Distance
|
||||
# "vectors_config": VectorParams(size=384, distance=Distance.COSINE),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
Reference in New Issue
Block a user