fix: rag tool embeddings config

* fix: ensure config is not flattened, add tests

* chore: refactor inits to model_validator

* chore: refactor rag tool config parsing

* chore: add initial docs

* chore: add additional validation aliases for provider env vars

* chore: add solid docs

* chore: move imports to top

* fix: revert circular import

* fix: lazy import qdrant-client

* fix: allow collection name config

* chore: narrow model names for google

* chore: update additional docs

* chore: add backward compat on model name aliases

* chore: add tests for config changes
This commit is contained in:
Greyson LaLonde
2025-11-24 16:51:28 -05:00
committed by GitHub
parent 9c84475691
commit a928cde6ee
46 changed files with 1850 additions and 291 deletions

View File

@@ -64,10 +64,10 @@ tool = JSONSearchTool(
  },
  },
  "embedding_model": {
- "provider": "google", # or openai, ollama, ...
+ "provider": "google-generativeai", # or openai, ollama, ...
  "config": {
- "model": "models/embedding-001",
- "task_type": "retrieval_document",
+ "model_name": "gemini-embedding-001",
+ "task_type": "RETRIEVAL_DOCUMENT",
  # Further customization options can be added here.
  },
  },

View File

@@ -63,15 +63,15 @@ tool = PDFSearchTool(
  "config": {
  # Model identifier for the chosen provider. "model" will be auto-mapped to "model_name" internally.
  "model": "text-embedding-3-small",
- # Optional: API key. If omitted, the tool will use provider-specific env vars when available
- # (e.g., OPENAI_API_KEY for provider="openai").
+ # Optional: API key. If omitted, the tool will use provider-specific env vars
+ # (e.g., OPENAI_API_KEY or EMBEDDINGS_OPENAI_API_KEY for OpenAI).
  # "api_key": "sk-...",
  # Provider-specific examples:
  # --- Google Generative AI ---
  # (Set provider="google-generativeai" above)
- # "model": "models/embedding-001",
- # "task_type": "retrieval_document",
+ # "model_name": "gemini-embedding-001",
+ # "task_type": "RETRIEVAL_DOCUMENT",
  # "title": "Embeddings",
  # --- Cohere ---

View File

@@ -66,9 +66,9 @@ tool = TXTSearchTool(
  "provider": "openai", # or google-generativeai, cohere, ollama, ...
  "config": {
  "model": "text-embedding-3-small",
- # "api_key": "sk-...", # optional if env var is set
+ # "api_key": "sk-...", # optional if env var is set (e.g., OPENAI_API_KEY or EMBEDDINGS_OPENAI_API_KEY)
  # Provider examples:
- # Google → model: "models/embedding-001", task_type: "retrieval_document"
+ # Google → model_name: "gemini-embedding-001", task_type: "RETRIEVAL_DOCUMENT"
  # Cohere → model: "embed-english-v3.0"
  # Ollama → model: "nomic-embed-text"
  },