mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-02-02 12:08:15 +00:00
Compare commits
12 Commits
pr-2174
...
fix/embedd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a78f1f6ff5 | ||
|
|
794574957e | ||
|
|
66b19311a7 | ||
|
|
9fc84fc1ac | ||
|
|
f8f9df6d1d | ||
|
|
6e94edb777 | ||
|
|
bbe896d48c | ||
|
|
9298054436 | ||
|
|
90b7937796 | ||
|
|
520933b4c5 | ||
|
|
21265aeab4 | ||
|
|
70e656b5a9 |
@@ -150,6 +150,8 @@ result = crew.kickoff(
|
||||
|
||||
Here are examples of how to use different types of knowledge sources:
|
||||
|
||||
Note: Please ensure that you create the ./knowldge folder. All source files (e.g., .txt, .pdf, .xlsx, .json) should be placed in this folder for centralized management.
|
||||
|
||||
### Text File Knowledge Source
|
||||
```python
|
||||
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
|
||||
@@ -460,12 +462,12 @@ class SpaceNewsKnowledgeSource(BaseKnowledgeSource):
|
||||
data = response.json()
|
||||
articles = data.get('results', [])
|
||||
|
||||
formatted_data = self._format_articles(articles)
|
||||
formatted_data = self.validate_content(articles)
|
||||
return {self.api_endpoint: formatted_data}
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to fetch space news: {str(e)}")
|
||||
|
||||
def _format_articles(self, articles: list) -> str:
|
||||
def validate_content(self, articles: list) -> str:
|
||||
"""Format articles into readable text."""
|
||||
formatted = "Space News Articles:\n\n"
|
||||
for article in articles:
|
||||
|
||||
@@ -158,7 +158,11 @@ In this section, you'll find detailed examples that help you select, configure,
|
||||
|
||||
<Accordion title="Anthropic">
|
||||
```toml Code
|
||||
# Required
|
||||
ANTHROPIC_API_KEY=sk-ant-...
|
||||
|
||||
# Optional
|
||||
ANTHROPIC_API_BASE=<custom-base-url>
|
||||
```
|
||||
|
||||
Example usage in your CrewAI project:
|
||||
@@ -250,6 +254,40 @@ In this section, you'll find detailed examples that help you select, configure,
|
||||
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0"
|
||||
)
|
||||
```
|
||||
|
||||
Before using Amazon Bedrock, make sure you have boto3 installed in your environment
|
||||
|
||||
[Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html) is a managed service that provides access to multiple foundation models from top AI companies through a unified API, enabling secure and responsible AI application development.
|
||||
|
||||
| Model | Context Window | Best For |
|
||||
|-------------------------|----------------------|-------------------------------------------------------------------|
|
||||
| Amazon Nova Pro | Up to 300k tokens | High-performance, model balancing accuracy, speed, and cost-effectiveness across diverse tasks. |
|
||||
| Amazon Nova Micro | Up to 128k tokens | High-performance, cost-effective text-only model optimized for lowest latency responses. |
|
||||
| Amazon Nova Lite | Up to 300k tokens | High-performance, affordable multimodal processing for images, video, and text with real-time capabilities. |
|
||||
| Claude 3.7 Sonnet | Up to 128k tokens | High-performance, best for complex reasoning, coding & AI agents |
|
||||
| Claude 3.5 Sonnet v2 | Up to 200k tokens | State-of-the-art model specialized in software engineering, agentic capabilities, and computer interaction at optimized cost. |
|
||||
| Claude 3.5 Sonnet | Up to 200k tokens | High-performance model delivering superior intelligence and reasoning across diverse tasks with optimal speed-cost balance. |
|
||||
| Claude 3.5 Haiku | Up to 200k tokens | Fast, compact multimodal model optimized for quick responses and seamless human-like interactions |
|
||||
| Claude 3 Sonnet | Up to 200k tokens | Multimodal model balancing intelligence and speed for high-volume deployments. |
|
||||
| Claude 3 Haiku | Up to 200k tokens | Compact, high-speed multimodal model optimized for quick responses and natural conversational interactions |
|
||||
| Claude 3 Opus | Up to 200k tokens | Most advanced multimodal model excelling at complex tasks with human-like reasoning and superior contextual understanding. |
|
||||
| Claude 2.1 | Up to 200k tokens | Enhanced version with expanded context window, improved reliability, and reduced hallucinations for long-form and RAG applications |
|
||||
| Claude | Up to 100k tokens | Versatile model excelling in sophisticated dialogue, creative content, and precise instruction following. |
|
||||
| Claude Instant | Up to 100k tokens | Fast, cost-effective model for everyday tasks like dialogue, analysis, summarization, and document Q&A |
|
||||
| Llama 3.1 405B Instruct | Up to 128k tokens | Advanced LLM for synthetic data generation, distillation, and inference for chatbots, coding, and domain-specific tasks. |
|
||||
| Llama 3.1 70B Instruct | Up to 128k tokens | Powers complex conversations with superior contextual understanding, reasoning and text generation. |
|
||||
| Llama 3.1 8B Instruct | Up to 128k tokens | Advanced state-of-the-art model with language understanding, superior reasoning, and text generation. |
|
||||
| Llama 3 70B Instruct | Up to 8k tokens | Powers complex conversations with superior contextual understanding, reasoning and text generation. |
|
||||
| Llama 3 8B Instruct | Up to 8k tokens | Advanced state-of-the-art LLM with language understanding, superior reasoning, and text generation. |
|
||||
| Titan Text G1 - Lite | Up to 4k tokens | Lightweight, cost-effective model optimized for English tasks and fine-tuning with focus on summarization and content generation. |
|
||||
| Titan Text G1 - Express | Up to 8k tokens | Versatile model for general language tasks, chat, and RAG applications with support for English and 100+ languages. |
|
||||
| Cohere Command | Up to 4k tokens | Model specialized in following user commands and delivering practical enterprise solutions. |
|
||||
| Jurassic-2 Mid | Up to 8,191 tokens | Cost-effective model balancing quality and affordability for diverse language tasks like Q&A, summarization, and content generation. |
|
||||
| Jurassic-2 Ultra | Up to 8,191 tokens | Model for advanced text generation and comprehension, excelling in complex tasks like analysis and content creation. |
|
||||
| Jamba-Instruct | Up to 256k tokens | Model with extended context window optimized for cost-effective text generation, summarization, and Q&A. |
|
||||
| Mistral 7B Instruct | Up to 32k tokens | This LLM follows instructions, completes requests, and generates creative text. |
|
||||
| Mistral 8x7B Instruct | Up to 32k tokens | An MOE LLM that follows instructions, completes requests, and generates creative text. |
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Amazon SageMaker">
|
||||
|
||||
@@ -60,7 +60,8 @@ my_crew = Crew(
|
||||
```python Code
|
||||
from crewai import Crew, Process
|
||||
from crewai.memory import LongTermMemory, ShortTermMemory, EntityMemory
|
||||
from crewai.memory.storage import LTMSQLiteStorage, RAGStorage
|
||||
from crewai.memory.storage.rag_storage import RAGStorage
|
||||
from crewai.memory.storage.ltm_sqlite_storage import LTMSQLiteStorage
|
||||
from typing import List, Optional
|
||||
|
||||
# Assemble your crew with memory capabilities
|
||||
@@ -119,7 +120,7 @@ Example using environment variables:
|
||||
import os
|
||||
from crewai import Crew
|
||||
from crewai.memory import LongTermMemory
|
||||
from crewai.memory.storage import LTMSQLiteStorage
|
||||
from crewai.memory.storage.ltm_sqlite_storage import LTMSQLiteStorage
|
||||
|
||||
# Configure storage path using environment variable
|
||||
storage_path = os.getenv("CREWAI_STORAGE_DIR", "./storage")
|
||||
@@ -148,7 +149,7 @@ crew = Crew(memory=True) # Uses default storage locations
|
||||
```python
|
||||
from crewai import Crew
|
||||
from crewai.memory import LongTermMemory
|
||||
from crewai.memory.storage import LTMSQLiteStorage
|
||||
from crewai.memory.storage.ltm_sqlite_storage import LTMSQLiteStorage
|
||||
|
||||
# Configure custom storage paths
|
||||
crew = Crew(
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
---title: Customizing Prompts
|
||||
---
|
||||
title: Customizing Prompts
|
||||
description: Dive deeper into low-level prompt customization for CrewAI, enabling super custom and complex use cases for different models and languages.
|
||||
icon: message-pen
|
||||
---
|
||||
|
||||
@@ -7,8 +7,10 @@ icon: file-code
|
||||
# `JSONSearchTool`
|
||||
|
||||
<Note>
|
||||
The JSONSearchTool is currently in an experimental phase. This means the tool is under active development, and users might encounter unexpected behavior or changes.
|
||||
We highly encourage feedback on any issues or suggestions for improvements.
|
||||
The JSONSearchTool is currently in an experimental phase. This means the tool
|
||||
is under active development, and users might encounter unexpected behavior or
|
||||
changes. We highly encourage feedback on any issues or suggestions for
|
||||
improvements.
|
||||
</Note>
|
||||
|
||||
## Description
|
||||
@@ -60,7 +62,7 @@ tool = JSONSearchTool(
|
||||
# stream=true,
|
||||
},
|
||||
},
|
||||
"embedder": {
|
||||
"embedding_model": {
|
||||
"provider": "google", # or openai, ollama, ...
|
||||
"config": {
|
||||
"model": "models/embedding-001",
|
||||
@@ -70,4 +72,4 @@ tool = JSONSearchTool(
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
```
|
||||
|
||||
@@ -8,8 +8,8 @@ icon: vector-square
|
||||
|
||||
## Description
|
||||
|
||||
The `RagTool` is designed to answer questions by leveraging the power of Retrieval-Augmented Generation (RAG) through EmbedChain.
|
||||
It provides a dynamic knowledge base that can be queried to retrieve relevant information from various data sources.
|
||||
The `RagTool` is designed to answer questions by leveraging the power of Retrieval-Augmented Generation (RAG) through EmbedChain.
|
||||
It provides a dynamic knowledge base that can be queried to retrieve relevant information from various data sources.
|
||||
This tool is particularly useful for applications that require access to a vast array of information and need to provide contextually relevant answers.
|
||||
|
||||
## Example
|
||||
@@ -138,7 +138,7 @@ config = {
|
||||
"model": "gpt-4",
|
||||
}
|
||||
},
|
||||
"embedder": {
|
||||
"embedding_model": {
|
||||
"provider": "openai",
|
||||
"config": {
|
||||
"model": "text-embedding-ada-002"
|
||||
@@ -151,4 +151,4 @@ rag_tool = RagTool(config=config, summarize=True)
|
||||
|
||||
## Conclusion
|
||||
|
||||
The `RagTool` provides a powerful way to create and query knowledge bases from various data sources. By leveraging Retrieval-Augmented Generation, it enables agents to access and retrieve relevant information efficiently, enhancing their ability to provide accurate and contextually appropriate responses.
|
||||
The `RagTool` provides a powerful way to create and query knowledge bases from various data sources. By leveraging Retrieval-Augmented Generation, it enables agents to access and retrieve relevant information efficiently, enhancing their ability to provide accurate and contextually appropriate responses.
|
||||
|
||||
@@ -20,7 +20,6 @@ from crewai.tools.agent_tools.agent_tools import AgentTools
|
||||
from crewai.utilities import Converter, Prompts
|
||||
from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
|
||||
from crewai.utilities.converter import generate_model_description
|
||||
from crewai.utilities.embedding_configurator import EmbeddingConfig
|
||||
from crewai.utilities.events.agent_events import (
|
||||
AgentExecutionCompletedEvent,
|
||||
AgentExecutionErrorEvent,
|
||||
@@ -109,7 +108,7 @@ class Agent(BaseAgent):
|
||||
default="safe",
|
||||
description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).",
|
||||
)
|
||||
embedder: Optional[EmbeddingConfig] = Field(
|
||||
embedder: Optional[Dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description="Embedder configuration for the agent.",
|
||||
)
|
||||
@@ -135,7 +134,7 @@ class Agent(BaseAgent):
|
||||
self.cache_handler = CacheHandler()
|
||||
self.set_cache_handler(self.cache_handler)
|
||||
|
||||
def set_knowledge(self, crew_embedder: Optional[EmbeddingConfig] = None):
|
||||
def set_knowledge(self, crew_embedder: Optional[Dict[str, Any]] = None):
|
||||
try:
|
||||
if self.embedder is None and crew_embedder:
|
||||
self.embedder = crew_embedder
|
||||
|
||||
@@ -25,7 +25,6 @@ from crewai.tools.base_tool import BaseTool, Tool
|
||||
from crewai.utilities import I18N, Logger, RPMController
|
||||
from crewai.utilities.config import process_config
|
||||
from crewai.utilities.converter import Converter
|
||||
from crewai.utilities.embedding_configurator import EmbeddingConfig
|
||||
|
||||
T = TypeVar("T", bound="BaseAgent")
|
||||
|
||||
@@ -363,5 +362,5 @@ class BaseAgent(ABC, BaseModel):
|
||||
self._rpm_controller = rpm_controller
|
||||
self.create_agent_executor()
|
||||
|
||||
def set_knowledge(self, crew_embedder: Optional[EmbeddingConfig] = None):
|
||||
def set_knowledge(self, crew_embedder: Optional[Dict[str, Any]] = None):
|
||||
pass
|
||||
|
||||
@@ -10,6 +10,7 @@ dependencies = [
|
||||
|
||||
[project.scripts]
|
||||
kickoff = "{{folder_name}}.main:kickoff"
|
||||
run_crew = "{{folder_name}}.main:kickoff"
|
||||
plot = "{{folder_name}}.main:plot"
|
||||
|
||||
[build-system]
|
||||
|
||||
@@ -41,7 +41,6 @@ from crewai.tools.base_tool import Tool
|
||||
from crewai.types.usage_metrics import UsageMetrics
|
||||
from crewai.utilities import I18N, FileHandler, Logger, RPMController
|
||||
from crewai.utilities.constants import TRAINING_DATA_FILE
|
||||
from crewai.utilities.embedding_configurator import EmbeddingConfig
|
||||
from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator
|
||||
from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
|
||||
from crewai.utilities.events.crew_events import (
|
||||
@@ -146,7 +145,7 @@ class Crew(BaseModel):
|
||||
default=None,
|
||||
description="An instance of the UserMemory to be used by the Crew to store/fetch memories of a specific user.",
|
||||
)
|
||||
embedder: Optional[EmbeddingConfig] = Field(
|
||||
embedder: Optional[dict] = Field(
|
||||
default=None,
|
||||
description="Configuration for the embedder to be used for the crew.",
|
||||
)
|
||||
|
||||
@@ -5,7 +5,6 @@ from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||
from crewai.utilities.embedding_configurator import EmbeddingConfig
|
||||
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false" # removes logging from fastembed
|
||||
|
||||
@@ -22,14 +21,14 @@ class Knowledge(BaseModel):
|
||||
sources: List[BaseKnowledgeSource] = Field(default_factory=list)
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
storage: Optional[KnowledgeStorage] = Field(default=None)
|
||||
embedder: Optional[EmbeddingConfig] = None
|
||||
embedder: Optional[Dict[str, Any]] = None
|
||||
collection_name: Optional[str] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
collection_name: str,
|
||||
sources: List[BaseKnowledgeSource],
|
||||
embedder: Optional[EmbeddingConfig] = None,
|
||||
embedder: Optional[Dict[str, Any]] = None,
|
||||
storage: Optional[KnowledgeStorage] = None,
|
||||
**data,
|
||||
):
|
||||
|
||||
@@ -15,7 +15,6 @@ from chromadb.config import Settings
|
||||
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
|
||||
from crewai.utilities import EmbeddingConfigurator
|
||||
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
|
||||
from crewai.utilities.embedding_configurator import EmbeddingConfig
|
||||
from crewai.utilities.logger import Logger
|
||||
from crewai.utilities.paths import db_storage_path
|
||||
|
||||
@@ -49,7 +48,7 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
embedder: Optional[EmbeddingConfig] = None,
|
||||
embedder: Optional[Dict[str, Any]] = None,
|
||||
collection_name: Optional[str] = None,
|
||||
):
|
||||
self.collection_name = collection_name
|
||||
@@ -188,7 +187,7 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
||||
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
|
||||
)
|
||||
|
||||
def _set_embedder_config(self, embedder: Optional[EmbeddingConfig] = None) -> None:
|
||||
def _set_embedder_config(self, embedder: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""Set the embedding configuration for the knowledge storage.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import (
|
||||
Tuple,
|
||||
Type,
|
||||
Union,
|
||||
get_args,
|
||||
get_origin,
|
||||
)
|
||||
|
||||
from pydantic import (
|
||||
@@ -178,15 +180,29 @@ class Task(BaseModel):
|
||||
"""
|
||||
if v is not None:
|
||||
sig = inspect.signature(v)
|
||||
if len(sig.parameters) != 1:
|
||||
positional_args = [
|
||||
param
|
||||
for param in sig.parameters.values()
|
||||
if param.default is inspect.Parameter.empty
|
||||
]
|
||||
if len(positional_args) != 1:
|
||||
raise ValueError("Guardrail function must accept exactly one parameter")
|
||||
|
||||
# Check return annotation if present, but don't require it
|
||||
return_annotation = sig.return_annotation
|
||||
if return_annotation != inspect.Signature.empty:
|
||||
|
||||
return_annotation_args = get_args(return_annotation)
|
||||
if not (
|
||||
return_annotation == Tuple[bool, Any]
|
||||
or str(return_annotation) == "Tuple[bool, Any]"
|
||||
get_origin(return_annotation) is tuple
|
||||
and len(return_annotation_args) == 2
|
||||
and return_annotation_args[0] is bool
|
||||
and (
|
||||
return_annotation_args[1] is Any
|
||||
or return_annotation_args[1] is str
|
||||
or return_annotation_args[1] is TaskOutput
|
||||
or return_annotation_args[1] == Union[str, TaskOutput]
|
||||
)
|
||||
):
|
||||
raise ValueError(
|
||||
"If return type is annotated, it must be Tuple[bool, Any]"
|
||||
|
||||
@@ -1,84 +1,8 @@
|
||||
import os
|
||||
from typing import Any, Callable, Literal, cast
|
||||
from typing import Any, Dict, Optional, cast
|
||||
|
||||
from chromadb import Documents, EmbeddingFunction, Embeddings
|
||||
from chromadb.api.types import validate_embedding_function
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class EmbeddingProviderConfig(BaseModel):
|
||||
"""Configuration model for embedding providers.
|
||||
|
||||
Attributes:
|
||||
# Core Model Configuration
|
||||
model (str | None): The model identifier for embeddings, used across multiple providers
|
||||
like OpenAI, Azure, Watson, etc.
|
||||
embedder (str | Callable | None): Custom embedding function or callable for custom
|
||||
embedding implementations.
|
||||
|
||||
# API Authentication & Configuration
|
||||
api_key (str | None): Authentication key for various providers (OpenAI, VertexAI,
|
||||
Google, Cohere, VoyageAI, Watson).
|
||||
api_base (str | None): Base API URL override for OpenAI and Azure services.
|
||||
api_type (str | None): API type specification, particularly used for Azure configuration.
|
||||
api_version (str | None): API version for OpenAI and Azure services.
|
||||
api_url (str | None): API endpoint URL, used by HuggingFace and Watson services.
|
||||
url (str | None): Base URL for the embedding service, primarily used for Ollama and
|
||||
HuggingFace endpoints.
|
||||
|
||||
# Service-Specific Configuration
|
||||
project_id (str | None): Project identifier used by VertexAI and Watson services.
|
||||
organization_id (str | None): Organization identifier for OpenAI and Azure services.
|
||||
deployment_id (str | None): Deployment identifier for OpenAI and Azure services.
|
||||
region (str | None): Geographic region for VertexAI services.
|
||||
session (str | None): Session configuration for Amazon Bedrock embeddings.
|
||||
|
||||
# Request Configuration
|
||||
task_type (str | None): Specifies the task type for Google Generative AI embeddings.
|
||||
default_headers (str | None): Custom headers for OpenAI and Azure API requests.
|
||||
dimensions (str | None): Output dimensions specification for OpenAI and Azure embeddings.
|
||||
"""
|
||||
|
||||
# Core Model Configuration
|
||||
model: str | None = None
|
||||
embedder: str | Callable | None = None
|
||||
|
||||
# API Authentication & Configuration
|
||||
api_key: str | None = None
|
||||
api_base: str | None = None
|
||||
api_type: str | None = None
|
||||
api_version: str | None = None
|
||||
api_url: str | None = None
|
||||
url: str | None = None
|
||||
|
||||
# Service-Specific Configuration
|
||||
project_id: str | None = None
|
||||
organization_id: str | None = None
|
||||
deployment_id: str | None = None
|
||||
region: str | None = None
|
||||
session: str | None = None
|
||||
|
||||
# Request Configuration
|
||||
task_type: str | None = None
|
||||
default_headers: str | None = None
|
||||
dimensions: str | None = None
|
||||
|
||||
|
||||
class EmbeddingConfig(BaseModel):
|
||||
provider: Literal[
|
||||
"openai",
|
||||
"azure",
|
||||
"ollama",
|
||||
"vertexai",
|
||||
"google",
|
||||
"cohere",
|
||||
"voyageai",
|
||||
"bedrock",
|
||||
"huggingface",
|
||||
"watson",
|
||||
"custom",
|
||||
]
|
||||
config: EmbeddingProviderConfig | None = None
|
||||
|
||||
|
||||
class EmbeddingConfigurator:
|
||||
@@ -99,19 +23,15 @@ class EmbeddingConfigurator:
|
||||
|
||||
def configure_embedder(
|
||||
self,
|
||||
embedder_config: EmbeddingConfig | None = None,
|
||||
embedder_config: Optional[Dict[str, Any]] = None,
|
||||
) -> EmbeddingFunction:
|
||||
"""Configures and returns an embedding function based on the provided config."""
|
||||
if embedder_config is None:
|
||||
return self._create_default_embedding_function()
|
||||
|
||||
provider = embedder_config.provider
|
||||
config = (
|
||||
embedder_config.config
|
||||
if embedder_config.config
|
||||
else EmbeddingProviderConfig()
|
||||
)
|
||||
model_name = config.model if provider != "custom" else None
|
||||
provider = embedder_config.get("provider")
|
||||
config = embedder_config.get("config", {})
|
||||
model_name = config.get("model") if provider != "custom" else None
|
||||
|
||||
if provider not in self.embedding_functions:
|
||||
raise Exception(
|
||||
@@ -136,123 +56,123 @@ class EmbeddingConfigurator:
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configure_openai(config: EmbeddingProviderConfig, model_name: str):
|
||||
def _configure_openai(config, model_name):
|
||||
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
||||
OpenAIEmbeddingFunction,
|
||||
)
|
||||
|
||||
return OpenAIEmbeddingFunction(
|
||||
api_key=config.api_key or os.getenv("OPENAI_API_KEY"),
|
||||
api_key=config.get("api_key") or os.getenv("OPENAI_API_KEY"),
|
||||
model_name=model_name,
|
||||
api_base=config.api_base,
|
||||
api_type=config.api_type,
|
||||
api_version=config.api_version,
|
||||
default_headers=config.default_headers,
|
||||
dimensions=config.dimensions,
|
||||
deployment_id=config.deployment_id,
|
||||
organization_id=config.organization_id,
|
||||
api_base=config.get("api_base", None),
|
||||
api_type=config.get("api_type", None),
|
||||
api_version=config.get("api_version", None),
|
||||
default_headers=config.get("default_headers", None),
|
||||
dimensions=config.get("dimensions", None),
|
||||
deployment_id=config.get("deployment_id", None),
|
||||
organization_id=config.get("organization_id", None),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configure_azure(config: EmbeddingProviderConfig, model_name: str):
|
||||
def _configure_azure(config, model_name):
|
||||
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
||||
OpenAIEmbeddingFunction,
|
||||
)
|
||||
|
||||
return OpenAIEmbeddingFunction(
|
||||
api_key=config.api_key,
|
||||
api_base=config.api_base,
|
||||
api_type=config.api_type if config.api_type else "azure",
|
||||
api_version=config.api_version,
|
||||
api_key=config.get("api_key"),
|
||||
api_base=config.get("api_base"),
|
||||
api_type=config.get("api_type", "azure"),
|
||||
api_version=config.get("api_version"),
|
||||
model_name=model_name,
|
||||
default_headers=config.default_headers,
|
||||
dimensions=config.dimensions,
|
||||
deployment_id=config.deployment_id,
|
||||
organization_id=config.organization_id,
|
||||
default_headers=config.get("default_headers"),
|
||||
dimensions=config.get("dimensions"),
|
||||
deployment_id=config.get("deployment_id"),
|
||||
organization_id=config.get("organization_id"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configure_ollama(config: EmbeddingProviderConfig, model_name: str):
|
||||
def _configure_ollama(config, model_name):
|
||||
from chromadb.utils.embedding_functions.ollama_embedding_function import (
|
||||
OllamaEmbeddingFunction,
|
||||
)
|
||||
|
||||
return OllamaEmbeddingFunction(
|
||||
url=config.url if config.url else "http://localhost:11434/api/embeddings",
|
||||
url=config.get("url", "http://localhost:11434/api/embeddings"),
|
||||
model_name=model_name,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configure_vertexai(config: EmbeddingProviderConfig, model_name: str):
|
||||
def _configure_vertexai(config, model_name):
|
||||
from chromadb.utils.embedding_functions.google_embedding_function import (
|
||||
GoogleVertexEmbeddingFunction,
|
||||
)
|
||||
|
||||
return GoogleVertexEmbeddingFunction(
|
||||
model_name=model_name,
|
||||
api_key=config.api_key,
|
||||
project_id=config.project_id,
|
||||
region=config.region,
|
||||
api_key=config.get("api_key"),
|
||||
project_id=config.get("project_id"),
|
||||
region=config.get("region"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configure_google(config: EmbeddingProviderConfig, model_name: str):
|
||||
def _configure_google(config, model_name):
|
||||
from chromadb.utils.embedding_functions.google_embedding_function import (
|
||||
GoogleGenerativeAiEmbeddingFunction,
|
||||
)
|
||||
|
||||
return GoogleGenerativeAiEmbeddingFunction(
|
||||
model_name=model_name,
|
||||
api_key=config.api_key,
|
||||
task_type=config.task_type,
|
||||
api_key=config.get("api_key"),
|
||||
task_type=config.get("task_type"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configure_cohere(config: EmbeddingProviderConfig, model_name: str):
|
||||
def _configure_cohere(config, model_name):
|
||||
from chromadb.utils.embedding_functions.cohere_embedding_function import (
|
||||
CohereEmbeddingFunction,
|
||||
)
|
||||
|
||||
return CohereEmbeddingFunction(
|
||||
model_name=model_name,
|
||||
api_key=config.api_key,
|
||||
api_key=config.get("api_key"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configure_voyageai(config: EmbeddingProviderConfig, model_name: str):
|
||||
def _configure_voyageai(config, model_name):
|
||||
from chromadb.utils.embedding_functions.voyageai_embedding_function import (
|
||||
VoyageAIEmbeddingFunction,
|
||||
)
|
||||
|
||||
return VoyageAIEmbeddingFunction(
|
||||
model_name=model_name,
|
||||
api_key=config.api_key,
|
||||
api_key=config.get("api_key"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configure_bedrock(config: EmbeddingProviderConfig, model_name: str):
|
||||
def _configure_bedrock(config, model_name):
|
||||
from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import (
|
||||
AmazonBedrockEmbeddingFunction,
|
||||
)
|
||||
|
||||
# Allow custom model_name override with backwards compatibility
|
||||
kwargs = {"session": config.session}
|
||||
kwargs = {"session": config.get("session")}
|
||||
if model_name is not None:
|
||||
kwargs["model_name"] = model_name
|
||||
return AmazonBedrockEmbeddingFunction(**kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _configure_huggingface(config: EmbeddingProviderConfig, model_name: str):
|
||||
def _configure_huggingface(config, model_name):
|
||||
from chromadb.utils.embedding_functions.huggingface_embedding_function import (
|
||||
HuggingFaceEmbeddingServer,
|
||||
)
|
||||
|
||||
return HuggingFaceEmbeddingServer(
|
||||
url=config.api_url,
|
||||
url=config.get("api_url"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _configure_watson(config: EmbeddingProviderConfig, model_name: str):
|
||||
def _configure_watson(config, model_name):
|
||||
try:
|
||||
import ibm_watsonx_ai.foundation_models as watson_models
|
||||
from ibm_watsonx_ai import Credentials
|
||||
@@ -273,10 +193,12 @@ class EmbeddingConfigurator:
|
||||
}
|
||||
|
||||
embedding = watson_models.Embeddings(
|
||||
model_id=config.model,
|
||||
model_id=config.get("model"),
|
||||
params=embed_params,
|
||||
credentials=Credentials(api_key=config.api_key, url=config.api_url),
|
||||
project_id=config.project_id,
|
||||
credentials=Credentials(
|
||||
api_key=config.get("api_key"), url=config.get("api_url")
|
||||
),
|
||||
project_id=config.get("project_id"),
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -289,8 +211,8 @@ class EmbeddingConfigurator:
|
||||
return WatsonEmbeddingFunction()
|
||||
|
||||
@staticmethod
|
||||
def _configure_custom(config: EmbeddingProviderConfig):
|
||||
custom_embedder = config.embedder
|
||||
def _configure_custom(config):
|
||||
custom_embedder = config.get("embedder")
|
||||
if isinstance(custom_embedder, EmbeddingFunction):
|
||||
try:
|
||||
validate_embedding_function(custom_embedder)
|
||||
|
||||
@@ -67,15 +67,12 @@ class CrewAIEventsBus:
|
||||
source: The object emitting the event
|
||||
event: The event instance to emit
|
||||
"""
|
||||
event_type = type(event)
|
||||
if event_type in self._handlers:
|
||||
for handler in self._handlers[event_type]:
|
||||
handler(source, event)
|
||||
self._signal.send(source, event=event)
|
||||
for event_type, handlers in self._handlers.items():
|
||||
if isinstance(event, event_type):
|
||||
for handler in handlers:
|
||||
handler(source, event)
|
||||
|
||||
def clear_handlers(self) -> None:
|
||||
"""Clear all registered event handlers - useful for testing"""
|
||||
self._handlers.clear()
|
||||
self._signal.send(source, event=event)
|
||||
|
||||
def register_handler(
|
||||
self, event_type: Type[EventTypes], handler: Callable[[Any, EventTypes], None]
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
from functools import partial
|
||||
from typing import Tuple, Union
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
@@ -215,6 +217,75 @@ def test_multiple_output_type_error():
|
||||
)
|
||||
|
||||
|
||||
def test_guardrail_type_error():
|
||||
desc = "Give me a list of 5 interesting ideas to explore for na article, what makes them unique and interesting."
|
||||
expected_output = "Bullet point list of 5 interesting ideas."
|
||||
# Lambda function
|
||||
Task(
|
||||
description=desc,
|
||||
expected_output=expected_output,
|
||||
guardrail=lambda x: (True, x),
|
||||
)
|
||||
|
||||
# Function
|
||||
def guardrail_fn(x: TaskOutput) -> tuple[bool, TaskOutput]:
|
||||
return (True, x)
|
||||
|
||||
Task(
|
||||
description=desc,
|
||||
expected_output=expected_output,
|
||||
guardrail=guardrail_fn,
|
||||
)
|
||||
|
||||
class Object:
|
||||
def guardrail_fn(self, x: TaskOutput) -> tuple[bool, TaskOutput]:
|
||||
return (True, x)
|
||||
|
||||
@classmethod
|
||||
def guardrail_class_fn(cls, x: TaskOutput) -> tuple[bool, str]:
|
||||
return (True, x)
|
||||
|
||||
@staticmethod
|
||||
def guardrail_static_fn(x: TaskOutput) -> tuple[bool, Union[str, TaskOutput]]:
|
||||
return (True, x)
|
||||
|
||||
obj = Object()
|
||||
# Method
|
||||
Task(
|
||||
description=desc,
|
||||
expected_output=expected_output,
|
||||
guardrail=obj.guardrail_fn,
|
||||
)
|
||||
# Class method
|
||||
Task(
|
||||
description=desc,
|
||||
expected_output=expected_output,
|
||||
guardrail=Object.guardrail_class_fn,
|
||||
)
|
||||
# Static method
|
||||
Task(
|
||||
description=desc,
|
||||
expected_output=expected_output,
|
||||
guardrail=Object.guardrail_static_fn,
|
||||
)
|
||||
|
||||
def error_fn(x: TaskOutput, y: bool) -> Tuple[bool, TaskOutput]:
|
||||
return (y, x)
|
||||
|
||||
Task(
|
||||
description=desc,
|
||||
expected_output=expected_output,
|
||||
guardrail=partial(error_fn, y=True),
|
||||
)
|
||||
|
||||
with pytest.raises(ValidationError):
|
||||
Task(
|
||||
description=desc,
|
||||
expected_output=expected_output,
|
||||
guardrail=error_fn,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_output_pydantic_sequential():
|
||||
class ScoreOutput(BaseModel):
|
||||
|
||||
34
tests/utilities/events/test_crewai_event_bus.py
Normal file
34
tests/utilities/events/test_crewai_event_bus.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from unittest.mock import Mock
|
||||
|
||||
from crewai.utilities.events.base_events import CrewEvent
|
||||
from crewai.utilities.events.crewai_event_bus import crewai_event_bus
|
||||
|
||||
|
||||
class TestEvent(CrewEvent):
|
||||
pass
|
||||
|
||||
|
||||
def test_specific_event_handler():
|
||||
mock_handler = Mock()
|
||||
|
||||
@crewai_event_bus.on(TestEvent)
|
||||
def handler(source, event):
|
||||
mock_handler(source, event)
|
||||
|
||||
event = TestEvent(type="test_event")
|
||||
crewai_event_bus.emit("source_object", event)
|
||||
|
||||
mock_handler.assert_called_once_with("source_object", event)
|
||||
|
||||
|
||||
def test_wildcard_event_handler():
|
||||
mock_handler = Mock()
|
||||
|
||||
@crewai_event_bus.on(CrewEvent)
|
||||
def handler(source, event):
|
||||
mock_handler(source, event)
|
||||
|
||||
event = TestEvent(type="test_event")
|
||||
crewai_event_bus.emit("source_object", event)
|
||||
|
||||
mock_handler.assert_called_once_with("source_object", event)
|
||||
Reference in New Issue
Block a user