Compare commits

..

2 Commits

Author SHA1 Message Date
Greyson LaLonde
b3f0ac6221 Merge branch 'main' into gl/chore/disable-line-length-linting 2025-09-08 11:35:33 -04:00
Greyson Lalonde
c7d80348ec chore: disable E501 line length linting rule 2025-09-06 02:40:31 -04:00
1166 changed files with 30678 additions and 81341 deletions

View File

@@ -1,46 +0,0 @@
name: Build uv cache
on:
push:
branches:
- main
paths:
- "uv.lock"
- "pyproject.toml"
workflow_dispatch:
permissions:
contents: read
jobs:
build-cache:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
version: "0.8.4"
python-version: ${{ matrix.python-version }}
enable-cache: false
- name: Install dependencies and populate cache
run: |
echo "Building global UV cache for Python ${{ matrix.python-version }}..."
uv sync --all-groups --all-extras
echo "Cache populated successfully"
- name: Save uv caches
uses: actions/cache/save@v4
with:
path: |
~/.cache/uv
~/.local/share/uv
.venv
key: uv-main-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }}

View File

@@ -1,102 +0,0 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL Advanced"
on:
push:
branches: [ "main" ]
paths-ignore:
- "src/crewai/cli/templates/**"
pull_request:
branches: [ "main" ]
paths-ignore:
- "src/crewai/cli/templates/**"
jobs:
analyze:
name: Analyze (${{ matrix.language }})
# Runner size impacts CodeQL analysis time. To learn more, please see:
# - https://gh.io/recommended-hardware-resources-for-running-codeql
# - https://gh.io/supported-runners-and-hardware-resources
# - https://gh.io/using-larger-runners (GitHub.com only)
# Consider using larger runners or machines with greater resources for possible analysis time improvements.
runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
permissions:
# required for all workflows
security-events: write
# required to fetch internal or private CodeQL packs
packages: read
# only required for workflows in private repositories
actions: read
contents: read
strategy:
fail-fast: false
matrix:
include:
- language: actions
build-mode: none
- language: python
build-mode: none
# CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift'
# Use `c-cpp` to analyze code written in C, C++ or both
# Use 'java-kotlin' to analyze code written in Java, Kotlin or both
# Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
# To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
# see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
# If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
# your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
steps:
- name: Checkout repository
uses: actions/checkout@v4
# Add any setup steps before running the `github/codeql-action/init` action.
# This includes steps like installing compilers or runtimes (`actions/setup-node`
# or others). This is typically only required for manual builds.
# - name: Setup runtime (example)
# uses: actions/setup-example@v1
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
build-mode: ${{ matrix.build-mode }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
# queries: security-extended,security-and-quality
# If the analyze step fails for one of the languages you are analyzing with
# "We were unable to automatically build your code", modify the matrix above
# to set the build mode to "manual" for that language. Then modify this step
# to build your code.
# Command-line programs to run using the OS shell.
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
- if: matrix.build-mode == 'manual'
shell: bash
run: |
echo 'If you are using a "manual" build mode for one or more of the' \
'languages you are analyzing, replace this with the commands to build' \
'your code, for example:'
echo ' make bootstrap'
echo ' make release'
exit 1
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"

View File

@@ -2,9 +2,6 @@ name: Lint
on: [pull_request]
permissions:
contents: read
jobs:
lint:
runs-on: ubuntu-latest
@@ -18,27 +15,19 @@ jobs:
- name: Fetch Target Branch
run: git fetch origin $TARGET_BRANCH --depth=1
- name: Restore global uv cache
id: cache-restore
uses: actions/cache/restore@v4
with:
path: |
~/.cache/uv
~/.local/share/uv
.venv
key: uv-main-py3.11-${{ hashFiles('uv.lock') }}
restore-keys: |
uv-main-py3.11-
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
version: "0.8.4"
python-version: "3.11"
enable-cache: false
enable-cache: true
cache-dependency-glob: |
**/pyproject.toml
**/uv.lock
- name: Set up Python
run: uv python install 3.11
- name: Install dependencies
run: uv sync --all-packages --all-extras --no-install-project
run: uv sync --dev --no-install-project
- name: Get Changed Python Files
id: changed-files
@@ -56,13 +45,3 @@ jobs:
| tr ' ' '\n' \
| grep -v 'src/crewai/cli/templates/' \
| xargs -I{} uv run ruff check "{}"
- name: Save uv caches
if: steps.cache-restore.outputs.cache-hit != 'true'
uses: actions/cache/save@v4
with:
path: |
~/.cache/uv
~/.local/share/uv
.venv
key: uv-main-py3.11-${{ hashFiles('uv.lock') }}

29
.github/workflows/security-checker.yml vendored Normal file
View File

@@ -0,0 +1,29 @@
name: Security Checker
on: [pull_request]
jobs:
security-check:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
cache-dependency-glob: |
**/pyproject.toml
**/uv.lock
- name: Set up Python
run: uv python install 3.11
- name: Install dependencies
run: uv sync --dev --no-install-project
- name: Run Bandit
run: uv run bandit -c pyproject.toml -r src/ -ll

View File

@@ -3,7 +3,7 @@ name: Run Tests
on: [pull_request]
permissions:
contents: read
contents: write
env:
OPENAI_API_KEY: fake-api-key
@@ -22,77 +22,29 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0 # Fetch all history for proper diff
# - name: Restore global uv cache
# id: cache-restore
# uses: actions/cache/restore@v4
# with:
# path: |
# ~/.cache/uv
# ~/.local/share/uv
# .venv
# key: uv-main-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }}
# restore-keys: |
# uv-main-py${{ matrix.python-version }}-
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
version: "0.8.4"
python-version: ${{ matrix.python-version }}
enable-cache: false
enable-cache: true
cache-dependency-glob: |
**/pyproject.toml
**/uv.lock
- name: Set up Python ${{ matrix.python-version }}
run: uv python install ${{ matrix.python-version }}
- name: Install the project
run: uv sync --all-packages --all-extras
# - name: Restore test durations
# uses: actions/cache/restore@v4
# with:
# path: .test_durations_py*
# key: test-durations-py${{ matrix.python-version }}
run: uv sync --dev --all-extras
- name: Run tests (group ${{ matrix.group }} of 8)
run: |
PYTHON_VERSION_SAFE=$(echo "${{ matrix.python-version }}" | tr '.' '_')
DURATION_FILE=".test_durations_py${PYTHON_VERSION_SAFE}"
# Temporarily always skip cached durations to fix test splitting
# When durations don't match, pytest-split runs duplicate tests instead of splitting
echo "Using even test splitting (duration cache disabled until fix merged)"
DURATIONS_ARG=""
# Original logic (disabled temporarily):
# if [ ! -f "$DURATION_FILE" ]; then
# echo "No cached durations found, tests will be split evenly"
# DURATIONS_ARG=""
# elif git diff origin/${{ github.base_ref }}...HEAD --name-only 2>/dev/null | grep -q "^tests/.*\.py$"; then
# echo "Test files have changed, skipping cached durations to avoid mismatches"
# DURATIONS_ARG=""
# else
# echo "No test changes detected, using cached test durations for optimal splitting"
# DURATIONS_ARG="--durations-path=${DURATION_FILE}"
# fi
uv run pytest lib/crewai \
uv run pytest \
--block-network \
--timeout=30 \
-vv \
--splits 8 \
--group ${{ matrix.group }} \
$DURATIONS_ARG \
--durations=10 \
-n auto \
--maxfail=3 \
-m "not requires_local_services"
# - name: Save uv caches
# if: steps.cache-restore.outputs.cache-hit != 'true'
# uses: actions/cache/save@v4
# with:
# path: |
# ~/.cache/uv
# ~/.local/share/uv
# .venv
# key: uv-main-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }}
--maxfail=3

View File

@@ -3,7 +3,7 @@ name: Run Type Checks
on: [pull_request]
permissions:
contents: read
contents: write
jobs:
type-checker-matrix:
@@ -20,27 +20,19 @@ jobs:
with:
fetch-depth: 0 # Fetch all history for proper diff
- name: Restore global uv cache
id: cache-restore
uses: actions/cache/restore@v4
with:
path: |
~/.cache/uv
~/.local/share/uv
.venv
key: uv-main-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }}
restore-keys: |
uv-main-py${{ matrix.python-version }}-
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
version: "0.8.4"
python-version: ${{ matrix.python-version }}
enable-cache: false
enable-cache: true
cache-dependency-glob: |
**/pyproject.toml
**/uv.lock
- name: Set up Python ${{ matrix.python-version }}
run: uv python install ${{ matrix.python-version }}
- name: Install dependencies
run: uv sync --all-packages --all-extras
run: uv sync --dev --all-extras --no-install-project
- name: Get changed Python files
id: changed-files
@@ -74,16 +66,6 @@ jobs:
if: steps.changed-files.outputs.has_changes == 'false'
run: echo "No Python files in src/ were modified - skipping type checks"
- name: Save uv caches
if: steps.cache-restore.outputs.cache-hit != 'true'
uses: actions/cache/save@v4
with:
path: |
~/.cache/uv
~/.local/share/uv
.venv
key: uv-main-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }}
# Summary job to provide single status for branch protection
type-checker:
name: type-checker

View File

@@ -1,71 +0,0 @@
name: Update Test Durations
on:
push:
branches:
- main
paths:
- 'tests/**/*.py'
workflow_dispatch:
permissions:
contents: read
jobs:
update-durations:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.10', '3.11', '3.12', '3.13']
env:
OPENAI_API_KEY: fake-api-key
PYTHONUNBUFFERED: 1
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Restore global uv cache
id: cache-restore
uses: actions/cache/restore@v4
with:
path: |
~/.cache/uv
~/.local/share/uv
.venv
key: uv-main-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }}
restore-keys: |
uv-main-py${{ matrix.python-version }}-
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
version: "0.8.4"
python-version: ${{ matrix.python-version }}
enable-cache: false
- name: Install the project
run: uv sync --all-groups --all-extras
- name: Run all tests and store durations
run: |
PYTHON_VERSION_SAFE=$(echo "${{ matrix.python-version }}" | tr '.' '_')
uv run pytest --store-durations --durations-path=.test_durations_py${PYTHON_VERSION_SAFE} -n auto
continue-on-error: true
- name: Save durations to cache
if: always()
uses: actions/cache/save@v4
with:
path: .test_durations_py*
key: test-durations-py${{ matrix.python-version }}
- name: Save uv caches
if: steps.cache-restore.outputs.cache-hit != 'true'
uses: actions/cache/save@v4
with:
path: |
~/.cache/uv
~/.local/share/uv
.venv
key: uv-main-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }}

1
.gitignore vendored
View File

@@ -2,6 +2,7 @@
.pytest_cache
__pycache__
dist/
lib/
.env
assets/*
.idea

View File

@@ -6,19 +6,13 @@ repos:
entry: uv run ruff check
language: system
types: [python]
files: ^lib/crewai/src/
exclude: ^lib/crewai/
- id: ruff-format
name: ruff-format
entry: uv run ruff format
language: system
types: [python]
files: ^lib/crewai/src/
exclude: ^lib/crewai/
- id: mypy
name: mypy
entry: uv run mypy
language: system
types: [python]
files: ^lib/crewai/src/
exclude: ^lib/crewai/

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

View File

@@ -5,82 +5,6 @@ icon: "clock"
mode: "wide"
---
<Update label="Sep 20, 2025">
## v0.193.2
[View release on GitHub](https://github.com/crewAIInc/crewAI/releases/tag/0.193.2)
## What's Changed
- Updated pyproject templates to use the right version
</Update>
<Update label="Sep 20, 2025">
## v0.193.1
[View release on GitHub](https://github.com/crewAIInc/crewAI/releases/tag/0.193.1)
## What's Changed
- Series of minor fixes and linter improvements
</Update>
<Update label="Sep 19, 2025">
## v0.193.0
[View release on GitHub](https://github.com/crewAIInc/crewAI/releases/tag/0.193.0)
## Core Improvements & Fixes
- Fixed handling of the `model` parameter during OpenAI adapter initialization
- Resolved test duration cache issues in CI workflows
- Fixed flaky test related to repeated tool usage by agents
- Added missing event exports to `__init__.py` for consistent module behavior
- Dropped message storage from metadata in Mem0 to reduce bloat
- Fixed L2 distance metric support for backward compatibility in vector search
## New Features & Enhancements
- Introduced thread-safe platform context management
- Added test duration caching for optimized `pytest-split` runs
- Added ephemeral trace improvements for better trace control
- Made search parameters for RAG, knowledge, and memory fully configurable
- Enabled ChromaDB to use OpenAI API for embedding functions
- Added deeper observability tools for user-level insights
- Unified RAG storage system with instance-specific client support
## Documentation & Guides
- Updated `RagTool` references to reflect CrewAI native RAG implementation
- Improved internal docs for `langgraph` and `openai` agent adapters with type annotations and docstrings
</Update>
<Update label="Sep 11, 2025">
## v0.186.1
[View release on GitHub](https://github.com/crewAIInc/crewAI/releases/tag/0.186.1)
## What's Changed
- Fixed version not being found and silently failing reversion
- Bumped CrewAI version to 0.186.1 and updated dependencies in the CLI
</Update>
<Update label="Sep 10, 2025">
## v0.186.0
[View release on GitHub](https://github.com/crewAIInc/crewAI/releases/tag/0.186.0)
## What's Changed
- Refer to the GitHub release notes for detailed changes
</Update>
<Update label="Sep 04, 2025">
## v0.177.0

View File

@@ -404,10 +404,6 @@ crewai config reset
After resetting configuration, re-run `crewai login` to authenticate again.
</Tip>
<Tip>
CrewAI CLI handles authentication to the Tool Repository automatically when adding packages to your project. Just append `crewai` before any `uv` command to use it. E.g. `crewai uv add requests`. For more information, see [Tool Repository](https://docs.crewai.com/enterprise/features/tool-repository) docs.
</Tip>
<Note>
Configuration settings are stored in `~/.config/crewai/settings.json`. Some settings like organization name and UUID are read-only and managed through authentication and organization commands. Tool repository related settings are hidden and cannot be set directly by users.
</Note>

View File

@@ -7,7 +7,7 @@ mode: "wide"
## Overview
The CrewAI framework provides a sophisticated memory system designed to significantly enhance AI agent capabilities. CrewAI offers **two distinct memory approaches** that serve different use cases:
The CrewAI framework provides a sophisticated memory system designed to significantly enhance AI agent capabilities. CrewAI offers **three distinct memory approaches** that serve different use cases:
1. **Basic Memory System** - Built-in short-term, long-term, and entity memory
2. **External Memory** - Standalone external memory providers

View File

@@ -52,36 +52,6 @@ researcher = Agent(
)
```
## Adding other packages after installing a tool
After installing a tool from the CrewAI Enterprise Tool Repository, you need to use the `crewai uv` command to add other packages to your project.
Using pure `uv` commands will fail due to authentication to tool repository being handled by the CLI. By using the `crewai uv` command, you can add other packages to your project without having to worry about authentication.
Any `uv` command can be used with the `crewai uv` command, making it a powerful tool for managing your project's dependencies without the hassle of managing authentication through environment variables or other methods.
Say that you have installed a custom tool from the CrewAI Enterprise Tool Repository called "my-tool":
```bash
crewai tool install my-tool
```
And now you want to add another package to your project, you can use the following command:
```bash
crewai uv add requests
```
Other commands like `uv sync` or `uv remove` can also be used with the `crewai uv` command:
```bash
crewai uv sync
```
```bash
crewai uv remove requests
```
This will add the package to your project and update `pyproject.toml` accordingly.
## Creating and Publishing Tools
To create a new tool project:

View File

@@ -142,7 +142,7 @@ with MCPServerAdapter(server_params, "tool_name", connect_timeout=60) as mcp_too
## Using with CrewBase
To use MCPServer tools within a CrewBase class, use the `get_mcp_tools` method. Server configurations should be provided via the `mcp_server_params` attribute. You can pass either a single configuration or a list of multiple server configurations.
To use MCPServer tools within a CrewBase class, use the `mcp_tools` method. Server configurations should be provided via the mcp_server_params attribute. You can pass either a single configuration or a list of multiple server configurations.
```python
@CrewBase
@@ -175,34 +175,6 @@ class CrewWithMCP:
# ... rest of your crew setup ...
```
### Connection Timeout Configuration
You can configure the connection timeout for MCP servers by setting the `mcp_connect_timeout` class attribute. If no timeout is specified, it defaults to 30 seconds.
```python
@CrewBase
class CrewWithMCP:
mcp_server_params = [...]
mcp_connect_timeout = 60 # 60 seconds timeout for all MCP connections
@agent
def your_agent(self):
return Agent(config=self.agents_config["your_agent"], tools=self.get_mcp_tools())
```
```python
@CrewBase
class CrewWithDefaultTimeout:
mcp_server_params = [...]
# No mcp_connect_timeout specified - uses default 30 seconds
@agent
def your_agent(self):
return Agent(config=self.agents_config["your_agent"], tools=self.get_mcp_tools())
```
### Filtering Tools
You can filter which tools are available to your agent by passing a list of tool names to the `get_mcp_tools` method.
```python
@@ -214,22 +186,6 @@ def another_agent(self):
)
```
The timeout configuration applies to all MCP tool calls within the crew:
```python
@CrewBase
class CrewWithCustomTimeout:
mcp_server_params = [...]
mcp_connect_timeout = 90 # 90 seconds timeout for all MCP connections
@agent
def filtered_agent(self):
return Agent(
config=self.agents_config["your_agent"],
tools=self.get_mcp_tools("tool_1", "tool_2") # specific tools with custom timeout
)
```
## Explore MCP Integrations
<CardGroup cols={2}>

View File

@@ -27,7 +27,7 @@ Follow the steps below to get Crewing! 🚣‍♂️
<Step title="Navigate to your new crew project">
<CodeGroup>
```shell Terminal
cd latest_ai_development
cd latest-ai-development
```
</CodeGroup>
</Step>

View File

@@ -9,7 +9,7 @@ mode: "wide"
## Description
The `RagTool` is designed to answer questions by leveraging the power of Retrieval-Augmented Generation (RAG) through CrewAI's native RAG system.
The `RagTool` is designed to answer questions by leveraging the power of Retrieval-Augmented Generation (RAG) through EmbedChain.
It provides a dynamic knowledge base that can be queried to retrieve relevant information from various data sources.
This tool is particularly useful for applications that require access to a vast array of information and need to provide contextually relevant answers.
@@ -76,8 +76,8 @@ The `RagTool` can be used with a wide variety of data sources, including:
The `RagTool` accepts the following parameters:
- **summarize**: Optional. Whether to summarize the retrieved content. Default is `False`.
- **adapter**: Optional. A custom adapter for the knowledge base. If not provided, a CrewAIRagAdapter will be used.
- **config**: Optional. Configuration for the underlying CrewAI RAG system.
- **adapter**: Optional. A custom adapter for the knowledge base. If not provided, an EmbedchainAdapter will be used.
- **config**: Optional. Configuration for the underlying EmbedChain App.
## Adding Content
@@ -130,23 +130,44 @@ from crewai_tools import RagTool
# Create a RAG tool with custom configuration
config = {
"vectordb": {
"provider": "qdrant",
"app": {
"name": "custom_app",
},
"llm": {
"provider": "openai",
"config": {
"collection_name": "my-collection"
"model": "gpt-4",
}
},
"embedding_model": {
"provider": "openai",
"config": {
"model": "text-embedding-3-small"
"model": "text-embedding-ada-002"
}
},
"vectordb": {
"provider": "elasticsearch",
"config": {
"collection_name": "my-collection",
"cloud_id": "deployment-name:xxxx",
"api_key": "your-key",
"verify_certs": False
}
},
"chunker": {
"chunk_size": 400,
"chunk_overlap": 100,
"length_function": "len",
"min_chunk_size": 0
}
}
rag_tool = RagTool(config=config, summarize=True)
```
The internal RAG tool utilizes the Embedchain adapter, allowing you to pass any configuration options that are supported by Embedchain.
You can refer to the [Embedchain documentation](https://docs.embedchain.ai/components/introduction) for details.
Make sure to review the configuration options available in the .yaml file.
## Conclusion
The `RagTool` provides a powerful way to create and query knowledge bases from various data sources. By leveraging Retrieval-Augmented Generation, it enables agents to access and retrieve relevant information efficiently, enhancing their ability to provide accurate and contextually appropriate responses.

View File

@@ -5,82 +5,6 @@ icon: "clock"
mode: "wide"
---
<Update label="2025년 9월 20일">
## v0.193.2
[GitHub 릴리스 보기](https://github.com/crewAIInc/crewAI/releases/tag/0.193.2)
## 변경 사항
- 올바른 버전을 사용하도록 pyproject 템플릿 업데이트
</Update>
<Update label="2025년 9월 20일">
## v0.193.1
[GitHub 릴리스 보기](https://github.com/crewAIInc/crewAI/releases/tag/0.193.1)
## 변경 사항
- 일련의 사소한 수정 및 린터 개선
</Update>
<Update label="2025년 9월 19일">
## v0.193.0
[GitHub 릴리스 보기](https://github.com/crewAIInc/crewAI/releases/tag/0.193.0)
## 핵심 개선 사항 및 수정 사항
- OpenAI 어댑터 초기화 중 `model` 매개변수 처리 수정
- CI 워크플로에서 테스트 소요 시간 캐시 문제 해결
- 에이전트의 반복 도구 사용과 관련된 불안정한 테스트 수정
- 일관된 모듈 동작을 위해 누락된 이벤트 내보내기를 `__init__.py`에 추가
- 메타데이터 부하를 줄이기 위해 Mem0에서 메시지 저장 제거
- 벡터 검색의 하위 호환성을 위해 L2 거리 메트릭 지원 수정
## 새로운 기능 및 향상 사항
- 스레드 안전한 플랫폼 컨텍스트 관리 도입
- `pytest-split` 실행 최적화를 위한 테스트 소요 시간 캐싱 추가
- 더 나은 추적 제어를 위한 일시적(trace) 개선
- RAG, 지식, 메모리 검색 매개변수를 완전 구성 가능하게 변경
- ChromaDB가 임베딩 함수에 OpenAI API를 사용할 수 있도록 지원
- 사용자 수준 인사이트를 위한 심화된 관찰 가능성 도구 추가
- 인스턴스별 클라이언트를 지원하는 통합 RAG 스토리지 시스템
## 문서 및 가이드
- CrewAI 네이티브 RAG 구현을 반영하도록 `RagTool` 참조 업데이트
- 타입 주석과 도크스트링을 포함해 `langgraph` 및 `openai` 에이전트 어댑터 내부 문서 개선
</Update>
<Update label="2025년 9월 11일">
## v0.186.1
[GitHub 릴리스 보기](https://github.com/crewAIInc/crewAI/releases/tag/0.186.1)
## 변경 사항
- 버전을 찾지 못해 조용히 되돌리는(reversion) 문제 수정
- CLI에서 CrewAI 버전을 0.186.1로 올리고 의존성 업데이트
</Update>
<Update label="2025년 9월 10일">
## v0.186.0
[GitHub 릴리스 보기](https://github.com/crewAIInc/crewAI/releases/tag/0.186.0)
## 변경 사항
- 자세한 변경 사항은 GitHub 릴리스 노트를 참조하세요
</Update>
<Update label="2025년 9월 4일">
## v0.177.0

View File

@@ -7,8 +7,8 @@ mode: "wide"
## 개요
[Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP)는 AI 에이전트가 MCP 서버로 알려진 외부 서비스와 통신함으로써 LLM에 컨텍스트를 제공할 수 있도록 표준화된 방식을 제공합니다.
`crewai-tools` 라이브러리는 CrewAI의 기능을 확장하여, 이러한 MCP 서버에서 제공하는 툴을 에이전트에 원활하게 통합할 수 있도록 해줍니다.
[Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP)는 AI 에이전트가 MCP 서버로 알려진 외부 서비스와 통신함으로써 LLM에 컨텍스트를 제공할 수 있도록 표준화된 방식을 제공합니다.
`crewai-tools` 라이브러리는 CrewAI의 기능을 확장하여, 이러한 MCP 서버에서 제공하는 툴을 에이전트에 원활하게 통합할 수 있도록 해줍니다.
이를 통해 여러분의 crew는 방대한 기능 에코시스템에 접근할 수 있습니다.
현재 다음과 같은 전송 메커니즘을 지원합니다:
@@ -142,7 +142,7 @@ with MCPServerAdapter(server_params, "tool_name", connect_timeout=60) as mcp_too
## CrewBase와 함께 사용하기
CrewBase 클래스 내에서 MCPServer 도구를 사용하려면 `get_mcp_tools` 메서드를 사용하세요. 서버 구성은 `mcp_server_params` 속성을 통해 제공되어야 합니다. 단일 구성 또는 여러 서버 구성을 리스트 형태로 전달할 수 있습니다.
CrewBase 클래스 내에서 MCPServer 도구를 사용하려면 `mcp_tools` 메서드를 사용하세요. 서버 구성은 mcp_server_params 속성을 통해 제공되어야 합니다. 단일 구성 또는 여러 서버 구성을 리스트 형태로 전달할 수 있습니다.
```python
@CrewBase
@@ -175,34 +175,6 @@ class CrewWithMCP:
# ... 나머지 crew 설정 ...
```
### 연결 타임아웃 구성
`mcp_connect_timeout` 클래스 속성을 설정하여 MCP 서버의 연결 타임아웃을 구성할 수 있습니다. 타임아웃을 지정하지 않으면 기본값으로 30초가 사용됩니다.
```python
@CrewBase
class CrewWithMCP:
mcp_server_params = [...]
mcp_connect_timeout = 60 # 모든 MCP 연결에 60초 타임아웃
@agent
def your_agent(self):
return Agent(config=self.agents_config["your_agent"], tools=self.get_mcp_tools())
```
```python
@CrewBase
class CrewWithDefaultTimeout:
mcp_server_params = [...]
# mcp_connect_timeout 지정하지 않음 - 기본 30초 사용
@agent
def your_agent(self):
return Agent(config=self.agents_config["your_agent"], tools=self.get_mcp_tools())
```
### 도구 필터링
`get_mcp_tools` 메서드에 도구 이름의 리스트를 전달하여, 에이전트에 제공되는 도구를 필터링할 수 있습니다.
```python
@@ -214,22 +186,6 @@ def another_agent(self):
)
```
타임아웃 구성은 crew 내의 모든 MCP 도구 호출에 적용됩니다:
```python
@CrewBase
class CrewWithCustomTimeout:
mcp_server_params = [...]
mcp_connect_timeout = 90 # 모든 MCP 연결에 90초 타임아웃
@agent
def filtered_agent(self):
return Agent(
config=self.agents_config["your_agent"],
tools=self.get_mcp_tools("tool_1", "tool_2") # 사용자 지정 타임아웃으로 특정 도구
)
```
## MCP 통합 탐색
<CardGroup cols={2}>
@@ -305,4 +261,4 @@ SSE 전송은 적절하게 보안되지 않은 경우 DNS 리바인딩 공격에
### 제한 사항
* **지원되는 프리미티브**: 현재 `MCPServerAdapter`는 주로 MCP `tools`를 어댑팅하는 기능을 지원합니다. 다른 MCP 프리미티브(예: `prompts` 또는 `resources`)는 현재 이 어댑터를 통해 CrewAI 컴포넌트로 직접 통합되어 있지 않습니다.
* **출력 처리**: 어댑터는 일반적으로 MCP tool의 주요 텍스트 출력(예: `.content[0].text`)을 처리합니다. 복잡하거나 멀티모달 출력의 경우 이 패턴에 맞지 않으면 별도의 커스텀 처리가 필요할 수 있습니다.
* **출력 처리**: 어댑터는 일반적으로 MCP tool의 주요 텍스트 출력(예: `.content[0].text`)을 처리합니다. 복잡하거나 멀티모달 출력의 경우 이 패턴에 맞지 않으면 별도의 커스텀 처리가 필요할 수 있습니다.

View File

@@ -27,7 +27,7 @@ mode: "wide"
<Step title="새로운 crew 프로젝트로 이동하기">
<CodeGroup>
```shell Terminal
cd latest_ai_development
cd latest-ai-development
```
</CodeGroup>
</Step>

View File

@@ -5,82 +5,6 @@ icon: "clock"
mode: "wide"
---
<Update label="20 set 2025">
## v0.193.2
[Ver release no GitHub](https://github.com/crewAIInc/crewAI/releases/tag/0.193.2)
## O que Mudou
- Atualizados templates do pyproject para usar a versão correta
</Update>
<Update label="20 set 2025">
## v0.193.1
[Ver release no GitHub](https://github.com/crewAIInc/crewAI/releases/tag/0.193.1)
## O que Mudou
- Série de pequenas correções e melhorias de linter
</Update>
<Update label="19 set 2025">
## v0.193.0
[Ver release no GitHub](https://github.com/crewAIInc/crewAI/releases/tag/0.193.0)
## Melhorias e Correções Principais
- Corrigido manuseio do parâmetro `model` durante a inicialização do adaptador OpenAI
- Resolvidos problemas de cache da duração de testes nos fluxos de CI
- Corrigido teste instável relacionado ao uso repetido de ferramentas pelos agentes
- Adicionadas exportações de eventos ausentes no `__init__.py` para comportamento consistente do módulo
- Removido armazenamento de mensagem dos metadados no Mem0 para reduzir inchaço
- Corrigido suporte à métrica de distância L2 para compatibilidade retroativa na busca vetorial
## Novos Recursos e Melhorias
- Introduzida gestão de contexto de plataforma com segurança de threads
- Adicionado cache da duração de testes para execuções otimizadas do `pytest-split`
- Melhorias de traces efêmeros para melhor controle de rastreamento
- Parâmetros de busca para RAG, conhecimento e memória totalmente configuráveis
- Habilitado ChromaDB para usar a OpenAI API para funções de embedding
- Adicionadas ferramentas de observabilidade mais profundas para insights ao nível do usuário
- Sistema de armazenamento RAG unificado com suporte a cliente específico por instância
## Documentação e Guias
- Atualizadas referências do `RagTool` para refletir a implementação nativa de RAG do CrewAI
- Melhorada documentação interna para adaptadores de agente `langgraph` e `openai` com anotações de tipo e docstrings
</Update>
<Update label="11 set 2025">
## v0.186.1
[Ver release no GitHub](https://github.com/crewAIInc/crewAI/releases/tag/0.186.1)
## O que Mudou
- Corrigida falha silenciosa de reversão quando a versão não era encontrada
- Versão do CrewAI atualizada para 0.186.1 e dependências do CLI atualizadas
</Update>
<Update label="10 set 2025">
## v0.186.0
[Ver release no GitHub](https://github.com/crewAIInc/crewAI/releases/tag/0.186.0)
## O que Mudou
- Consulte as notas de lançamento no GitHub para detalhes completos
</Update>
<Update label="04 set 2025">
## v0.177.0

View File

@@ -118,7 +118,7 @@ with MCPServerAdapter(server_params, connect_timeout=60) as mcp_tools:
## Usando com CrewBase
Para usar ferramentas de servidores MCP dentro de uma classe CrewBase, utilize o método `get_mcp_tools`. As configurações dos servidores devem ser fornecidas via o atributo `mcp_server_params`. Você pode passar uma configuração única ou uma lista com múltiplas configurações.
Para usar ferramentas de servidores MCP dentro de uma classe CrewBase, utilize o método `mcp_tools`. As configurações dos servidores devem ser fornecidas via o atributo mcp_server_params. Você pode passar uma configuração única ou uma lista com múltiplas configurações.
```python
@CrewBase
@@ -146,65 +146,10 @@ class CrewWithMCP:
@agent
def your_agent(self):
return Agent(config=self.agents_config["your_agent"], tools=self.get_mcp_tools()) # obter todas as ferramentas disponíveis
return Agent(config=self.agents_config["your_agent"], tools=self.get_mcp_tools()) # você também pode filtrar quais ferramentas estarão disponíveis
# ... restante da configuração do seu crew ...
```
### Configuração de Timeout de Conexão
Você pode configurar o timeout de conexão para servidores MCP definindo o atributo de classe `mcp_connect_timeout`. Se nenhum timeout for especificado, o padrão é 30 segundos.
```python
@CrewBase
class CrewWithMCP:
mcp_server_params = [...]
mcp_connect_timeout = 60 # timeout de 60 segundos para todas as conexões MCP
@agent
def your_agent(self):
return Agent(config=self.agents_config["your_agent"], tools=self.get_mcp_tools())
```
```python
@CrewBase
class CrewWithDefaultTimeout:
mcp_server_params = [...]
# Nenhum mcp_connect_timeout especificado - usa padrão de 30 segundos
@agent
def your_agent(self):
return Agent(config=self.agents_config["your_agent"], tools=self.get_mcp_tools())
```
### Filtragem de Ferramentas
Você pode filtrar quais ferramentas estão disponíveis para seu agente passando uma lista de nomes de ferramentas para o método `get_mcp_tools`.
```python
@agent
def another_agent(self):
return Agent(
config=self.agents_config["your_agent"],
tools=self.get_mcp_tools("tool_1", "tool_2") # obter ferramentas específicas
)
```
A configuração de timeout se aplica a todas as chamadas de ferramentas MCP dentro do crew:
```python
@CrewBase
class CrewWithCustomTimeout:
mcp_server_params = [...]
mcp_connect_timeout = 90 # timeout de 90 segundos para todas as conexões MCP
@agent
def filtered_agent(self):
return Agent(
config=self.agents_config["your_agent"],
tools=self.get_mcp_tools("tool_1", "tool_2") # ferramentas específicas com timeout personalizado
)
```
## Explore Integrações MCP
<CardGroup cols={2}>

View File

@@ -27,7 +27,7 @@ Siga os passos abaixo para começar a tripular! 🚣‍♂️
<Step title="Navegue até o novo projeto da sua tripulação">
<CodeGroup>
```shell Terminal
cd latest_ai_development
cd latest-ai-development
```
</CodeGroup>
</Step>

View File

@@ -1 +0,0 @@
3.13

View File

View File

@@ -1,124 +0,0 @@
[project]
name = "crewai"
dynamic = ["version"]
description = ""
readme = "README.md"
authors = [
{ name = "Greyson Lalonde", email = "greyson.r.lalonde@gmail.com" }
]
keywords = [
"crewai",
"ai",
"agents",
"framework",
"orchestration",
"llm",
"core",
"typed",
]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Software Development :: Libraries :: Python Modules",
"Typing :: Typed",
]
requires-python = ">=3.10, <3.14"
dependencies = [
# Core Dependencies
"crewai",
"pydantic>=2.11.9",
"openai>=1.13.3",
"litellm==1.74.9",
"instructor>=1.3.3",
# Text Processing
"pdfplumber>=0.11.4",
"regex>=2024.9.11",
# Telemetry and Monitoring
"opentelemetry-api>=1.30.0",
"opentelemetry-sdk>=1.30.0",
"opentelemetry-exporter-otlp-proto-http>=1.30.0",
"tokenizers>=0.20.3",
"openpyxl>=3.1.5",
"pyvis>=0.3.2",
# Authentication and Security
"python-dotenv>=1.1.1",
"pyjwt>=2.9.0",
# Configuration and Utils
"click>=8.1.7",
"appdirs>=1.4.4",
"jsonref>=1.1.0",
"json-repair==0.25.2",
"tomli-w>=1.1.0",
"tomli>=2.0.2",
"blinker>=1.9.0",
"json5>=0.10.0",
"portalocker==2.7.0",
"chromadb~=1.1.0",
"pydantic-settings>=2.10.1",
"uv>=0.4.25",
]
[project.optional-dependencies]
tools = [
"crewai-tools",
]
embeddings = [
"tiktoken~=0.8.0"
]
pdfplumber = [
"pdfplumber>=0.11.4",
]
pandas = [
"pandas>=2.2.3",
]
openpyxl = [
"openpyxl>=3.1.5",
]
mem0 = ["mem0ai>=0.1.94"]
docling = [
"docling>=2.12.0",
]
aisuite = [
"aisuite>=0.1.10",
]
qdrant = [
"qdrant-client[fastembed]>=1.14.3",
]
aws = [
"boto3>=1.40.38",
]
watson = [
"ibm-watsonx-ai>=1.3.39",
]
voyageai = [
"voyageai>=0.3.5",
]
[project.scripts]
crewai = "crewai.cli.cli:crewai"
[project.urls]
Homepage = "https://crewai.com"
Documentation = "https://docs.crewai.com"
Repository = "https://github.com/crewAIInc/crewAI"
[tool.pytest.ini_options]
testpaths = ["tests"]
asyncio_mode = "strict"
asyncio_default_fixture_loop_scope = "function"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.version]
path = "src/crewai/__init__.py"
[tool.hatch.build.targets.wheel]
packages = ["src/crewai"]

View File

@@ -1,12 +0,0 @@
from crewai.agents.cache.cache_handler import CacheHandler
from crewai.agents.parser import AgentAction, AgentFinish, OutputParserError, parse
from crewai.agents.tools_handler import ToolsHandler
__all__ = [
"AgentAction",
"AgentFinish",
"CacheHandler",
"OutputParserError",
"ToolsHandler",
"parse",
]

View File

@@ -1,58 +0,0 @@
"""Base converter adapter for structured output conversion."""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from crewai.agents.agent_adapters.base_agent_adapter import BaseAgentAdapter
from crewai.task import Task
class BaseConverterAdapter(ABC):
"""Abstract base class for converter adapters in CrewAI.
Defines the common interface for converting agent outputs to structured formats.
All converter adapters must implement the methods defined here.
"""
def __init__(self, agent_adapter: BaseAgentAdapter) -> None:
"""Initialize the converter adapter.
Args:
agent_adapter: The agent adapter to configure for structured output.
"""
self.agent_adapter = agent_adapter
@abstractmethod
def configure_structured_output(self, task: Task) -> None:
"""Configure agents to return structured output.
Must support both JSON and Pydantic output formats.
Args:
task: The task requiring structured output.
"""
@abstractmethod
def enhance_system_prompt(self, base_prompt: str) -> str:
"""Enhance the system prompt with structured output instructions.
Args:
base_prompt: The original system prompt.
Returns:
Enhanced prompt with structured output guidance.
"""
@abstractmethod
def post_process_result(self, result: str) -> str:
"""Post-process the result to ensure proper string format.
Args:
result: The raw result from agent execution.
Returns:
Processed result as a string.
"""

View File

@@ -1,315 +0,0 @@
"""LangGraph agent adapter for CrewAI integration.
This module contains the LangGraphAgentAdapter class that integrates LangGraph ReAct agents
with CrewAI's agent system. Provides memory persistence, tool integration, and structured
output functionality.
"""
from collections.abc import Callable
from typing import Any, cast
from pydantic import ConfigDict, Field, PrivateAttr
from crewai.agents.agent_adapters.base_agent_adapter import BaseAgentAdapter
from crewai.agents.agent_adapters.langgraph.langgraph_tool_adapter import (
LangGraphToolAdapter,
)
from crewai.agents.agent_adapters.langgraph.protocols import (
LangGraphCheckPointMemoryModule,
LangGraphPrebuiltModule,
)
from crewai.agents.agent_adapters.langgraph.structured_output_converter import (
LangGraphConverterAdapter,
)
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.agent_events import (
AgentExecutionCompletedEvent,
AgentExecutionErrorEvent,
AgentExecutionStartedEvent,
)
from crewai.tools.agent_tools.agent_tools import AgentTools
from crewai.tools.base_tool import BaseTool
from crewai.utilities import Logger
from crewai.utilities.converter import Converter
from crewai.utilities.import_utils import require
class LangGraphAgentAdapter(BaseAgentAdapter):
"""Adapter for LangGraph agents to work with CrewAI.
This adapter integrates LangGraph's ReAct agents with CrewAI's agent system,
providing memory persistence, tool integration, and structured output support.
"""
model_config = ConfigDict(arbitrary_types_allowed=True)
_logger: Logger = PrivateAttr(default_factory=Logger)
_tool_adapter: LangGraphToolAdapter = PrivateAttr()
_graph: Any = PrivateAttr(default=None)
_memory: Any = PrivateAttr(default=None)
_max_iterations: int = PrivateAttr(default=10)
function_calling_llm: Any = Field(default=None)
step_callback: Callable[..., Any] | None = Field(default=None)
model: str = Field(default="gpt-4o")
verbose: bool = Field(default=False)
def __init__(
self,
role: str,
goal: str,
backstory: str,
tools: list[BaseTool] | None = None,
llm: Any = None,
max_iterations: int = 10,
agent_config: dict[str, Any] | None = None,
**kwargs,
) -> None:
"""Initialize the LangGraph agent adapter.
Args:
role: The role description for the agent.
goal: The primary goal the agent should achieve.
backstory: Background information about the agent.
tools: Optional list of tools available to the agent.
llm: Language model to use, defaults to gpt-4o.
max_iterations: Maximum number of iterations for task execution.
agent_config: Additional configuration for the LangGraph agent.
**kwargs: Additional arguments passed to the base adapter.
"""
super().__init__(
role=role,
goal=goal,
backstory=backstory,
tools=tools,
llm=llm or self.model,
agent_config=agent_config,
**kwargs,
)
self._tool_adapter = LangGraphToolAdapter(tools=tools)
self._converter_adapter: LangGraphConverterAdapter = LangGraphConverterAdapter(
self
)
self._max_iterations = max_iterations
self._setup_graph()
def _setup_graph(self) -> None:
"""Set up the LangGraph workflow graph.
Initializes the memory saver and creates a ReAct agent with the configured
tools, memory checkpointer, and debug settings.
"""
memory_saver: type[Any] = cast(
LangGraphCheckPointMemoryModule,
require(
"langgraph.checkpoint.memory",
purpose="LangGraph core functionality",
),
).MemorySaver
create_react_agent: Callable[..., Any] = cast(
LangGraphPrebuiltModule,
require(
"langgraph.prebuilt",
purpose="LangGraph core functionality",
),
).create_react_agent
self._memory = memory_saver()
converted_tools: list[Any] = self._tool_adapter.tools()
if self._agent_config:
self._graph = create_react_agent(
model=self.llm,
tools=converted_tools,
checkpointer=self._memory,
debug=self.verbose,
**self._agent_config,
)
else:
self._graph = create_react_agent(
model=self.llm,
tools=converted_tools or [],
checkpointer=self._memory,
debug=self.verbose,
)
def _build_system_prompt(self) -> str:
"""Build a system prompt for the LangGraph agent.
Creates a prompt that includes the agent's role, goal, and backstory,
then enhances it through the converter adapter for structured output.
Returns:
The complete system prompt string.
"""
base_prompt = f"""
You are {self.role}.
Your goal is: {self.goal}
Your backstory: {self.backstory}
When working on tasks, think step-by-step and use the available tools when necessary.
"""
return self._converter_adapter.enhance_system_prompt(base_prompt)
def execute_task(
self,
task: Any,
context: str | None = None,
tools: list[BaseTool] | None = None,
) -> str:
"""Execute a task using the LangGraph workflow.
Configures the agent, processes the task through the LangGraph workflow,
and handles event emission for execution tracking.
Args:
task: The task object to execute.
context: Optional context information for the task.
tools: Optional additional tools for this specific execution.
Returns:
The final answer from the task execution.
Raises:
Exception: If task execution fails.
"""
self.create_agent_executor(tools)
self.configure_structured_output(task)
try:
task_prompt = task.prompt() if hasattr(task, "prompt") else str(task)
if context:
task_prompt = self.i18n.slice("task_with_context").format(
task=task_prompt, context=context
)
crewai_event_bus.emit(
self,
event=AgentExecutionStartedEvent(
agent=self,
tools=self.tools,
task_prompt=task_prompt,
task=task,
),
)
session_id = f"task_{id(task)}"
config: dict[str, dict[str, str]] = {
"configurable": {"thread_id": session_id}
}
result: dict[str, Any] = self._graph.invoke(
{
"messages": [
("system", self._build_system_prompt()),
("user", task_prompt),
]
},
config,
)
messages: list[Any] = result.get("messages", [])
last_message: Any = messages[-1] if messages else None
final_answer: str = ""
if isinstance(last_message, dict):
final_answer = last_message.get("content", "")
elif hasattr(last_message, "content"):
final_answer = getattr(last_message, "content", "")
final_answer = (
self._converter_adapter.post_process_result(final_answer)
or "Task execution completed but no clear answer was provided."
)
crewai_event_bus.emit(
self,
event=AgentExecutionCompletedEvent(
agent=self, task=task, output=final_answer
),
)
return final_answer
except Exception as e:
self._logger.log("error", f"Error executing LangGraph task: {e!s}")
crewai_event_bus.emit(
self,
event=AgentExecutionErrorEvent(
agent=self,
task=task,
error=str(e),
),
)
raise
def create_agent_executor(self, tools: list[BaseTool] | None = None) -> None:
"""Configure the LangGraph agent for execution.
Args:
tools: Optional tools to configure for the agent.
"""
self.configure_tools(tools)
def configure_tools(self, tools: list[BaseTool] | None = None) -> None:
"""Configure tools for the LangGraph agent.
Merges additional tools with existing ones and updates the graph's
available tools through the tool adapter.
Args:
tools: Optional additional tools to configure.
"""
if tools:
all_tools: list[BaseTool] = list(self.tools or []) + list(tools or [])
self._tool_adapter.configure_tools(all_tools)
available_tools: list[Any] = self._tool_adapter.tools()
self._graph.tools = available_tools
def get_delegation_tools(self, agents: list[BaseAgent]) -> list[BaseTool]:
"""Implement delegation tools support for LangGraph.
Creates delegation tools that allow this agent to delegate tasks to other agents.
Args:
agents: List of agents available for delegation.
Returns:
List of delegation tools.
"""
agent_tools: AgentTools = AgentTools(agents=agents)
return agent_tools.tools()
@staticmethod
def get_output_converter(
llm: Any, text: str, model: Any, instructions: str
) -> Converter:
"""Convert output format if needed.
Args:
llm: Language model instance.
text: Text to convert.
model: Model configuration.
instructions: Conversion instructions.
Returns:
Converter instance for output transformation.
"""
return Converter(llm=llm, text=text, model=model, instructions=instructions)
def configure_structured_output(self, task: Any) -> None:
"""Configure the structured output for LangGraph.
Uses the converter adapter to set up structured output formatting
based on the task requirements.
Args:
task: Task object containing output requirements.
"""
self._converter_adapter.configure_structured_output(task)

View File

@@ -1,100 +0,0 @@
"""LangGraph tool adapter for CrewAI tool integration.
This module contains the LangGraphToolAdapter class that converts CrewAI tools
to LangGraph-compatible format using langchain_core.tools.
"""
import inspect
from collections.abc import Awaitable
from typing import Any
from crewai.agents.agent_adapters.base_tool_adapter import BaseToolAdapter
from crewai.tools.base_tool import BaseTool
class LangGraphToolAdapter(BaseToolAdapter):
"""Adapts CrewAI tools to LangGraph agent tool compatible format.
Converts CrewAI BaseTool instances to langchain_core.tools format
that can be used by LangGraph agents.
"""
def __init__(self, tools: list[BaseTool] | None = None) -> None:
"""Initialize the tool adapter.
Args:
tools: Optional list of CrewAI tools to adapt.
"""
super().__init__()
self.original_tools: list[BaseTool] = tools or []
self.converted_tools: list[Any] = []
def configure_tools(self, tools: list[BaseTool]) -> None:
"""Configure and convert CrewAI tools to LangGraph-compatible format.
LangGraph expects tools in langchain_core.tools format. This method
converts CrewAI BaseTool instances to StructuredTool instances.
Args:
tools: List of CrewAI tools to convert.
"""
from langchain_core.tools import BaseTool as LangChainBaseTool
from langchain_core.tools import StructuredTool
converted_tools: list[Any] = []
if self.original_tools:
all_tools: list[BaseTool] = tools + self.original_tools
else:
all_tools = tools
for tool in all_tools:
if isinstance(tool, LangChainBaseTool):
converted_tools.append(tool)
continue
sanitized_name: str = self.sanitize_tool_name(tool.name)
async def tool_wrapper(
*args: Any, tool: BaseTool = tool, **kwargs: Any
) -> Any:
"""Wrapper function to adapt CrewAI tool calls to LangGraph format.
Args:
*args: Positional arguments for the tool.
tool: The CrewAI tool to wrap.
**kwargs: Keyword arguments for the tool.
Returns:
The result from the tool execution.
"""
output: Any | Awaitable[Any]
if len(args) > 0 and isinstance(args[0], str):
output = tool.run(args[0])
elif "input" in kwargs:
output = tool.run(kwargs["input"])
else:
output = tool.run(**kwargs)
if inspect.isawaitable(output):
result: Any = await output
else:
result = output
return result
converted_tool: StructuredTool = StructuredTool(
name=sanitized_name,
description=tool.description,
func=tool_wrapper,
args_schema=tool.args_schema,
)
converted_tools.append(converted_tool)
self.converted_tools = converted_tools
def tools(self) -> list[Any]:
"""Get the list of converted tools.
Returns:
List of LangGraph-compatible tools.
"""
return self.converted_tools or []

View File

@@ -1,55 +0,0 @@
"""Type protocols for LangGraph modules."""
from typing import Any, Protocol, runtime_checkable
@runtime_checkable
class LangGraphMemorySaver(Protocol):
"""Protocol for LangGraph MemorySaver.
Defines the interface for LangGraph's memory persistence mechanism.
"""
def __init__(self) -> None:
"""Initialize the memory saver."""
...
@runtime_checkable
class LangGraphCheckPointMemoryModule(Protocol):
"""Protocol for LangGraph checkpoint memory module.
Defines the interface for modules containing memory checkpoint functionality.
"""
MemorySaver: type[LangGraphMemorySaver]
@runtime_checkable
class LangGraphPrebuiltModule(Protocol):
"""Protocol for LangGraph prebuilt module.
Defines the interface for modules containing prebuilt agent factories.
"""
def create_react_agent(
self,
model: Any,
tools: list[Any],
checkpointer: Any,
debug: bool = False,
**kwargs: Any,
) -> Any:
"""Create a ReAct agent with the given configuration.
Args:
model: The language model to use for the agent.
tools: List of tools available to the agent.
checkpointer: Memory checkpointer for state persistence.
debug: Whether to enable debug mode.
**kwargs: Additional configuration options.
Returns:
The configured ReAct agent instance.
"""
...

View File

@@ -1,242 +0,0 @@
"""OpenAI agents adapter for CrewAI integration.
This module contains the OpenAIAgentAdapter class that integrates OpenAI Assistants
with CrewAI's agent system, providing tool integration and structured output support.
"""
from typing import Any, cast
from pydantic import ConfigDict, Field, PrivateAttr
from typing_extensions import Unpack
from crewai.agents.agent_adapters.base_agent_adapter import BaseAgentAdapter
from crewai.agents.agent_adapters.openai_agents.openai_agent_tool_adapter import (
OpenAIAgentToolAdapter,
)
from crewai.agents.agent_adapters.openai_agents.protocols import (
AgentKwargs,
OpenAIAgentsModule,
)
from crewai.agents.agent_adapters.openai_agents.protocols import (
OpenAIAgent as OpenAIAgentProtocol,
)
from crewai.agents.agent_adapters.openai_agents.structured_output_converter import (
OpenAIConverterAdapter,
)
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.agent_events import (
AgentExecutionCompletedEvent,
AgentExecutionErrorEvent,
AgentExecutionStartedEvent,
)
from crewai.tools import BaseTool
from crewai.tools.agent_tools.agent_tools import AgentTools
from crewai.utilities import Logger
from crewai.utilities.import_utils import require
openai_agents_module = cast(
OpenAIAgentsModule,
require(
"agents",
purpose="OpenAI agents functionality",
),
)
OpenAIAgent = openai_agents_module.Agent
Runner = openai_agents_module.Runner
enable_verbose_stdout_logging = openai_agents_module.enable_verbose_stdout_logging
class OpenAIAgentAdapter(BaseAgentAdapter):
"""Adapter for OpenAI Assistants.
Integrates OpenAI Assistants API with CrewAI's agent system, providing
tool configuration, structured output handling, and task execution.
"""
model_config = ConfigDict(arbitrary_types_allowed=True)
_openai_agent: OpenAIAgentProtocol = PrivateAttr()
_logger: Logger = PrivateAttr(default_factory=Logger)
_active_thread: str | None = PrivateAttr(default=None)
function_calling_llm: Any = Field(default=None)
step_callback: Any = Field(default=None)
_tool_adapter: OpenAIAgentToolAdapter = PrivateAttr()
_converter_adapter: OpenAIConverterAdapter = PrivateAttr()
def __init__(
self,
**kwargs: Unpack[AgentKwargs],
) -> None:
"""Initialize the OpenAI agent adapter.
Args:
**kwargs: All initialization arguments including role, goal, backstory,
model, tools, and agent_config.
Raises:
ImportError: If OpenAI agent dependencies are not installed.
"""
self.llm = kwargs.pop("model", "gpt-4o-mini")
super().__init__(**kwargs)
self._tool_adapter = OpenAIAgentToolAdapter(tools=kwargs.get("tools"))
self._converter_adapter = OpenAIConverterAdapter(agent_adapter=self)
def _build_system_prompt(self) -> str:
"""Build a system prompt for the OpenAI agent.
Creates a prompt containing the agent's role, goal, and backstory,
then enhances it with structured output instructions if needed.
Returns:
The complete system prompt string.
"""
base_prompt = f"""
You are {self.role}.
Your goal is: {self.goal}
Your backstory: {self.backstory}
When working on tasks, think step-by-step and use the available tools when necessary.
"""
return self._converter_adapter.enhance_system_prompt(base_prompt)
def execute_task(
self,
task: Any,
context: str | None = None,
tools: list[BaseTool] | None = None,
) -> str:
"""Execute a task using the OpenAI Assistant.
Configures the assistant, processes the task, and handles event emission
for execution tracking.
Args:
task: The task object to execute.
context: Optional context information for the task.
tools: Optional additional tools for this execution.
Returns:
The final answer from the task execution.
Raises:
Exception: If task execution fails.
"""
self._converter_adapter.configure_structured_output(task)
self.create_agent_executor(tools)
if self.verbose:
enable_verbose_stdout_logging()
try:
task_prompt: str = task.prompt()
if context:
task_prompt = self.i18n.slice("task_with_context").format(
task=task_prompt, context=context
)
crewai_event_bus.emit(
self,
event=AgentExecutionStartedEvent(
agent=self,
tools=self.tools,
task_prompt=task_prompt,
task=task,
),
)
result: Any = self.agent_executor.run_sync(self._openai_agent, task_prompt)
final_answer: str = self.handle_execution_result(result)
crewai_event_bus.emit(
self,
event=AgentExecutionCompletedEvent(
agent=self, task=task, output=final_answer
),
)
return final_answer
except Exception as e:
self._logger.log("error", f"Error executing OpenAI task: {e!s}")
crewai_event_bus.emit(
self,
event=AgentExecutionErrorEvent(
agent=self,
task=task,
error=str(e),
),
)
raise
def create_agent_executor(self, tools: list[BaseTool] | None = None) -> None:
"""Configure the OpenAI agent for execution.
While OpenAI handles execution differently through Runner,
this method sets up tools and agent configuration.
Args:
tools: Optional tools to configure for the agent.
Notes:
TODO: Properly type agent_executor in BaseAgent to avoid type issues
when assigning Runner class to this attribute.
"""
all_tools: list[BaseTool] = list(self.tools or []) + list(tools or [])
instructions: str = self._build_system_prompt()
self._openai_agent = OpenAIAgent(
name=self.role,
instructions=instructions,
model=self.llm,
**self._agent_config or {},
)
if all_tools:
self.configure_tools(all_tools)
self.agent_executor = Runner
def configure_tools(self, tools: list[BaseTool] | None = None) -> None:
"""Configure tools for the OpenAI Assistant.
Args:
tools: Optional tools to configure for the assistant.
"""
if tools:
self._tool_adapter.configure_tools(tools)
if self._tool_adapter.converted_tools:
self._openai_agent.tools = self._tool_adapter.converted_tools
def handle_execution_result(self, result: Any) -> str:
"""Process OpenAI Assistant execution result.
Converts any structured output to a string through the converter adapter.
Args:
result: The execution result from the OpenAI assistant.
Returns:
Processed result as a string.
"""
return self._converter_adapter.post_process_result(result.final_output)
def get_delegation_tools(self, agents: list[BaseAgent]) -> list[BaseTool]:
"""Implement delegation tools support.
Creates delegation tools that allow this agent to delegate tasks to other agents.
Args:
agents: List of agents available for delegation.
Returns:
List of delegation tools.
"""
agent_tools: AgentTools = AgentTools(agents=agents)
return agent_tools.tools()
def configure_structured_output(self, task: Any) -> None:
"""Configure the structured output for the specific agent implementation.
Args:
task: The task object containing output format specifications.
"""
self._converter_adapter.configure_structured_output(task)

View File

@@ -1,162 +0,0 @@
"""OpenAI agent tool adapter for CrewAI tool integration.
This module contains the OpenAIAgentToolAdapter class that converts CrewAI tools
to OpenAI Assistant-compatible format using the agents library.
"""
import inspect
import json
import re
from collections.abc import Awaitable
from typing import Any, cast
from crewai.agents.agent_adapters.base_tool_adapter import BaseToolAdapter
from crewai.agents.agent_adapters.openai_agents.protocols import (
OpenAIFunctionTool,
OpenAITool,
)
from crewai.tools import BaseTool
from crewai.utilities.import_utils import require
agents_module = cast(
Any,
require(
"agents",
purpose="OpenAI agents functionality",
),
)
FunctionTool = agents_module.FunctionTool
Tool = agents_module.Tool
class OpenAIAgentToolAdapter(BaseToolAdapter):
"""Adapter for OpenAI Assistant tools.
Converts CrewAI BaseTool instances to OpenAI Assistant FunctionTool format
that can be used by OpenAI agents.
"""
def __init__(self, tools: list[BaseTool] | None = None) -> None:
"""Initialize the tool adapter.
Args:
tools: Optional list of CrewAI tools to adapt.
"""
super().__init__()
self.original_tools: list[BaseTool] = tools or []
self.converted_tools: list[OpenAITool] = []
def configure_tools(self, tools: list[BaseTool]) -> None:
"""Configure tools for the OpenAI Assistant.
Merges provided tools with original tools and converts them to
OpenAI Assistant format.
Args:
tools: List of CrewAI tools to configure.
"""
if self.original_tools:
all_tools: list[BaseTool] = tools + self.original_tools
else:
all_tools = tools
if all_tools:
self.converted_tools = self._convert_tools_to_openai_format(all_tools)
@staticmethod
def _convert_tools_to_openai_format(
tools: list[BaseTool] | None,
) -> list[OpenAITool]:
"""Convert CrewAI tools to OpenAI Assistant tool format.
Args:
tools: List of CrewAI tools to convert.
Returns:
List of OpenAI Assistant FunctionTool instances.
"""
if not tools:
return []
def sanitize_tool_name(name: str) -> str:
"""Convert tool name to match OpenAI's required pattern.
Args:
name: Original tool name.
Returns:
Sanitized tool name matching OpenAI requirements.
"""
return re.sub(r"[^a-zA-Z0-9_-]", "_", name).lower()
def create_tool_wrapper(tool: BaseTool) -> Any:
"""Create a wrapper function that handles the OpenAI function tool interface.
Args:
tool: The CrewAI tool to wrap.
Returns:
Async wrapper function for OpenAI agent integration.
"""
async def wrapper(context_wrapper: Any, arguments: Any) -> Any:
"""Wrapper function to adapt CrewAI tool calls to OpenAI format.
Args:
context_wrapper: OpenAI context wrapper.
arguments: Tool arguments from OpenAI.
Returns:
Tool execution result.
"""
# Get the parameter name from the schema
param_name: str = next(
iter(tool.args_schema.model_json_schema()["properties"].keys())
)
# Handle different argument types
args_dict: dict[str, Any]
if isinstance(arguments, dict):
args_dict = arguments
elif isinstance(arguments, str):
try:
args_dict = json.loads(arguments)
except json.JSONDecodeError:
args_dict = {param_name: arguments}
else:
args_dict = {param_name: str(arguments)}
# Run the tool with the processed arguments
output: Any | Awaitable[Any] = tool._run(**args_dict)
# Await if the tool returned a coroutine
if inspect.isawaitable(output):
result: Any = await output
else:
result = output
# Ensure the result is JSON serializable
if isinstance(result, (dict, list, str, int, float, bool, type(None))):
return result
return str(result)
return wrapper
openai_tools: list[OpenAITool] = []
for tool in tools:
schema: dict[str, Any] = tool.args_schema.model_json_schema()
schema.update({"additionalProperties": False, "type": "object"})
openai_tool: OpenAIFunctionTool = cast(
OpenAIFunctionTool,
FunctionTool(
name=sanitize_tool_name(tool.name),
description=tool.description,
params_json_schema=schema,
on_invoke_tool=create_tool_wrapper(tool),
),
)
openai_tools.append(openai_tool)
return openai_tools

View File

@@ -1,74 +0,0 @@
"""Type protocols for OpenAI agents modules."""
from collections.abc import Callable
from typing import Any, Protocol, TypedDict, runtime_checkable
from crewai.tools.base_tool import BaseTool
class AgentKwargs(TypedDict, total=False):
"""Typed dict for agent initialization kwargs."""
role: str
goal: str
backstory: str
model: str
tools: list[BaseTool] | None
agent_config: dict[str, Any] | None
@runtime_checkable
class OpenAIAgent(Protocol):
"""Protocol for OpenAI Agent."""
def __init__(
self,
name: str,
instructions: str,
model: str,
**kwargs: Any,
) -> None:
"""Initialize the OpenAI agent."""
...
tools: list[Any]
output_type: Any
@runtime_checkable
class OpenAIRunner(Protocol):
"""Protocol for OpenAI Runner."""
@classmethod
def run_sync(cls, agent: OpenAIAgent, message: str) -> Any:
"""Run agent synchronously with a message."""
...
@runtime_checkable
class OpenAIAgentsModule(Protocol):
"""Protocol for OpenAI agents module."""
Agent: type[OpenAIAgent]
Runner: type[OpenAIRunner]
enable_verbose_stdout_logging: Callable[[], None]
@runtime_checkable
class OpenAITool(Protocol):
"""Protocol for OpenAI Tool."""
@runtime_checkable
class OpenAIFunctionTool(Protocol):
"""Protocol for OpenAI FunctionTool."""
def __init__(
self,
name: str,
description: str,
params_json_schema: dict[str, Any],
on_invoke_tool: Any,
) -> None:
"""Initialize the function tool."""
...

View File

@@ -1,56 +0,0 @@
"""Base output converter for transforming text into structured formats."""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any
from pydantic import BaseModel, Field
class OutputConverter(BaseModel, ABC):
"""Abstract base class for converting text to structured formats.
Uses language models to transform unstructured text into either Pydantic models
or JSON objects based on provided instructions and target schemas.
Attributes:
text: The input text to be converted.
llm: The language model used for conversion.
model: The target Pydantic model class for structuring output.
instructions: Specific instructions for the conversion process.
max_attempts: Maximum number of conversion attempts (default: 3).
"""
text: str = Field(description="Text to be converted.")
llm: Any = Field(description="The language model to be used to convert the text.")
model: type[BaseModel] = Field(
description="The model to be used to convert the text."
)
instructions: str = Field(description="Conversion instructions to the LLM.")
max_attempts: int = Field(
description="Max number of attempts to try to get the output formatted.",
default=3,
)
@abstractmethod
def to_pydantic(self, current_attempt: int = 1) -> BaseModel:
"""Convert text to a Pydantic model instance.
Args:
current_attempt: Current attempt number for retry logic.
Returns:
Pydantic model instance with structured data.
"""
@abstractmethod
def to_json(self, current_attempt: int = 1) -> dict[str, Any]:
"""Convert text to a JSON dictionary.
Args:
current_attempt: Current attempt number for retry logic.
Returns:
Dictionary containing structured JSON data.
"""

View File

@@ -1,75 +0,0 @@
"""Token usage tracking utilities.
This module provides utilities for tracking token consumption and request
metrics during agent execution.
"""
from crewai.types.usage_metrics import UsageMetrics
class TokenProcess:
"""Track token usage during agent processing.
Attributes:
total_tokens: Total number of tokens used.
prompt_tokens: Number of tokens used in prompts.
cached_prompt_tokens: Number of cached prompt tokens used.
completion_tokens: Number of tokens used in completions.
successful_requests: Number of successful requests made.
"""
def __init__(self) -> None:
"""Initialize token tracking with zero values."""
self.total_tokens: int = 0
self.prompt_tokens: int = 0
self.cached_prompt_tokens: int = 0
self.completion_tokens: int = 0
self.successful_requests: int = 0
def sum_prompt_tokens(self, tokens: int) -> None:
"""Add prompt tokens to the running totals.
Args:
tokens: Number of prompt tokens to add.
"""
self.prompt_tokens += tokens
self.total_tokens += tokens
def sum_completion_tokens(self, tokens: int) -> None:
"""Add completion tokens to the running totals.
Args:
tokens: Number of completion tokens to add.
"""
self.completion_tokens += tokens
self.total_tokens += tokens
def sum_cached_prompt_tokens(self, tokens: int) -> None:
"""Add cached prompt tokens to the running total.
Args:
tokens: Number of cached prompt tokens to add.
"""
self.cached_prompt_tokens += tokens
def sum_successful_requests(self, requests: int) -> None:
"""Add successful requests to the running total.
Args:
requests: Number of successful requests to add.
"""
self.successful_requests += requests
def get_summary(self) -> UsageMetrics:
"""Get a summary of all tracked metrics.
Returns:
UsageMetrics object with current totals.
"""
return UsageMetrics(
total_tokens=self.total_tokens,
prompt_tokens=self.prompt_tokens,
cached_prompt_tokens=self.cached_prompt_tokens,
completion_tokens=self.completion_tokens,
successful_requests=self.successful_requests,
)

View File

@@ -1,45 +0,0 @@
"""Cache handler for tool usage results."""
from typing import Any
from pydantic import BaseModel, PrivateAttr
class CacheHandler(BaseModel):
"""Handles caching of tool execution results.
Provides in-memory caching for tool outputs based on tool name and input.
Notes:
- TODO: Make thread-safe.
"""
_cache: dict[str, Any] = PrivateAttr(default_factory=dict)
def add(self, tool: str, input: str, output: Any) -> None:
"""Add a tool result to the cache.
Args:
tool: Name of the tool.
input: Input string used for the tool.
output: Output result from tool execution.
Notes:
- TODO: Rename 'input' parameter to avoid shadowing builtin.
"""
self._cache[f"{tool}-{input}"] = output
def read(self, tool: str, input: str) -> Any | None:
"""Retrieve a cached tool result.
Args:
tool: Name of the tool.
input: Input string used for the tool.
Returns:
Cached result if found, None otherwise.
Notes:
- TODO: Rename 'input' parameter to avoid shadowing builtin.
"""
return self._cache.get(f"{tool}-{input}")

View File

@@ -1,27 +0,0 @@
import contextvars
import os
from contextlib import contextmanager
_platform_integration_token: contextvars.ContextVar[str | None] = (
contextvars.ContextVar("platform_integration_token", default=None)
)
def set_platform_integration_token(integration_token: str) -> None:
_platform_integration_token.set(integration_token)
def get_platform_integration_token() -> str | None:
token = _platform_integration_token.get()
if token is None:
token = os.getenv("CREWAI_PLATFORM_INTEGRATION_TOKEN")
return token
@contextmanager
def platform_context(integration_token: str):
token = _platform_integration_token.set(integration_token)
try:
yield
finally:
_platform_integration_token.reset(token)

View File

@@ -1,166 +0,0 @@
"""CrewAI events system for monitoring and extending agent behavior.
This module provides the event infrastructure that allows users to:
- Monitor agent, task, and crew execution
- Track memory operations and performance
- Build custom logging and analytics
- Extend CrewAI with custom event handlers
"""
from crewai.events.base_event_listener import BaseEventListener
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.agent_events import (
AgentEvaluationCompletedEvent,
AgentEvaluationFailedEvent,
AgentEvaluationStartedEvent,
AgentExecutionCompletedEvent,
AgentExecutionErrorEvent,
AgentExecutionStartedEvent,
LiteAgentExecutionCompletedEvent,
LiteAgentExecutionErrorEvent,
LiteAgentExecutionStartedEvent,
)
from crewai.events.types.crew_events import (
CrewKickoffCompletedEvent,
CrewKickoffFailedEvent,
CrewKickoffStartedEvent,
CrewTestCompletedEvent,
CrewTestFailedEvent,
CrewTestResultEvent,
CrewTestStartedEvent,
CrewTrainCompletedEvent,
CrewTrainFailedEvent,
CrewTrainStartedEvent,
)
from crewai.events.types.flow_events import (
FlowCreatedEvent,
FlowEvent,
FlowFinishedEvent,
FlowPlotEvent,
FlowStartedEvent,
MethodExecutionFailedEvent,
MethodExecutionFinishedEvent,
MethodExecutionStartedEvent,
)
from crewai.events.types.knowledge_events import (
KnowledgeQueryCompletedEvent,
KnowledgeQueryFailedEvent,
KnowledgeQueryStartedEvent,
KnowledgeRetrievalCompletedEvent,
KnowledgeRetrievalStartedEvent,
KnowledgeSearchQueryFailedEvent,
)
from crewai.events.types.llm_events import (
LLMCallCompletedEvent,
LLMCallFailedEvent,
LLMCallStartedEvent,
LLMStreamChunkEvent,
)
from crewai.events.types.llm_guardrail_events import (
LLMGuardrailCompletedEvent,
LLMGuardrailStartedEvent,
)
from crewai.events.types.logging_events import (
AgentLogsExecutionEvent,
AgentLogsStartedEvent,
)
from crewai.events.types.memory_events import (
MemoryQueryCompletedEvent,
MemoryQueryFailedEvent,
MemoryQueryStartedEvent,
MemoryRetrievalCompletedEvent,
MemoryRetrievalStartedEvent,
MemorySaveCompletedEvent,
MemorySaveFailedEvent,
MemorySaveStartedEvent,
)
from crewai.events.types.reasoning_events import (
AgentReasoningCompletedEvent,
AgentReasoningFailedEvent,
AgentReasoningStartedEvent,
ReasoningEvent,
)
from crewai.events.types.task_events import (
TaskCompletedEvent,
TaskEvaluationEvent,
TaskFailedEvent,
TaskStartedEvent,
)
from crewai.events.types.tool_usage_events import (
ToolExecutionErrorEvent,
ToolSelectionErrorEvent,
ToolUsageErrorEvent,
ToolUsageEvent,
ToolUsageFinishedEvent,
ToolUsageStartedEvent,
ToolValidateInputErrorEvent,
)
__all__ = [
"AgentEvaluationCompletedEvent",
"AgentEvaluationFailedEvent",
"AgentEvaluationStartedEvent",
"AgentExecutionCompletedEvent",
"AgentExecutionErrorEvent",
"AgentExecutionStartedEvent",
"AgentLogsExecutionEvent",
"AgentLogsStartedEvent",
"AgentReasoningCompletedEvent",
"AgentReasoningFailedEvent",
"AgentReasoningStartedEvent",
"BaseEventListener",
"CrewKickoffCompletedEvent",
"CrewKickoffFailedEvent",
"CrewKickoffStartedEvent",
"CrewTestCompletedEvent",
"CrewTestFailedEvent",
"CrewTestResultEvent",
"CrewTestStartedEvent",
"CrewTrainCompletedEvent",
"CrewTrainFailedEvent",
"CrewTrainStartedEvent",
"FlowCreatedEvent",
"FlowEvent",
"FlowFinishedEvent",
"FlowPlotEvent",
"FlowStartedEvent",
"KnowledgeQueryCompletedEvent",
"KnowledgeQueryFailedEvent",
"KnowledgeQueryStartedEvent",
"KnowledgeRetrievalCompletedEvent",
"KnowledgeRetrievalStartedEvent",
"KnowledgeSearchQueryFailedEvent",
"LLMCallCompletedEvent",
"LLMCallFailedEvent",
"LLMCallStartedEvent",
"LLMGuardrailCompletedEvent",
"LLMGuardrailStartedEvent",
"LLMStreamChunkEvent",
"LiteAgentExecutionCompletedEvent",
"LiteAgentExecutionErrorEvent",
"LiteAgentExecutionStartedEvent",
"MemoryQueryCompletedEvent",
"MemoryQueryFailedEvent",
"MemoryQueryStartedEvent",
"MemoryRetrievalCompletedEvent",
"MemoryRetrievalStartedEvent",
"MemorySaveCompletedEvent",
"MemorySaveFailedEvent",
"MemorySaveStartedEvent",
"MethodExecutionFailedEvent",
"MethodExecutionFinishedEvent",
"MethodExecutionStartedEvent",
"ReasoningEvent",
"TaskCompletedEvent",
"TaskEvaluationEvent",
"TaskFailedEvent",
"TaskStartedEvent",
"ToolExecutionErrorEvent",
"ToolSelectionErrorEvent",
"ToolUsageErrorEvent",
"ToolUsageEvent",
"ToolUsageFinishedEvent",
"ToolUsageStartedEvent",
"ToolValidateInputErrorEvent",
"crewai_event_bus",
]

View File

@@ -1,229 +0,0 @@
import logging
import uuid
import webbrowser
from pathlib import Path
from rich.console import Console
from rich.panel import Panel
from crewai.events.listeners.tracing.trace_batch_manager import TraceBatchManager
from crewai.events.listeners.tracing.utils import (
mark_first_execution_completed,
prompt_user_for_trace_viewing,
should_auto_collect_first_time_traces,
)
logger = logging.getLogger(__name__)
def _update_or_create_env_file():
"""Update or create .env file with CREWAI_TRACING_ENABLED=true."""
env_path = Path(".env")
env_content = ""
variable_name = "CREWAI_TRACING_ENABLED"
variable_value = "true"
# Read existing content if file exists
if env_path.exists():
with open(env_path, "r") as f:
env_content = f.read()
# Check if CREWAI_TRACING_ENABLED is already set
lines = env_content.splitlines()
variable_exists = False
updated_lines = []
for line in lines:
if line.strip().startswith(f"{variable_name}="):
# Update existing variable
updated_lines.append(f"{variable_name}={variable_value}")
variable_exists = True
else:
updated_lines.append(line)
# Add variable if it doesn't exist
if not variable_exists:
if updated_lines and not updated_lines[-1].strip():
# If last line is empty, replace it
updated_lines[-1] = f"{variable_name}={variable_value}"
else:
# Add new line and then the variable
updated_lines.append(f"{variable_name}={variable_value}")
# Write updated content
with open(env_path, "w") as f:
f.write("\n".join(updated_lines))
if updated_lines: # Add final newline if there's content
f.write("\n")
class FirstTimeTraceHandler:
"""Handles the first-time user trace collection and display flow."""
def __init__(self):
self.is_first_time: bool = False
self.collected_events: bool = False
self.trace_batch_id: str | None = None
self.ephemeral_url: str | None = None
self.batch_manager: TraceBatchManager | None = None
def initialize_for_first_time_user(self) -> bool:
"""Check if this is first time and initialize collection."""
self.is_first_time = should_auto_collect_first_time_traces()
return self.is_first_time
def set_batch_manager(self, batch_manager: TraceBatchManager):
"""Set reference to batch manager for sending events."""
self.batch_manager = batch_manager
def mark_events_collected(self):
"""Mark that events have been collected during execution."""
self.collected_events = True
def handle_execution_completion(self):
"""Handle the completion flow as shown in your diagram."""
if not self.is_first_time or not self.collected_events:
return
try:
user_wants_traces = prompt_user_for_trace_viewing(timeout_seconds=20)
if user_wants_traces:
self._initialize_backend_and_send_events()
# Enable tracing for future runs by updating .env file
try:
_update_or_create_env_file()
except Exception: # noqa: S110
pass
if self.ephemeral_url:
self._display_ephemeral_trace_link()
mark_first_execution_completed()
except Exception as e:
self._gracefully_fail(f"Error in trace handling: {e}")
mark_first_execution_completed()
def _initialize_backend_and_send_events(self):
"""Initialize backend batch and send collected events."""
if not self.batch_manager:
return
try:
if not self.batch_manager.backend_initialized:
original_metadata = (
self.batch_manager.current_batch.execution_metadata
if self.batch_manager.current_batch
else {}
)
user_context = {
"privacy_level": "standard",
"user_id": "first_time_user",
"session_id": str(uuid.uuid4()),
"trace_id": self.batch_manager.trace_batch_id,
}
execution_metadata = {
"execution_type": original_metadata.get("execution_type", "crew"),
"crew_name": original_metadata.get(
"crew_name", "First Time Execution"
),
"flow_name": original_metadata.get("flow_name"),
"agent_count": original_metadata.get("agent_count", 1),
"task_count": original_metadata.get("task_count", 1),
"crewai_version": original_metadata.get("crewai_version"),
}
self.batch_manager._initialize_backend_batch(
user_context=user_context,
execution_metadata=execution_metadata,
use_ephemeral=True,
)
self.batch_manager.backend_initialized = True
if self.batch_manager.event_buffer:
self.batch_manager._send_events_to_backend()
self.batch_manager.finalize_batch()
self.ephemeral_url = self.batch_manager.ephemeral_trace_url
if not self.ephemeral_url:
self._show_local_trace_message()
except Exception as e:
self._gracefully_fail(f"Backend initialization failed: {e}")
def _display_ephemeral_trace_link(self):
"""Display the ephemeral trace link to the user and automatically open browser."""
console = Console()
try:
webbrowser.open(self.ephemeral_url)
except Exception: # noqa: S110
pass
panel_content = f"""
🎉 Your First CrewAI Execution Trace is Ready!
View your execution details here:
{self.ephemeral_url}
This trace shows:
• Agent decisions and interactions
• Task execution timeline
• Tool usage and results
• LLM calls and responses
✅ Tracing has been enabled for future runs! (CREWAI_TRACING_ENABLED=true added to .env)
You can also add tracing=True to your Crew(tracing=True) / Flow(tracing=True) for more control.
📝 Note: This link will expire in 24 hours.
""".strip()
panel = Panel(
panel_content,
title="🔍 Execution Trace Generated",
border_style="bright_green",
padding=(1, 2),
)
console.print("\n")
console.print(panel)
console.print()
def _gracefully_fail(self, error_message: str):
"""Handle errors gracefully without disrupting user experience."""
console = Console()
console.print(f"[yellow]Note: {error_message}[/yellow]")
logger.debug(f"First-time trace error: {error_message}")
def _show_local_trace_message(self):
"""Show message when traces were collected locally but couldn't be uploaded."""
console = Console()
panel_content = f"""
📊 Your execution traces were collected locally!
Unfortunately, we couldn't upload them to the server right now, but here's what we captured:
{len(self.batch_manager.event_buffer)} trace events
• Execution duration: {self.batch_manager.calculate_duration("execution")}ms
• Batch ID: {self.batch_manager.trace_batch_id}
Tracing has been enabled for future runs! (CREWAI_TRACING_ENABLED=true added to .env)
The traces include agent decisions, task execution, and tool usage.
""".strip()
panel = Panel(
panel_content,
title="🔍 Local Traces Collected",
border_style="yellow",
padding=(1, 2),
)
console.print("\n")
console.print(panel)
console.print()

View File

@@ -1,379 +0,0 @@
import getpass
import hashlib
import json
import logging
import os
import platform
import re
import subprocess
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any
import click
from rich.console import Console
from rich.panel import Panel
from rich.text import Text
from crewai.utilities.paths import db_storage_path
from crewai.utilities.serialization import to_serializable
logger = logging.getLogger(__name__)
def is_tracing_enabled() -> bool:
return os.getenv("CREWAI_TRACING_ENABLED", "false").lower() == "true"
def on_first_execution_tracing_confirmation() -> bool:
if _is_test_environment():
return False
if is_first_execution():
mark_first_execution_done()
return click.confirm(
"This is the first execution of CrewAI. Do you want to enable tracing?",
default=True,
show_default=True,
)
return False
def _is_test_environment() -> bool:
"""Detect if we're running in a test environment."""
return os.environ.get("CREWAI_TESTING", "").lower() == "true"
def _get_machine_id() -> str:
"""Stable, privacy-preserving machine fingerprint (cross-platform)."""
parts = []
try:
mac = ":".join(
[f"{(uuid.getnode() >> b) & 0xFF:02x}" for b in range(0, 12, 2)][::-1]
)
parts.append(mac)
except Exception: # noqa: S110
pass
try:
sysname = platform.system()
parts.append(sysname)
except Exception:
sysname = "unknown"
parts.append(sysname)
try:
if sysname == "Darwin":
try:
res = subprocess.run(
["/usr/sbin/system_profiler", "SPHardwareDataType"],
capture_output=True,
text=True,
timeout=2,
)
m = re.search(r"Hardware UUID:\s*([A-Fa-f0-9\-]+)", res.stdout)
if m:
parts.append(m.group(1))
except Exception: # noqa: S110
pass
elif sysname == "Linux":
linux_id = _get_linux_machine_id()
if linux_id:
parts.append(linux_id)
elif sysname == "Windows":
try:
res = subprocess.run(
[
"C:\\Windows\\System32\\wbem\\wmic.exe",
"csproduct",
"get",
"UUID",
],
capture_output=True,
text=True,
timeout=2,
)
lines = [
line.strip() for line in res.stdout.splitlines() if line.strip()
]
if len(lines) >= 2:
parts.append(lines[1])
except Exception: # noqa: S110
pass
else:
generic_id = _get_generic_system_id()
if generic_id:
parts.append(generic_id)
except Exception: # noqa: S110
pass
if len(parts) <= 1:
try:
import socket
parts.append(socket.gethostname())
except Exception: # noqa: S110
pass
try:
parts.append(getpass.getuser())
except Exception: # noqa: S110
pass
try:
parts.append(platform.machine())
parts.append(platform.processor())
except Exception: # noqa: S110
pass
if not parts:
parts.append("unknown-system")
parts.append(str(uuid.uuid4()))
return hashlib.sha256("".join(parts).encode()).hexdigest()
def _get_linux_machine_id() -> str | None:
linux_id_sources = [
"/etc/machine-id",
"/sys/class/dmi/id/product_uuid",
"/proc/sys/kernel/random/boot_id",
"/sys/class/dmi/id/board_serial",
"/sys/class/dmi/id/chassis_serial",
]
for source in linux_id_sources:
try:
path = Path(source)
if path.exists() and path.is_file():
content = path.read_text().strip()
if content and content.lower() not in [
"unknown",
"to be filled by o.e.m.",
"",
]:
return content
except Exception: # noqa: S112, PERF203
continue
try:
import socket
hostname = socket.gethostname()
arch = platform.machine()
if hostname and arch:
return f"{hostname}-{arch}"
except Exception: # noqa: S110
pass
return None
def _get_generic_system_id() -> str | None:
try:
parts = []
try:
import socket
hostname = socket.gethostname()
if hostname:
parts.append(hostname)
except Exception: # noqa: S110
pass
try:
parts.append(platform.machine())
parts.append(platform.processor())
parts.append(platform.architecture()[0])
except Exception: # noqa: S110
pass
try:
container_id = os.environ.get(
"HOSTNAME", os.environ.get("CONTAINER_ID", "")
)
if container_id:
parts.append(container_id)
except Exception: # noqa: S110
pass
if parts:
return "-".join(filter(None, parts))
except Exception: # noqa: S110
pass
return None
def _user_data_file() -> Path:
base = Path(db_storage_path())
base.mkdir(parents=True, exist_ok=True)
return base / ".crewai_user.json"
def _load_user_data() -> dict:
p = _user_data_file()
if p.exists():
try:
return json.loads(p.read_text())
except (json.JSONDecodeError, OSError, PermissionError) as e:
logger.warning(f"Failed to load user data: {e}")
return {}
def _save_user_data(data: dict) -> None:
try:
p = _user_data_file()
p.write_text(json.dumps(data, indent=2))
except (OSError, PermissionError) as e:
logger.warning(f"Failed to save user data: {e}")
def get_user_id() -> str:
"""Stable, anonymized user identifier with caching."""
data = _load_user_data()
if "user_id" in data:
return data["user_id"]
try:
username = getpass.getuser()
except Exception:
username = "unknown"
seed = f"{username}|{_get_machine_id()}"
uid = hashlib.sha256(seed.encode()).hexdigest()
data["user_id"] = uid
_save_user_data(data)
return uid
def is_first_execution() -> bool:
"""True if this is the first execution for this user."""
data = _load_user_data()
return not data.get("first_execution_done", False)
def mark_first_execution_done() -> None:
"""Mark that the first execution has been completed."""
data = _load_user_data()
if data.get("first_execution_done", False):
return
data.update(
{
"first_execution_done": True,
"first_execution_at": datetime.now().timestamp(),
"user_id": get_user_id(),
"machine_id": _get_machine_id(),
}
)
_save_user_data(data)
def safe_serialize_to_dict(obj, exclude: set[str] | None = None) -> dict[str, Any]:
"""Safely serialize an object to a dictionary for event data."""
try:
serialized = to_serializable(obj, exclude)
if isinstance(serialized, dict):
return serialized
return {"serialized_data": serialized}
except Exception as e:
return {"serialization_error": str(e), "object_type": type(obj).__name__}
def truncate_messages(messages, max_content_length=500, max_messages=5):
"""Truncate message content and limit number of messages"""
if not messages or not isinstance(messages, list):
return messages
limited_messages = messages[:max_messages]
for msg in limited_messages:
if isinstance(msg, dict) and "content" in msg:
content = msg["content"]
if len(content) > max_content_length:
msg["content"] = content[:max_content_length] + "..."
return limited_messages
def should_auto_collect_first_time_traces() -> bool:
"""True if we should auto-collect traces for first-time user."""
if _is_test_environment():
return False
return is_first_execution()
def prompt_user_for_trace_viewing(timeout_seconds: int = 20) -> bool:
"""
Prompt user if they want to see their traces with timeout.
Returns True if user wants to see traces, False otherwise.
"""
if _is_test_environment():
return False
try:
import threading
console = Console()
content = Text()
content.append("🔍 ", style="cyan bold")
content.append(
"Detailed execution traces are available!\n\n", style="cyan bold"
)
content.append("View insights including:\n", style="white")
content.append(" • Agent decision-making process\n", style="bright_blue")
content.append(" • Task execution flow and timing\n", style="bright_blue")
content.append(" • Tool usage details", style="bright_blue")
panel = Panel(
content,
title="[bold cyan]Execution Traces[/bold cyan]",
border_style="cyan",
padding=(1, 2),
)
console.print("\n")
console.print(panel)
prompt_text = click.style(
f"Would you like to view your execution traces? [y/N] ({timeout_seconds}s timeout): ",
fg="white",
bold=True,
)
click.echo(prompt_text, nl=False)
result = [False]
def get_input():
try:
response = input().strip().lower()
result[0] = response in ["y", "yes"]
except (EOFError, KeyboardInterrupt):
result[0] = False
input_thread = threading.Thread(target=get_input, daemon=True)
input_thread.start()
input_thread.join(timeout=timeout_seconds)
if input_thread.is_alive():
return False
return result[0]
except Exception:
return False
def mark_first_execution_completed() -> None:
"""Mark first execution as completed (called after trace prompt)."""
mark_first_execution_done()

View File

@@ -1,7 +0,0 @@
from crewai.experimental.evaluation.experiment.result import (
ExperimentResult,
ExperimentResults,
)
from crewai.experimental.evaluation.experiment.runner import ExperimentRunner
__all__ = ["ExperimentResult", "ExperimentResults", "ExperimentRunner"]

View File

@@ -1,4 +0,0 @@
from crewai.flow.flow import Flow, and_, listen, or_, router, start
from crewai.flow.persistence import persist
__all__ = ["Flow", "and_", "listen", "or_", "persist", "router", "start"]

View File

@@ -1,129 +0,0 @@
import logging
import traceback
import warnings
from typing import Any, cast
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
from crewai.rag.chromadb.config import ChromaDBConfig
from crewai.rag.chromadb.types import ChromaEmbeddingFunctionWrapper
from crewai.rag.config.utils import get_rag_client
from crewai.rag.core.base_client import BaseClient
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
from crewai.rag.embeddings.factory import build_embedder
from crewai.rag.embeddings.types import ProviderSpec
from crewai.rag.factory import create_client
from crewai.rag.types import BaseRecord, SearchResult
from crewai.utilities.logger import Logger
class KnowledgeStorage(BaseKnowledgeStorage):
"""
Extends Storage to handle embeddings for memory entries, improving
search efficiency.
"""
def __init__(
self,
embedder: ProviderSpec
| BaseEmbeddingsProvider
| type[BaseEmbeddingsProvider]
| None = None,
collection_name: str | None = None,
) -> None:
self.collection_name = collection_name
self._client: BaseClient | None = None
warnings.filterwarnings(
"ignore",
message=r".*'model_fields'.*is deprecated.*",
module=r"^chromadb(\.|$)",
)
if embedder:
embedding_function = build_embedder(embedder) # type: ignore[arg-type]
config = ChromaDBConfig(
embedding_function=cast(
ChromaEmbeddingFunctionWrapper, embedding_function
)
)
self._client = create_client(config)
def _get_client(self) -> BaseClient:
"""Get the appropriate client - instance-specific or global."""
return self._client if self._client else get_rag_client()
def search(
self,
query: list[str],
limit: int = 5,
metadata_filter: dict[str, Any] | None = None,
score_threshold: float = 0.6,
) -> list[SearchResult]:
try:
if not query:
raise ValueError("Query cannot be empty")
client = self._get_client()
collection_name = (
f"knowledge_{self.collection_name}"
if self.collection_name
else "knowledge"
)
query_text = " ".join(query) if len(query) > 1 else query[0]
return client.search(
collection_name=collection_name,
query=query_text,
limit=limit,
metadata_filter=metadata_filter,
score_threshold=score_threshold,
)
except Exception as e:
logging.error(
f"Error during knowledge search: {e!s}\n{traceback.format_exc()}"
)
return []
def reset(self) -> None:
try:
client = self._get_client()
collection_name = (
f"knowledge_{self.collection_name}"
if self.collection_name
else "knowledge"
)
client.delete_collection(collection_name=collection_name)
except Exception as e:
logging.error(
f"Error during knowledge reset: {e!s}\n{traceback.format_exc()}"
)
def save(self, documents: list[str]) -> None:
try:
client = self._get_client()
collection_name = (
f"knowledge_{self.collection_name}"
if self.collection_name
else "knowledge"
)
client.get_or_create_collection(collection_name=collection_name)
rag_documents: list[BaseRecord] = [{"content": doc} for doc in documents]
client.add_documents(
collection_name=collection_name, documents=rag_documents
)
except Exception as e:
if "dimension mismatch" in str(e).lower():
Logger(verbose=True).log(
"error",
"Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`",
"red",
)
raise ValueError(
"Embedding dimension mismatch. Make sure you're using the same embedding model "
"across all operations with this collection."
"Try resetting the collection using `crewai reset-memories -a`"
) from e
Logger(verbose=True).log("error", f"Failed to upsert documents: {e}", "red")
raise

View File

@@ -1,98 +0,0 @@
"""AI Suite LLM integration for CrewAI.
This module provides integration with AI Suite for LLM capabilities.
"""
from typing import Any
import aisuite as ai # type: ignore
from crewai.llms.base_llm import BaseLLM
class AISuiteLLM(BaseLLM):
"""AI Suite LLM implementation.
This class provides integration with AI Suite models through the BaseLLM interface.
"""
def __init__(
self,
model: str,
temperature: float | None = None,
stop: list[str] | None = None,
**kwargs: Any,
) -> None:
"""Initialize the AI Suite LLM.
Args:
model: The model identifier for AI Suite.
temperature: Optional temperature setting for response generation.
stop: Optional list of stop sequences for generation.
**kwargs: Additional keyword arguments passed to the AI Suite client.
"""
super().__init__(model, temperature, stop)
self.client = ai.Client()
self.kwargs = kwargs
def call(
self,
messages: str | list[dict[str, str]],
tools: list[dict] | None = None,
callbacks: list[Any] | None = None,
available_functions: dict[str, Any] | None = None,
from_task: Any | None = None,
from_agent: Any | None = None,
) -> str | Any:
"""Call the AI Suite LLM with the given messages.
Args:
messages: Input messages for the LLM.
tools: Optional list of tool schemas for function calling.
callbacks: Optional list of callback functions.
available_functions: Optional dict mapping function names to callables.
from_task: Optional task caller.
from_agent: Optional agent caller.
Returns:
The text response from the LLM.
"""
completion_params = self._prepare_completion_params(messages, tools)
response = self.client.chat.completions.create(**completion_params)
return response.choices[0].message.content
def _prepare_completion_params(
self,
messages: str | list[dict[str, str]],
tools: list[dict] | None = None,
) -> dict[str, Any]:
"""Prepare parameters for the AI Suite completion call.
Args:
messages: Input messages for the LLM.
tools: Optional list of tool schemas.
Returns:
Dictionary of parameters for the completion API.
"""
params: dict[str, Any] = {
"model": self.model,
"messages": messages,
"temperature": self.temperature,
"tools": tools,
**self.kwargs,
}
if self.stop:
params["stop"] = self.stop
return params
def supports_function_calling(self) -> bool:
"""Check if the LLM supports function calling.
Returns:
False, as AI Suite does not currently support function calling.
"""
return False

View File

@@ -1,204 +0,0 @@
import logging
import traceback
import warnings
from typing import Any, cast
from crewai.rag.chromadb.config import ChromaDBConfig
from crewai.rag.chromadb.types import ChromaEmbeddingFunctionWrapper
from crewai.rag.config.utils import get_rag_client
from crewai.rag.core.base_client import BaseClient
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
from crewai.rag.embeddings.factory import build_embedder
from crewai.rag.embeddings.types import ProviderSpec
from crewai.rag.factory import create_client
from crewai.rag.storage.base_rag_storage import BaseRAGStorage
from crewai.rag.types import BaseRecord
from crewai.utilities.constants import MAX_FILE_NAME_LENGTH
from crewai.utilities.paths import db_storage_path
class RAGStorage(BaseRAGStorage):
"""
Extends Storage to handle embeddings for memory entries, improving
search efficiency.
"""
def __init__(
self,
type: str,
allow_reset: bool = True,
embedder_config: ProviderSpec | BaseEmbeddingsProvider | None = None,
crew: Any = None,
path: str | None = None,
) -> None:
super().__init__(type, allow_reset, embedder_config, crew)
agents = crew.agents if crew else []
agents = [self._sanitize_role(agent.role) for agent in agents]
agents = "_".join(agents)
self.agents = agents
self.storage_file_name = self._build_storage_file_name(type, agents)
self.type = type
self._client: BaseClient | None = None
self.allow_reset = allow_reset
self.path = path
warnings.filterwarnings(
"ignore",
message=r".*'model_fields'.*is deprecated.*",
module=r"^chromadb(\.|$)",
)
if self.embedder_config:
embedding_function = build_embedder(self.embedder_config)
try:
_ = embedding_function(["test"])
except Exception as e:
provider = (
self.embedder_config["provider"]
if isinstance(self.embedder_config, dict)
else self.embedder_config.__class__.__name__.replace(
"Provider", ""
).lower()
)
raise ValueError(
f"Failed to initialize embedder. Please check your configuration or connection.\n"
f"Provider: {provider}\n"
f"Error: {e}"
) from e
batch_size = None
if (
isinstance(self.embedder_config, dict)
and "config" in self.embedder_config
):
nested_config = self.embedder_config["config"]
if isinstance(nested_config, dict):
batch_size = nested_config.get("batch_size")
if batch_size is not None:
config = ChromaDBConfig(
embedding_function=cast(
ChromaEmbeddingFunctionWrapper, embedding_function
),
batch_size=cast(int, batch_size),
)
else:
config = ChromaDBConfig(
embedding_function=cast(
ChromaEmbeddingFunctionWrapper, embedding_function
)
)
self._client = create_client(config)
def _get_client(self) -> BaseClient:
"""Get the appropriate client - instance-specific or global."""
return self._client if self._client else get_rag_client()
def _sanitize_role(self, role: str) -> str:
"""
Sanitizes agent roles to ensure valid directory names.
"""
return role.replace("\n", "").replace(" ", "_").replace("/", "_")
def _build_storage_file_name(self, type: str, file_name: str) -> str:
"""
Ensures file name does not exceed max allowed by OS
"""
base_path = f"{db_storage_path()}/{type}"
if len(file_name) > MAX_FILE_NAME_LENGTH:
logging.warning(
f"Trimming file name from {len(file_name)} to {MAX_FILE_NAME_LENGTH} characters."
)
file_name = file_name[:MAX_FILE_NAME_LENGTH]
return f"{base_path}/{file_name}"
def save(self, value: Any, metadata: dict[str, Any]) -> None:
try:
client = self._get_client()
collection_name = (
f"memory_{self.type}_{self.agents}"
if self.agents
else f"memory_{self.type}"
)
client.get_or_create_collection(collection_name=collection_name)
document: BaseRecord = {"content": value}
if metadata:
document["metadata"] = metadata
batch_size = None
if (
self.embedder_config
and isinstance(self.embedder_config, dict)
and "config" in self.embedder_config
):
nested_config = self.embedder_config["config"]
if isinstance(nested_config, dict):
batch_size = nested_config.get("batch_size")
if batch_size is not None:
client.add_documents(
collection_name=collection_name,
documents=[document],
batch_size=cast(int, batch_size),
)
else:
client.add_documents(
collection_name=collection_name, documents=[document]
)
except Exception as e:
logging.error(
f"Error during {self.type} save: {e!s}\n{traceback.format_exc()}"
)
def search(
self,
query: str,
limit: int = 5,
filter: dict[str, Any] | None = None,
score_threshold: float = 0.6,
) -> list[Any]:
try:
client = self._get_client()
collection_name = (
f"memory_{self.type}_{self.agents}"
if self.agents
else f"memory_{self.type}"
)
return client.search(
collection_name=collection_name,
query=query,
limit=limit,
metadata_filter=filter,
score_threshold=score_threshold,
)
except Exception as e:
logging.error(
f"Error during {self.type} search: {e!s}\n{traceback.format_exc()}"
)
return []
def reset(self) -> None:
try:
client = self._get_client()
collection_name = (
f"memory_{self.type}_{self.agents}"
if self.agents
else f"memory_{self.type}"
)
client.delete_collection(collection_name=collection_name)
except Exception as e:
if "attempt to write a readonly database" in str(
e
) or "does not exist" in str(e):
# Ignore readonly database and collection not found errors (already reset)
pass
else:
raise Exception(
f"An error occurred while resetting the {self.type} memory: {e}"
) from e

View File

@@ -1 +0,0 @@
"""Optional imports for RAG configuration providers."""

View File

@@ -1,149 +0,0 @@
"""Base embeddings callable utilities for RAG systems."""
from typing import Protocol, TypeVar, runtime_checkable
import numpy as np
from crewai.rag.core.types import (
Embeddable,
Embedding,
Embeddings,
PyEmbedding,
)
T = TypeVar("T")
D = TypeVar("D", bound=Embeddable, contravariant=True)
def normalize_embeddings(
target: Embedding | list[Embedding] | PyEmbedding | list[PyEmbedding],
) -> Embeddings | None:
"""Normalize various embedding formats to a standard list of numpy arrays.
Args:
target: Input embeddings in various formats (list of floats, list of lists,
numpy array, or list of numpy arrays).
Returns:
Normalized embeddings as a list of numpy arrays, or None if input is None.
Raises:
ValueError: If embeddings are empty or in an unsupported format.
"""
if isinstance(target, np.ndarray):
if target.ndim == 1:
return [target.astype(np.float32)]
if target.ndim == 2:
return [row.astype(np.float32) for row in target]
raise ValueError(f"Unsupported numpy array shape: {target.shape}")
first = target[0]
if isinstance(first, (int, float)) and not isinstance(first, bool):
return [np.array(target, dtype=np.float32)]
if isinstance(first, list):
return [np.array(emb, dtype=np.float32) for emb in target]
if isinstance(first, np.ndarray):
return [emb.astype(np.float32) for emb in target] # type: ignore[union-attr]
raise ValueError(f"Unsupported embeddings format: {type(first)}")
def maybe_cast_one_to_many(target: T | list[T] | None) -> list[T] | None:
"""Cast a single item to a list if needed.
Args:
target: A single item or list of items.
Returns:
A list of items or None if input is None.
"""
if target is None:
return None
return target if isinstance(target, list) else [target]
def validate_embeddings(embeddings: Embeddings) -> Embeddings:
"""Validate embeddings format and content.
Args:
embeddings: List of numpy arrays to validate.
Returns:
Validated embeddings.
Raises:
ValueError: If embeddings format or content is invalid.
"""
if not isinstance(embeddings, list):
raise ValueError(
f"Expected embeddings to be a list, got {type(embeddings).__name__}"
)
if len(embeddings) == 0:
raise ValueError(
f"Expected embeddings to be a list with at least one item, got {len(embeddings)} embeddings"
)
if not all(isinstance(e, np.ndarray) for e in embeddings):
raise ValueError(
"Expected each embedding in the embeddings to be a numpy array"
)
for i, embedding in enumerate(embeddings):
if embedding.ndim == 0:
raise ValueError(
f"Expected a 1-dimensional array, got a 0-dimensional array {embedding}"
)
if embedding.size == 0:
raise ValueError(
f"Expected each embedding to be a 1-dimensional numpy array with at least 1 value. "
f"Got an array with no values at position {i}"
)
if not all(
isinstance(value, (np.integer, float, np.floating))
and not isinstance(value, bool)
for value in embedding
):
raise ValueError(
f"Expected embedding to contain numeric values, got non-numeric values at position {i}"
)
return embeddings
@runtime_checkable
class EmbeddingFunction(Protocol[D]):
"""Protocol for embedding functions.
Embedding functions convert input data (documents or images) into vector embeddings.
"""
def __call__(self, input: D) -> Embeddings:
"""Convert input data to embeddings.
Args:
input: Input data to embed (documents or images).
Returns:
List of numpy arrays representing the embeddings.
"""
...
def __init_subclass__(cls) -> None:
"""Wrap __call__ method to normalize and validate embeddings."""
super().__init_subclass__()
original_call = cls.__call__
def wrapped_call(self: EmbeddingFunction[D], input: D) -> Embeddings:
result = original_call(self, input)
if result is None:
raise ValueError("Embedding function returned None")
normalized = normalize_embeddings(result)
if normalized is None:
raise ValueError("Normalization returned None for non-None input")
return validate_embeddings(normalized)
cls.__call__ = wrapped_call # type: ignore[method-assign]
def embed_query(self, input: D) -> Embeddings:
"""
Get the embeddings for a query input.
This method is optional, and if not implemented, the default behavior is to call __call__.
"""
return self.__call__(input=input)

View File

@@ -1,23 +0,0 @@
"""Base class for embedding providers."""
from typing import Generic, TypeVar
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from crewai.rag.core.base_embeddings_callable import EmbeddingFunction
T = TypeVar("T", bound=EmbeddingFunction)
class BaseEmbeddingsProvider(BaseSettings, Generic[T]):
"""Abstract base class for embedding providers.
This class provides a common interface for dynamically loading and building
embedding functions from various providers.
"""
model_config = SettingsConfigDict(extra="allow", populate_by_name=True)
embedding_callable: type[T] = Field(
..., description="The embedding function class to use"
)

View File

@@ -1,28 +0,0 @@
"""Core type definitions for RAG systems."""
from collections.abc import Sequence
from typing import TypeVar
import numpy as np
from numpy import floating, integer, number
from numpy.typing import NDArray
T = TypeVar("T")
PyEmbedding = Sequence[float] | Sequence[int]
PyEmbeddings = list[PyEmbedding]
Embedding = NDArray[np.int32 | np.float32]
Embeddings = list[Embedding]
Documents = list[str]
Images = list[np.ndarray]
Embeddable = Documents | Images
ScalarType = TypeVar("ScalarType", bound=np.generic)
IntegerType = TypeVar("IntegerType", bound=integer)
FloatingType = TypeVar("FloatingType", bound=floating)
NumberType = TypeVar("NumberType", bound=number)
DType32 = TypeVar("DType32", np.int32, np.float32)
DType64 = TypeVar("DType64", np.int64, np.float64)
DTypeCommon = TypeVar("DTypeCommon", np.int32, np.int64, np.float32, np.float64)

View File

@@ -1 +0,0 @@
"""Embedding components for RAG infrastructure."""

View File

@@ -1,392 +0,0 @@
"""Factory functions for creating embedding providers and functions."""
from __future__ import annotations
import warnings
from typing import TYPE_CHECKING, TypeVar, overload
from typing_extensions import deprecated
from crewai.rag.core.base_embeddings_callable import EmbeddingFunction
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
from crewai.utilities.import_utils import import_and_validate_definition
if TYPE_CHECKING:
from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import (
AmazonBedrockEmbeddingFunction,
)
from chromadb.utils.embedding_functions.cohere_embedding_function import (
CohereEmbeddingFunction,
)
from chromadb.utils.embedding_functions.google_embedding_function import (
GoogleGenerativeAiEmbeddingFunction,
GoogleVertexEmbeddingFunction,
)
from chromadb.utils.embedding_functions.huggingface_embedding_function import (
HuggingFaceEmbeddingFunction,
)
from chromadb.utils.embedding_functions.instructor_embedding_function import (
InstructorEmbeddingFunction,
)
from chromadb.utils.embedding_functions.jina_embedding_function import (
JinaEmbeddingFunction,
)
from chromadb.utils.embedding_functions.ollama_embedding_function import (
OllamaEmbeddingFunction,
)
from chromadb.utils.embedding_functions.onnx_mini_lm_l6_v2 import ONNXMiniLM_L6_V2
from chromadb.utils.embedding_functions.open_clip_embedding_function import (
OpenCLIPEmbeddingFunction,
)
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
from chromadb.utils.embedding_functions.roboflow_embedding_function import (
RoboflowEmbeddingFunction,
)
from chromadb.utils.embedding_functions.sentence_transformer_embedding_function import (
SentenceTransformerEmbeddingFunction,
)
from chromadb.utils.embedding_functions.text2vec_embedding_function import (
Text2VecEmbeddingFunction,
)
from crewai.rag.embeddings.providers.aws.types import BedrockProviderSpec
from crewai.rag.embeddings.providers.cohere.types import CohereProviderSpec
from crewai.rag.embeddings.providers.custom.types import CustomProviderSpec
from crewai.rag.embeddings.providers.google.types import (
GenerativeAiProviderSpec,
VertexAIProviderSpec,
)
from crewai.rag.embeddings.providers.huggingface.types import (
HuggingFaceProviderSpec,
)
from crewai.rag.embeddings.providers.ibm.embedding_callable import (
WatsonXEmbeddingFunction,
)
from crewai.rag.embeddings.providers.ibm.types import (
WatsonProviderSpec,
WatsonXProviderSpec,
)
from crewai.rag.embeddings.providers.instructor.types import InstructorProviderSpec
from crewai.rag.embeddings.providers.jina.types import JinaProviderSpec
from crewai.rag.embeddings.providers.microsoft.types import AzureProviderSpec
from crewai.rag.embeddings.providers.ollama.types import OllamaProviderSpec
from crewai.rag.embeddings.providers.onnx.types import ONNXProviderSpec
from crewai.rag.embeddings.providers.openai.types import OpenAIProviderSpec
from crewai.rag.embeddings.providers.openclip.types import OpenCLIPProviderSpec
from crewai.rag.embeddings.providers.roboflow.types import RoboflowProviderSpec
from crewai.rag.embeddings.providers.sentence_transformer.types import (
SentenceTransformerProviderSpec,
)
from crewai.rag.embeddings.providers.text2vec.types import Text2VecProviderSpec
from crewai.rag.embeddings.providers.voyageai.embedding_callable import (
VoyageAIEmbeddingFunction,
)
from crewai.rag.embeddings.providers.voyageai.types import VoyageAIProviderSpec
T = TypeVar("T", bound=EmbeddingFunction)
PROVIDER_PATHS = {
"azure": "crewai.rag.embeddings.providers.microsoft.azure.AzureProvider",
"amazon-bedrock": "crewai.rag.embeddings.providers.aws.bedrock.BedrockProvider",
"cohere": "crewai.rag.embeddings.providers.cohere.cohere_provider.CohereProvider",
"custom": "crewai.rag.embeddings.providers.custom.custom_provider.CustomProvider",
"google-generativeai": "crewai.rag.embeddings.providers.google.generative_ai.GenerativeAiProvider",
"google-vertex": "crewai.rag.embeddings.providers.google.vertex.VertexAIProvider",
"huggingface": "crewai.rag.embeddings.providers.huggingface.huggingface_provider.HuggingFaceProvider",
"instructor": "crewai.rag.embeddings.providers.instructor.instructor_provider.InstructorProvider",
"jina": "crewai.rag.embeddings.providers.jina.jina_provider.JinaProvider",
"ollama": "crewai.rag.embeddings.providers.ollama.ollama_provider.OllamaProvider",
"onnx": "crewai.rag.embeddings.providers.onnx.onnx_provider.ONNXProvider",
"openai": "crewai.rag.embeddings.providers.openai.openai_provider.OpenAIProvider",
"openclip": "crewai.rag.embeddings.providers.openclip.openclip_provider.OpenCLIPProvider",
"roboflow": "crewai.rag.embeddings.providers.roboflow.roboflow_provider.RoboflowProvider",
"sentence-transformer": "crewai.rag.embeddings.providers.sentence_transformer.sentence_transformer_provider.SentenceTransformerProvider",
"text2vec": "crewai.rag.embeddings.providers.text2vec.text2vec_provider.Text2VecProvider",
"voyageai": "crewai.rag.embeddings.providers.voyageai.voyageai_provider.VoyageAIProvider",
"watson": "crewai.rag.embeddings.providers.ibm.watsonx.WatsonXProvider", # Deprecated alias
"watsonx": "crewai.rag.embeddings.providers.ibm.watsonx.WatsonXProvider",
}
def build_embedder_from_provider(provider: BaseEmbeddingsProvider[T]) -> T:
"""Build an embedding function instance from a provider.
Args:
provider: The embedding provider configuration.
Returns:
An instance of the specified embedding function type.
"""
return provider.embedding_callable(
**provider.model_dump(exclude={"embedding_callable"})
)
@overload
def build_embedder_from_dict(spec: AzureProviderSpec) -> OpenAIEmbeddingFunction: ...
@overload
def build_embedder_from_dict(
spec: BedrockProviderSpec,
) -> AmazonBedrockEmbeddingFunction: ...
@overload
def build_embedder_from_dict(spec: CohereProviderSpec) -> CohereEmbeddingFunction: ...
@overload
def build_embedder_from_dict(spec: CustomProviderSpec) -> EmbeddingFunction: ...
@overload
def build_embedder_from_dict(
spec: GenerativeAiProviderSpec,
) -> GoogleGenerativeAiEmbeddingFunction: ...
@overload
def build_embedder_from_dict(
spec: HuggingFaceProviderSpec,
) -> HuggingFaceEmbeddingFunction: ...
@overload
def build_embedder_from_dict(spec: OllamaProviderSpec) -> OllamaEmbeddingFunction: ...
@overload
def build_embedder_from_dict(spec: OpenAIProviderSpec) -> OpenAIEmbeddingFunction: ...
@overload
def build_embedder_from_dict(
spec: VertexAIProviderSpec,
) -> GoogleVertexEmbeddingFunction: ...
@overload
def build_embedder_from_dict(
spec: VoyageAIProviderSpec,
) -> VoyageAIEmbeddingFunction: ...
@overload
def build_embedder_from_dict(spec: WatsonXProviderSpec) -> WatsonXEmbeddingFunction: ...
@overload
@deprecated(
'The "WatsonProviderSpec" provider spec is deprecated and will be removed in v1.0.0. Use "WatsonXProviderSpec" instead.'
)
def build_embedder_from_dict(spec: WatsonProviderSpec) -> WatsonXEmbeddingFunction: ...
@overload
def build_embedder_from_dict(
spec: SentenceTransformerProviderSpec,
) -> SentenceTransformerEmbeddingFunction: ...
@overload
def build_embedder_from_dict(
spec: InstructorProviderSpec,
) -> InstructorEmbeddingFunction: ...
@overload
def build_embedder_from_dict(spec: JinaProviderSpec) -> JinaEmbeddingFunction: ...
@overload
def build_embedder_from_dict(
spec: RoboflowProviderSpec,
) -> RoboflowEmbeddingFunction: ...
@overload
def build_embedder_from_dict(
spec: OpenCLIPProviderSpec,
) -> OpenCLIPEmbeddingFunction: ...
@overload
def build_embedder_from_dict(
spec: Text2VecProviderSpec,
) -> Text2VecEmbeddingFunction: ...
@overload
def build_embedder_from_dict(spec: ONNXProviderSpec) -> ONNXMiniLM_L6_V2: ...
def build_embedder_from_dict(spec):
"""Build an embedding function instance from a dictionary specification.
Args:
spec: A dictionary with 'provider' and 'config' keys.
Example: {
"provider": "openai",
"config": {
"api_key": "sk-...",
"model_name": "text-embedding-3-small"
}
}
Returns:
An instance of the appropriate embedding function.
Raises:
ValueError: If the provider is not recognized.
"""
provider_name = spec["provider"]
if not provider_name:
raise ValueError("Missing 'provider' key in specification")
if provider_name == "watson":
warnings.warn(
'The "watson" provider key is deprecated and will be removed in v1.0.0. '
'Use "watsonx" instead.',
DeprecationWarning,
stacklevel=2,
)
if provider_name not in PROVIDER_PATHS:
raise ValueError(
f"Unknown provider: {provider_name}. Available providers: {list(PROVIDER_PATHS.keys())}"
)
provider_path = PROVIDER_PATHS[provider_name]
try:
provider_class = import_and_validate_definition(provider_path)
except (ImportError, AttributeError, ValueError) as e:
raise ImportError(f"Failed to import provider {provider_name}: {e}") from e
provider_config = spec.get("config", {})
if provider_name == "custom" and "embedding_callable" not in provider_config:
raise ValueError("Custom provider requires 'embedding_callable' in config")
provider = provider_class(**provider_config)
return build_embedder_from_provider(provider)
@overload
def build_embedder(spec: BaseEmbeddingsProvider[T]) -> T: ...
@overload
def build_embedder(spec: AzureProviderSpec) -> OpenAIEmbeddingFunction: ...
@overload
def build_embedder(spec: BedrockProviderSpec) -> AmazonBedrockEmbeddingFunction: ...
@overload
def build_embedder(spec: CohereProviderSpec) -> CohereEmbeddingFunction: ...
@overload
def build_embedder(spec: CustomProviderSpec) -> EmbeddingFunction: ...
@overload
def build_embedder(
spec: GenerativeAiProviderSpec,
) -> GoogleGenerativeAiEmbeddingFunction: ...
@overload
def build_embedder(spec: HuggingFaceProviderSpec) -> HuggingFaceEmbeddingFunction: ...
@overload
def build_embedder(spec: OllamaProviderSpec) -> OllamaEmbeddingFunction: ...
@overload
def build_embedder(spec: OpenAIProviderSpec) -> OpenAIEmbeddingFunction: ...
@overload
def build_embedder(spec: VertexAIProviderSpec) -> GoogleVertexEmbeddingFunction: ...
@overload
def build_embedder(spec: VoyageAIProviderSpec) -> VoyageAIEmbeddingFunction: ...
@overload
def build_embedder(spec: WatsonXProviderSpec) -> WatsonXEmbeddingFunction: ...
@overload
@deprecated(
'The "WatsonProviderSpec" provider spec is deprecated and will be removed in v1.0.0. Use "WatsonXProviderSpec" instead.'
)
def build_embedder(spec: WatsonProviderSpec) -> WatsonXEmbeddingFunction: ...
@overload
def build_embedder(
spec: SentenceTransformerProviderSpec,
) -> SentenceTransformerEmbeddingFunction: ...
@overload
def build_embedder(spec: InstructorProviderSpec) -> InstructorEmbeddingFunction: ...
@overload
def build_embedder(spec: JinaProviderSpec) -> JinaEmbeddingFunction: ...
@overload
def build_embedder(spec: RoboflowProviderSpec) -> RoboflowEmbeddingFunction: ...
@overload
def build_embedder(spec: OpenCLIPProviderSpec) -> OpenCLIPEmbeddingFunction: ...
@overload
def build_embedder(spec: Text2VecProviderSpec) -> Text2VecEmbeddingFunction: ...
@overload
def build_embedder(spec: ONNXProviderSpec) -> ONNXMiniLM_L6_V2: ...
def build_embedder(spec):
"""Build an embedding function from either a provider spec or a provider instance.
Args:
spec: Either a provider specification dictionary or a provider instance.
Returns:
An embedding function instance. If a typed provider is passed, returns
the specific embedding function type.
Examples:
# From dictionary specification
embedder = build_embedder({
"provider": "openai",
"config": {"api_key": "sk-..."}
})
# From provider instance
provider = OpenAIProvider(api_key="sk-...")
embedder = build_embedder(provider)
"""
if isinstance(spec, BaseEmbeddingsProvider):
return build_embedder_from_provider(spec)
return build_embedder_from_dict(spec)
# Backward compatibility alias
get_embedding_function = build_embedder

View File

@@ -1 +0,0 @@
"""Embedding provider implementations."""

View File

@@ -1,13 +0,0 @@
"""AWS embedding providers."""
from crewai.rag.embeddings.providers.aws.bedrock import BedrockProvider
from crewai.rag.embeddings.providers.aws.types import (
BedrockProviderConfig,
BedrockProviderSpec,
)
__all__ = [
"BedrockProvider",
"BedrockProviderConfig",
"BedrockProviderSpec",
]

View File

@@ -1,53 +0,0 @@
"""Amazon Bedrock embeddings provider."""
from typing import Any
from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import (
AmazonBedrockEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
def create_aws_session() -> Any:
"""Create an AWS session for Bedrock.
Returns:
boto3.Session: AWS session object
Raises:
ImportError: If boto3 is not installed
ValueError: If AWS session creation fails
"""
try:
import boto3 # type: ignore[import]
return boto3.Session()
except ImportError as e:
raise ImportError(
"boto3 is required for amazon-bedrock embeddings. "
"Install it with: uv add boto3"
) from e
except Exception as e:
raise ValueError(
f"Failed to create AWS session for amazon-bedrock. "
f"Ensure AWS credentials are configured. Error: {e}"
) from e
class BedrockProvider(BaseEmbeddingsProvider[AmazonBedrockEmbeddingFunction]):
"""Amazon Bedrock embeddings provider."""
embedding_callable: type[AmazonBedrockEmbeddingFunction] = Field(
default=AmazonBedrockEmbeddingFunction,
description="Amazon Bedrock embedding function class",
)
model_name: str = Field(
default="amazon.titan-embed-text-v1",
description="Model name to use for embeddings",
validation_alias="EMBEDDINGS_BEDROCK_MODEL_NAME",
)
session: Any = Field(
default_factory=create_aws_session, description="AWS session object"
)

View File

@@ -1,19 +0,0 @@
"""Type definitions for AWS embedding providers."""
from typing import Annotated, Any, Literal
from typing_extensions import Required, TypedDict
class BedrockProviderConfig(TypedDict, total=False):
"""Configuration for Bedrock provider."""
model_name: Annotated[str, "amazon.titan-embed-text-v1"]
session: Any
class BedrockProviderSpec(TypedDict, total=False):
"""Bedrock provider specification."""
provider: Required[Literal["amazon-bedrock"]]
config: BedrockProviderConfig

View File

@@ -1,13 +0,0 @@
"""Cohere embedding providers."""
from crewai.rag.embeddings.providers.cohere.cohere_provider import CohereProvider
from crewai.rag.embeddings.providers.cohere.types import (
CohereProviderConfig,
CohereProviderSpec,
)
__all__ = [
"CohereProvider",
"CohereProviderConfig",
"CohereProviderSpec",
]

View File

@@ -1,24 +0,0 @@
"""Cohere embeddings provider."""
from chromadb.utils.embedding_functions.cohere_embedding_function import (
CohereEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class CohereProvider(BaseEmbeddingsProvider[CohereEmbeddingFunction]):
"""Cohere embeddings provider."""
embedding_callable: type[CohereEmbeddingFunction] = Field(
default=CohereEmbeddingFunction, description="Cohere embedding function class"
)
api_key: str = Field(
description="Cohere API key", validation_alias="EMBEDDINGS_COHERE_API_KEY"
)
model_name: str = Field(
default="large",
description="Model name to use for embeddings",
validation_alias="EMBEDDINGS_COHERE_MODEL_NAME",
)

View File

@@ -1,19 +0,0 @@
"""Type definitions for Cohere embedding providers."""
from typing import Annotated, Literal
from typing_extensions import Required, TypedDict
class CohereProviderConfig(TypedDict, total=False):
"""Configuration for Cohere provider."""
api_key: str
model_name: Annotated[str, "large"]
class CohereProviderSpec(TypedDict, total=False):
"""Cohere provider specification."""
provider: Required[Literal["cohere"]]
config: CohereProviderConfig

View File

@@ -1,13 +0,0 @@
"""Custom embedding providers."""
from crewai.rag.embeddings.providers.custom.custom_provider import CustomProvider
from crewai.rag.embeddings.providers.custom.types import (
CustomProviderConfig,
CustomProviderSpec,
)
__all__ = [
"CustomProvider",
"CustomProviderConfig",
"CustomProviderSpec",
]

View File

@@ -1,19 +0,0 @@
"""Custom embeddings provider for user-defined embedding functions."""
from pydantic import Field
from pydantic_settings import SettingsConfigDict
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
from crewai.rag.embeddings.providers.custom.embedding_callable import (
CustomEmbeddingFunction,
)
class CustomProvider(BaseEmbeddingsProvider[CustomEmbeddingFunction]):
"""Custom embeddings provider for user-defined embedding functions."""
embedding_callable: type[CustomEmbeddingFunction] = Field(
..., description="Custom embedding function class"
)
model_config = SettingsConfigDict(extra="allow")

View File

@@ -1,22 +0,0 @@
"""Custom embedding function base implementation."""
from crewai.rag.core.base_embeddings_callable import EmbeddingFunction
from crewai.rag.core.types import Documents, Embeddings
class CustomEmbeddingFunction(EmbeddingFunction[Documents]):
"""Base class for custom embedding functions.
This provides a concrete implementation that can be subclassed for custom embeddings.
"""
def __call__(self, input: Documents) -> Embeddings:
"""Convert input documents to embeddings.
Args:
input: List of documents to embed.
Returns:
List of numpy arrays representing the embeddings.
"""
raise NotImplementedError("Subclasses must implement __call__ method")

View File

@@ -1,19 +0,0 @@
"""Type definitions for custom embedding providers."""
from typing import Literal
from chromadb.api.types import EmbeddingFunction
from typing_extensions import Required, TypedDict
class CustomProviderConfig(TypedDict, total=False):
"""Configuration for Custom provider."""
embedding_callable: type[EmbeddingFunction]
class CustomProviderSpec(TypedDict, total=False):
"""Custom provider specification."""
provider: Required[Literal["custom"]]
config: CustomProviderConfig

View File

@@ -1,23 +0,0 @@
"""Google embedding providers."""
from crewai.rag.embeddings.providers.google.generative_ai import (
GenerativeAiProvider,
)
from crewai.rag.embeddings.providers.google.types import (
GenerativeAiProviderConfig,
GenerativeAiProviderSpec,
VertexAIProviderConfig,
VertexAIProviderSpec,
)
from crewai.rag.embeddings.providers.google.vertex import (
VertexAIProvider,
)
__all__ = [
"GenerativeAiProvider",
"GenerativeAiProviderConfig",
"GenerativeAiProviderSpec",
"VertexAIProvider",
"VertexAIProviderConfig",
"VertexAIProviderSpec",
]

View File

@@ -1,30 +0,0 @@
"""Google Generative AI embeddings provider."""
from chromadb.utils.embedding_functions.google_embedding_function import (
GoogleGenerativeAiEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class GenerativeAiProvider(BaseEmbeddingsProvider[GoogleGenerativeAiEmbeddingFunction]):
"""Google Generative AI embeddings provider."""
embedding_callable: type[GoogleGenerativeAiEmbeddingFunction] = Field(
default=GoogleGenerativeAiEmbeddingFunction,
description="Google Generative AI embedding function class",
)
model_name: str = Field(
default="models/embedding-001",
description="Model name to use for embeddings",
validation_alias="EMBEDDINGS_GOOGLE_GENERATIVE_AI_MODEL_NAME",
)
api_key: str = Field(
description="Google API key", validation_alias="EMBEDDINGS_GOOGLE_API_KEY"
)
task_type: str = Field(
default="RETRIEVAL_DOCUMENT",
description="Task type for embeddings",
validation_alias="EMBEDDINGS_GOOGLE_GENERATIVE_AI_TASK_TYPE",
)

View File

@@ -1,36 +0,0 @@
"""Type definitions for Google embedding providers."""
from typing import Annotated, Literal
from typing_extensions import Required, TypedDict
class GenerativeAiProviderConfig(TypedDict, total=False):
"""Configuration for Google Generative AI provider."""
api_key: str
model_name: Annotated[str, "models/embedding-001"]
task_type: Annotated[str, "RETRIEVAL_DOCUMENT"]
class GenerativeAiProviderSpec(TypedDict):
"""Google Generative AI provider specification."""
provider: Literal["google-generativeai"]
config: GenerativeAiProviderConfig
class VertexAIProviderConfig(TypedDict, total=False):
"""Configuration for Vertex AI provider."""
api_key: str
model_name: Annotated[str, "textembedding-gecko"]
project_id: Annotated[str, "cloud-large-language-models"]
region: Annotated[str, "us-central1"]
class VertexAIProviderSpec(TypedDict, total=False):
"""Vertex AI provider specification."""
provider: Required[Literal["google-vertex"]]
config: VertexAIProviderConfig

View File

@@ -1,35 +0,0 @@
"""Google Vertex AI embeddings provider."""
from chromadb.utils.embedding_functions.google_embedding_function import (
GoogleVertexEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class VertexAIProvider(BaseEmbeddingsProvider[GoogleVertexEmbeddingFunction]):
"""Google Vertex AI embeddings provider."""
embedding_callable: type[GoogleVertexEmbeddingFunction] = Field(
default=GoogleVertexEmbeddingFunction,
description="Vertex AI embedding function class",
)
model_name: str = Field(
default="textembedding-gecko",
description="Model name to use for embeddings",
validation_alias="EMBEDDINGS_GOOGLE_VERTEX_MODEL_NAME",
)
api_key: str = Field(
description="Google API key", validation_alias="EMBEDDINGS_GOOGLE_CLOUD_API_KEY"
)
project_id: str = Field(
default="cloud-large-language-models",
description="GCP project ID",
validation_alias="EMBEDDINGS_GOOGLE_CLOUD_PROJECT",
)
region: str = Field(
default="us-central1",
description="GCP region",
validation_alias="EMBEDDINGS_GOOGLE_CLOUD_REGION",
)

View File

@@ -1,15 +0,0 @@
"""HuggingFace embedding providers."""
from crewai.rag.embeddings.providers.huggingface.huggingface_provider import (
HuggingFaceProvider,
)
from crewai.rag.embeddings.providers.huggingface.types import (
HuggingFaceProviderConfig,
HuggingFaceProviderSpec,
)
__all__ = [
"HuggingFaceProvider",
"HuggingFaceProviderConfig",
"HuggingFaceProviderSpec",
]

View File

@@ -1,20 +0,0 @@
"""HuggingFace embeddings provider."""
from chromadb.utils.embedding_functions.huggingface_embedding_function import (
HuggingFaceEmbeddingServer,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class HuggingFaceProvider(BaseEmbeddingsProvider[HuggingFaceEmbeddingServer]):
"""HuggingFace embeddings provider."""
embedding_callable: type[HuggingFaceEmbeddingServer] = Field(
default=HuggingFaceEmbeddingServer,
description="HuggingFace embedding function class",
)
url: str = Field(
description="HuggingFace API URL", validation_alias="EMBEDDINGS_HUGGINGFACE_URL"
)

View File

@@ -1,18 +0,0 @@
"""Type definitions for HuggingFace embedding providers."""
from typing import Literal
from typing_extensions import Required, TypedDict
class HuggingFaceProviderConfig(TypedDict, total=False):
"""Configuration for HuggingFace provider."""
url: str
class HuggingFaceProviderSpec(TypedDict, total=False):
"""HuggingFace provider specification."""
provider: Required[Literal["huggingface"]]
config: HuggingFaceProviderConfig

View File

@@ -1,17 +0,0 @@
"""IBM embedding providers."""
from crewai.rag.embeddings.providers.ibm.types import (
WatsonProviderSpec,
WatsonXProviderConfig,
WatsonXProviderSpec,
)
from crewai.rag.embeddings.providers.ibm.watsonx import (
WatsonXProvider,
)
__all__ = [
"WatsonProviderSpec",
"WatsonXProvider",
"WatsonXProviderConfig",
"WatsonXProviderSpec",
]

View File

@@ -1,159 +0,0 @@
"""IBM WatsonX embedding function implementation."""
from typing import cast
from chromadb.api.types import Documents, EmbeddingFunction, Embeddings
from typing_extensions import Unpack
from crewai.rag.embeddings.providers.ibm.types import WatsonXProviderConfig
class WatsonXEmbeddingFunction(EmbeddingFunction[Documents]):
"""Embedding function for IBM WatsonX models."""
def __init__(self, **kwargs: Unpack[WatsonXProviderConfig]) -> None:
"""Initialize WatsonX embedding function.
Args:
**kwargs: Configuration parameters for WatsonX Embeddings and Credentials.
"""
super().__init__(**kwargs)
self._config = kwargs
@staticmethod
def name() -> str:
"""Return the name of the embedding function for ChromaDB compatibility."""
return "watsonx"
def __call__(self, input: Documents) -> Embeddings:
"""Generate embeddings for input documents.
Args:
input: List of documents to embed.
Returns:
List of embedding vectors.
"""
try:
import ibm_watsonx_ai.foundation_models as watson_models # type: ignore[import-not-found, import-untyped]
from ibm_watsonx_ai import (
Credentials, # type: ignore[import-not-found, import-untyped]
)
from ibm_watsonx_ai.metanames import ( # type: ignore[import-not-found, import-untyped]
EmbedTextParamsMetaNames as EmbedParams,
)
except ImportError as e:
raise ImportError(
"ibm-watsonx-ai is required for watsonx embeddings. "
"Install it with: uv add ibm-watsonx-ai"
) from e
if isinstance(input, str):
input = [input]
embeddings_config: dict = {
"model_id": self._config["model_id"],
}
if "params" in self._config and self._config["params"] is not None:
embeddings_config["params"] = self._config["params"]
if "project_id" in self._config and self._config["project_id"] is not None:
embeddings_config["project_id"] = self._config["project_id"]
if "space_id" in self._config and self._config["space_id"] is not None:
embeddings_config["space_id"] = self._config["space_id"]
if "api_client" in self._config and self._config["api_client"] is not None:
embeddings_config["api_client"] = self._config["api_client"]
if "verify" in self._config and self._config["verify"] is not None:
embeddings_config["verify"] = self._config["verify"]
if "persistent_connection" in self._config:
embeddings_config["persistent_connection"] = self._config[
"persistent_connection"
]
if "batch_size" in self._config:
embeddings_config["batch_size"] = self._config["batch_size"]
if "concurrency_limit" in self._config:
embeddings_config["concurrency_limit"] = self._config["concurrency_limit"]
if "max_retries" in self._config and self._config["max_retries"] is not None:
embeddings_config["max_retries"] = self._config["max_retries"]
if "delay_time" in self._config and self._config["delay_time"] is not None:
embeddings_config["delay_time"] = self._config["delay_time"]
if (
"retry_status_codes" in self._config
and self._config["retry_status_codes"] is not None
):
embeddings_config["retry_status_codes"] = self._config["retry_status_codes"]
if "credentials" in self._config and self._config["credentials"] is not None:
embeddings_config["credentials"] = self._config["credentials"]
else:
cred_config: dict = {}
if "url" in self._config and self._config["url"] is not None:
cred_config["url"] = self._config["url"]
if "api_key" in self._config and self._config["api_key"] is not None:
cred_config["api_key"] = self._config["api_key"]
if "name" in self._config and self._config["name"] is not None:
cred_config["name"] = self._config["name"]
if (
"iam_serviceid_crn" in self._config
and self._config["iam_serviceid_crn"] is not None
):
cred_config["iam_serviceid_crn"] = self._config["iam_serviceid_crn"]
if (
"trusted_profile_id" in self._config
and self._config["trusted_profile_id"] is not None
):
cred_config["trusted_profile_id"] = self._config["trusted_profile_id"]
if "token" in self._config and self._config["token"] is not None:
cred_config["token"] = self._config["token"]
if (
"projects_token" in self._config
and self._config["projects_token"] is not None
):
cred_config["projects_token"] = self._config["projects_token"]
if "username" in self._config and self._config["username"] is not None:
cred_config["username"] = self._config["username"]
if "password" in self._config and self._config["password"] is not None:
cred_config["password"] = self._config["password"]
if (
"instance_id" in self._config
and self._config["instance_id"] is not None
):
cred_config["instance_id"] = self._config["instance_id"]
if "version" in self._config and self._config["version"] is not None:
cred_config["version"] = self._config["version"]
if (
"bedrock_url" in self._config
and self._config["bedrock_url"] is not None
):
cred_config["bedrock_url"] = self._config["bedrock_url"]
if (
"platform_url" in self._config
and self._config["platform_url"] is not None
):
cred_config["platform_url"] = self._config["platform_url"]
if "proxies" in self._config and self._config["proxies"] is not None:
cred_config["proxies"] = self._config["proxies"]
if (
"verify" not in embeddings_config
and "verify" in self._config
and self._config["verify"] is not None
):
cred_config["verify"] = self._config["verify"]
if cred_config:
embeddings_config["credentials"] = Credentials(**cred_config)
if "params" not in embeddings_config:
embeddings_config["params"] = {
EmbedParams.TRUNCATE_INPUT_TOKENS: 3,
EmbedParams.RETURN_OPTIONS: {"input_text": True},
}
embedding = watson_models.Embeddings(**embeddings_config)
try:
embeddings = embedding.embed_documents(input)
return cast(Embeddings, embeddings)
except Exception as e:
print(f"Error during WatsonX embedding: {e}")
raise

View File

@@ -1,58 +0,0 @@
"""Type definitions for IBM WatsonX embedding providers."""
from typing import Annotated, Any, Literal
from typing_extensions import Required, TypedDict, deprecated
class WatsonXProviderConfig(TypedDict, total=False):
"""Configuration for WatsonX provider."""
model_id: str
url: str
params: dict[str, str | dict[str, str]]
credentials: Any
project_id: str
space_id: str
api_client: Any
verify: bool | str
persistent_connection: Annotated[bool, True]
batch_size: Annotated[int, 100]
concurrency_limit: Annotated[int, 10]
max_retries: int
delay_time: float
retry_status_codes: list[int]
api_key: str
name: str
iam_serviceid_crn: str
trusted_profile_id: str
token: str
projects_token: str
username: str
password: str
instance_id: str
version: str
bedrock_url: str
platform_url: str
proxies: dict
class WatsonXProviderSpec(TypedDict, total=False):
"""WatsonX provider specification."""
provider: Required[Literal["watsonx"]]
config: WatsonXProviderConfig
@deprecated(
'The "WatsonProviderSpec" provider spec is deprecated and will be removed in v1.0.0. Use "WatsonXProviderSpec" instead.'
)
class WatsonProviderSpec(TypedDict, total=False):
"""Watson provider specification (deprecated).
Notes:
- This is deprecated. Use WatsonXProviderSpec with provider="watsonx" instead.
"""
provider: Required[Literal["watson"]]
config: WatsonXProviderConfig

View File

@@ -1,142 +0,0 @@
"""IBM WatsonX embeddings provider."""
from typing import Any
from pydantic import Field, model_validator
from typing_extensions import Self
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
from crewai.rag.embeddings.providers.ibm.embedding_callable import (
WatsonXEmbeddingFunction,
)
class WatsonXProvider(BaseEmbeddingsProvider[WatsonXEmbeddingFunction]):
"""IBM WatsonX embeddings provider.
Note: Requires custom implementation as WatsonX uses a different interface.
"""
embedding_callable: type[WatsonXEmbeddingFunction] = Field(
default=WatsonXEmbeddingFunction, description="WatsonX embedding function class"
)
model_id: str = Field(
description="WatsonX model ID", validation_alias="EMBEDDINGS_WATSONX_MODEL_ID"
)
params: dict[str, str | dict[str, str]] | None = Field(
default=None, description="Additional parameters"
)
credentials: Any | None = Field(default=None, description="WatsonX credentials")
project_id: str | None = Field(
default=None,
description="WatsonX project ID",
validation_alias="EMBEDDINGS_WATSONX_PROJECT_ID",
)
space_id: str | None = Field(
default=None,
description="WatsonX space ID",
validation_alias="EMBEDDINGS_WATSONX_SPACE_ID",
)
api_client: Any | None = Field(default=None, description="WatsonX API client")
verify: bool | str | None = Field(
default=None,
description="SSL verification",
validation_alias="EMBEDDINGS_WATSONX_VERIFY",
)
persistent_connection: bool = Field(
default=True,
description="Use persistent connection",
validation_alias="EMBEDDINGS_WATSONX_PERSISTENT_CONNECTION",
)
batch_size: int = Field(
default=100,
description="Batch size for processing",
validation_alias="EMBEDDINGS_WATSONX_BATCH_SIZE",
)
concurrency_limit: int = Field(
default=10,
description="Concurrency limit",
validation_alias="EMBEDDINGS_WATSONX_CONCURRENCY_LIMIT",
)
max_retries: int | None = Field(
default=None,
description="Maximum retries",
validation_alias="EMBEDDINGS_WATSONX_MAX_RETRIES",
)
delay_time: float | None = Field(
default=None,
description="Delay time between retries",
validation_alias="EMBEDDINGS_WATSONX_DELAY_TIME",
)
retry_status_codes: list[int] | None = Field(
default=None, description="HTTP status codes to retry on"
)
url: str = Field(
description="WatsonX API URL", validation_alias="EMBEDDINGS_WATSONX_URL"
)
api_key: str = Field(
description="WatsonX API key", validation_alias="EMBEDDINGS_WATSONX_API_KEY"
)
name: str | None = Field(
default=None,
description="Service name",
validation_alias="EMBEDDINGS_WATSONX_NAME",
)
iam_serviceid_crn: str | None = Field(
default=None,
description="IAM service ID CRN",
validation_alias="EMBEDDINGS_WATSONX_IAM_SERVICEID_CRN",
)
trusted_profile_id: str | None = Field(
default=None,
description="Trusted profile ID",
validation_alias="EMBEDDINGS_WATSONX_TRUSTED_PROFILE_ID",
)
token: str | None = Field(
default=None,
description="Bearer token",
validation_alias="EMBEDDINGS_WATSONX_TOKEN",
)
projects_token: str | None = Field(
default=None,
description="Projects token",
validation_alias="EMBEDDINGS_WATSONX_PROJECTS_TOKEN",
)
username: str | None = Field(
default=None,
description="Username",
validation_alias="EMBEDDINGS_WATSONX_USERNAME",
)
password: str | None = Field(
default=None,
description="Password",
validation_alias="EMBEDDINGS_WATSONX_PASSWORD",
)
instance_id: str | None = Field(
default=None,
description="Service instance ID",
validation_alias="EMBEDDINGS_WATSONX_INSTANCE_ID",
)
version: str | None = Field(
default=None,
description="API version",
validation_alias="EMBEDDINGS_WATSONX_VERSION",
)
bedrock_url: str | None = Field(
default=None,
description="Bedrock URL",
validation_alias="EMBEDDINGS_WATSONX_BEDROCK_URL",
)
platform_url: str | None = Field(
default=None,
description="Platform URL",
validation_alias="EMBEDDINGS_WATSONX_PLATFORM_URL",
)
proxies: dict | None = Field(default=None, description="Proxy configuration")
@model_validator(mode="after")
def validate_space_or_project(self) -> Self:
"""Validate that either space_id or project_id is provided."""
if not self.space_id and not self.project_id:
raise ValueError("One of 'space_id' or 'project_id' must be provided")
return self

View File

@@ -1,15 +0,0 @@
"""Instructor embedding providers."""
from crewai.rag.embeddings.providers.instructor.instructor_provider import (
InstructorProvider,
)
from crewai.rag.embeddings.providers.instructor.types import (
InstructorProviderConfig,
InstructorProviderSpec,
)
__all__ = [
"InstructorProvider",
"InstructorProviderConfig",
"InstructorProviderSpec",
]

View File

@@ -1,32 +0,0 @@
"""Instructor embeddings provider."""
from chromadb.utils.embedding_functions.instructor_embedding_function import (
InstructorEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class InstructorProvider(BaseEmbeddingsProvider[InstructorEmbeddingFunction]):
"""Instructor embeddings provider."""
embedding_callable: type[InstructorEmbeddingFunction] = Field(
default=InstructorEmbeddingFunction,
description="Instructor embedding function class",
)
model_name: str = Field(
default="hkunlp/instructor-base",
description="Model name to use",
validation_alias="EMBEDDINGS_INSTRUCTOR_MODEL_NAME",
)
device: str = Field(
default="cpu",
description="Device to run model on (cpu or cuda)",
validation_alias="EMBEDDINGS_INSTRUCTOR_DEVICE",
)
instruction: str | None = Field(
default=None,
description="Instruction for embeddings",
validation_alias="EMBEDDINGS_INSTRUCTOR_INSTRUCTION",
)

View File

@@ -1,20 +0,0 @@
"""Type definitions for Instructor embedding providers."""
from typing import Annotated, Literal
from typing_extensions import Required, TypedDict
class InstructorProviderConfig(TypedDict, total=False):
"""Configuration for Instructor provider."""
model_name: Annotated[str, "hkunlp/instructor-base"]
device: Annotated[str, "cpu"]
instruction: str
class InstructorProviderSpec(TypedDict, total=False):
"""Instructor provider specification."""
provider: Required[Literal["instructor"]]
config: InstructorProviderConfig

View File

@@ -1,13 +0,0 @@
"""Jina embedding providers."""
from crewai.rag.embeddings.providers.jina.jina_provider import JinaProvider
from crewai.rag.embeddings.providers.jina.types import (
JinaProviderConfig,
JinaProviderSpec,
)
__all__ = [
"JinaProvider",
"JinaProviderConfig",
"JinaProviderSpec",
]

View File

@@ -1,24 +0,0 @@
"""Jina embeddings provider."""
from chromadb.utils.embedding_functions.jina_embedding_function import (
JinaEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class JinaProvider(BaseEmbeddingsProvider[JinaEmbeddingFunction]):
"""Jina embeddings provider."""
embedding_callable: type[JinaEmbeddingFunction] = Field(
default=JinaEmbeddingFunction, description="Jina embedding function class"
)
api_key: str = Field(
description="Jina API key", validation_alias="EMBEDDINGS_JINA_API_KEY"
)
model_name: str = Field(
default="jina-embeddings-v2-base-en",
description="Model name to use for embeddings",
validation_alias="EMBEDDINGS_JINA_MODEL_NAME",
)

View File

@@ -1,19 +0,0 @@
"""Type definitions for Jina embedding providers."""
from typing import Annotated, Literal
from typing_extensions import Required, TypedDict
class JinaProviderConfig(TypedDict, total=False):
"""Configuration for Jina provider."""
api_key: str
model_name: Annotated[str, "jina-embeddings-v2-base-en"]
class JinaProviderSpec(TypedDict, total=False):
"""Jina provider specification."""
provider: Required[Literal["jina"]]
config: JinaProviderConfig

View File

@@ -1,15 +0,0 @@
"""Microsoft embedding providers."""
from crewai.rag.embeddings.providers.microsoft.azure import (
AzureProvider,
)
from crewai.rag.embeddings.providers.microsoft.types import (
AzureProviderConfig,
AzureProviderSpec,
)
__all__ = [
"AzureProvider",
"AzureProviderConfig",
"AzureProviderSpec",
]

View File

@@ -1,60 +0,0 @@
"""Azure OpenAI embeddings provider."""
from typing import Any
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class AzureProvider(BaseEmbeddingsProvider[OpenAIEmbeddingFunction]):
"""Azure OpenAI embeddings provider."""
embedding_callable: type[OpenAIEmbeddingFunction] = Field(
default=OpenAIEmbeddingFunction,
description="Azure OpenAI embedding function class",
)
api_key: str = Field(
description="Azure API key", validation_alias="EMBEDDINGS_OPENAI_API_KEY"
)
api_base: str | None = Field(
default=None,
description="Azure endpoint URL",
validation_alias="EMBEDDINGS_OPENAI_API_BASE",
)
api_type: str = Field(
default="azure",
description="API type for Azure",
validation_alias="EMBEDDINGS_OPENAI_API_TYPE",
)
api_version: str | None = Field(
default=None,
description="Azure API version",
validation_alias="EMBEDDINGS_OPENAI_API_VERSION",
)
model_name: str = Field(
default="text-embedding-ada-002",
description="Model name to use for embeddings",
validation_alias="EMBEDDINGS_OPENAI_MODEL_NAME",
)
default_headers: dict[str, Any] | None = Field(
default=None, description="Default headers for API requests"
)
dimensions: int | None = Field(
default=None,
description="Embedding dimensions",
validation_alias="EMBEDDINGS_OPENAI_DIMENSIONS",
)
deployment_id: str | None = Field(
default=None,
description="Azure deployment ID",
validation_alias="EMBEDDINGS_OPENAI_DEPLOYMENT_ID",
)
organization_id: str | None = Field(
default=None,
description="Organization ID",
validation_alias="EMBEDDINGS_OPENAI_ORGANIZATION_ID",
)

View File

@@ -1,26 +0,0 @@
"""Type definitions for Microsoft Azure embedding providers."""
from typing import Annotated, Any, Literal
from typing_extensions import Required, TypedDict
class AzureProviderConfig(TypedDict, total=False):
"""Configuration for Azure provider."""
api_key: str
api_base: str
api_type: Annotated[str, "azure"]
api_version: str
model_name: Annotated[str, "text-embedding-ada-002"]
default_headers: dict[str, Any]
dimensions: int
deployment_id: str
organization_id: str
class AzureProviderSpec(TypedDict, total=False):
"""Azure provider specification."""
provider: Required[Literal["azure"]]
config: AzureProviderConfig

View File

@@ -1,15 +0,0 @@
"""Ollama embedding providers."""
from crewai.rag.embeddings.providers.ollama.ollama_provider import (
OllamaProvider,
)
from crewai.rag.embeddings.providers.ollama.types import (
OllamaProviderConfig,
OllamaProviderSpec,
)
__all__ = [
"OllamaProvider",
"OllamaProviderConfig",
"OllamaProviderSpec",
]

View File

@@ -1,25 +0,0 @@
"""Ollama embeddings provider."""
from chromadb.utils.embedding_functions.ollama_embedding_function import (
OllamaEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class OllamaProvider(BaseEmbeddingsProvider[OllamaEmbeddingFunction]):
"""Ollama embeddings provider."""
embedding_callable: type[OllamaEmbeddingFunction] = Field(
default=OllamaEmbeddingFunction, description="Ollama embedding function class"
)
url: str = Field(
default="http://localhost:11434/api/embeddings",
description="Ollama API endpoint URL",
validation_alias="EMBEDDINGS_OLLAMA_URL",
)
model_name: str = Field(
description="Model name to use for embeddings",
validation_alias="EMBEDDINGS_OLLAMA_MODEL_NAME",
)

View File

@@ -1,19 +0,0 @@
"""Type definitions for Ollama embedding providers."""
from typing import Annotated, Literal
from typing_extensions import Required, TypedDict
class OllamaProviderConfig(TypedDict, total=False):
"""Configuration for Ollama provider."""
url: Annotated[str, "http://localhost:11434/api/embeddings"]
model_name: str
class OllamaProviderSpec(TypedDict, total=False):
"""Ollama provider specification."""
provider: Required[Literal["ollama"]]
config: OllamaProviderConfig

View File

@@ -1,13 +0,0 @@
"""ONNX embedding providers."""
from crewai.rag.embeddings.providers.onnx.onnx_provider import ONNXProvider
from crewai.rag.embeddings.providers.onnx.types import (
ONNXProviderConfig,
ONNXProviderSpec,
)
__all__ = [
"ONNXProvider",
"ONNXProviderConfig",
"ONNXProviderSpec",
]

View File

@@ -1,19 +0,0 @@
"""ONNX embeddings provider."""
from chromadb.utils.embedding_functions.onnx_mini_lm_l6_v2 import ONNXMiniLM_L6_V2
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class ONNXProvider(BaseEmbeddingsProvider[ONNXMiniLM_L6_V2]):
"""ONNX embeddings provider."""
embedding_callable: type[ONNXMiniLM_L6_V2] = Field(
default=ONNXMiniLM_L6_V2, description="ONNX MiniLM embedding function class"
)
preferred_providers: list[str] | None = Field(
default=None,
description="Preferred ONNX execution providers",
validation_alias="EMBEDDINGS_ONNX_PREFERRED_PROVIDERS",
)

View File

@@ -1,18 +0,0 @@
"""Type definitions for ONNX embedding providers."""
from typing import Literal
from typing_extensions import Required, TypedDict
class ONNXProviderConfig(TypedDict, total=False):
"""Configuration for ONNX provider."""
preferred_providers: list[str]
class ONNXProviderSpec(TypedDict, total=False):
"""ONNX provider specification."""
provider: Required[Literal["onnx"]]
config: ONNXProviderConfig

View File

@@ -1,15 +0,0 @@
"""OpenAI embedding providers."""
from crewai.rag.embeddings.providers.openai.openai_provider import (
OpenAIProvider,
)
from crewai.rag.embeddings.providers.openai.types import (
OpenAIProviderConfig,
OpenAIProviderSpec,
)
__all__ = [
"OpenAIProvider",
"OpenAIProviderConfig",
"OpenAIProviderSpec",
]

View File

@@ -1,62 +0,0 @@
"""OpenAI embeddings provider."""
from typing import Any
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class OpenAIProvider(BaseEmbeddingsProvider[OpenAIEmbeddingFunction]):
"""OpenAI embeddings provider."""
embedding_callable: type[OpenAIEmbeddingFunction] = Field(
default=OpenAIEmbeddingFunction,
description="OpenAI embedding function class",
)
api_key: str | None = Field(
default=None,
description="OpenAI API key",
validation_alias="EMBEDDINGS_OPENAI_API_KEY",
)
model_name: str = Field(
default="text-embedding-ada-002",
description="Model name to use for embeddings",
validation_alias="EMBEDDINGS_OPENAI_MODEL_NAME",
)
api_base: str | None = Field(
default=None,
description="Base URL for API requests",
validation_alias="EMBEDDINGS_OPENAI_API_BASE",
)
api_type: str | None = Field(
default=None,
description="API type (e.g., 'azure')",
validation_alias="EMBEDDINGS_OPENAI_API_TYPE",
)
api_version: str | None = Field(
default=None,
description="API version",
validation_alias="EMBEDDINGS_OPENAI_API_VERSION",
)
default_headers: dict[str, Any] | None = Field(
default=None, description="Default headers for API requests"
)
dimensions: int | None = Field(
default=None,
description="Embedding dimensions",
validation_alias="EMBEDDINGS_OPENAI_DIMENSIONS",
)
deployment_id: str | None = Field(
default=None,
description="Azure deployment ID",
validation_alias="EMBEDDINGS_OPENAI_DEPLOYMENT_ID",
)
organization_id: str | None = Field(
default=None,
description="OpenAI organization ID",
validation_alias="EMBEDDINGS_OPENAI_ORGANIZATION_ID",
)

View File

@@ -1,26 +0,0 @@
"""Type definitions for OpenAI embedding providers."""
from typing import Annotated, Any, Literal
from typing_extensions import Required, TypedDict
class OpenAIProviderConfig(TypedDict, total=False):
"""Configuration for OpenAI provider."""
api_key: str
model_name: Annotated[str, "text-embedding-ada-002"]
api_base: str
api_type: str
api_version: str
default_headers: dict[str, Any]
dimensions: int
deployment_id: str
organization_id: str
class OpenAIProviderSpec(TypedDict, total=False):
"""OpenAI provider specification."""
provider: Required[Literal["openai"]]
config: OpenAIProviderConfig

View File

@@ -1,15 +0,0 @@
"""OpenCLIP embedding providers."""
from crewai.rag.embeddings.providers.openclip.openclip_provider import (
OpenCLIPProvider,
)
from crewai.rag.embeddings.providers.openclip.types import (
OpenCLIPProviderConfig,
OpenCLIPProviderSpec,
)
__all__ = [
"OpenCLIPProvider",
"OpenCLIPProviderConfig",
"OpenCLIPProviderSpec",
]

View File

@@ -1,32 +0,0 @@
"""OpenCLIP embeddings provider."""
from chromadb.utils.embedding_functions.open_clip_embedding_function import (
OpenCLIPEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class OpenCLIPProvider(BaseEmbeddingsProvider[OpenCLIPEmbeddingFunction]):
"""OpenCLIP embeddings provider."""
embedding_callable: type[OpenCLIPEmbeddingFunction] = Field(
default=OpenCLIPEmbeddingFunction,
description="OpenCLIP embedding function class",
)
model_name: str = Field(
default="ViT-B-32",
description="Model name to use",
validation_alias="EMBEDDINGS_OPENCLIP_MODEL_NAME",
)
checkpoint: str = Field(
default="laion2b_s34b_b79k",
description="Model checkpoint",
validation_alias="EMBEDDINGS_OPENCLIP_CHECKPOINT",
)
device: str | None = Field(
default="cpu",
description="Device to run model on",
validation_alias="EMBEDDINGS_OPENCLIP_DEVICE",
)

View File

@@ -1,20 +0,0 @@
"""Type definitions for OpenCLIP embedding providers."""
from typing import Annotated, Literal
from typing_extensions import Required, TypedDict
class OpenCLIPProviderConfig(TypedDict, total=False):
"""Configuration for OpenCLIP provider."""
model_name: Annotated[str, "ViT-B-32"]
checkpoint: Annotated[str, "laion2b_s34b_b79k"]
device: Annotated[str, "cpu"]
class OpenCLIPProviderSpec(TypedDict):
"""OpenCLIP provider specification."""
provider: Required[Literal["openclip"]]
config: OpenCLIPProviderConfig

View File

@@ -1,15 +0,0 @@
"""Roboflow embedding providers."""
from crewai.rag.embeddings.providers.roboflow.roboflow_provider import (
RoboflowProvider,
)
from crewai.rag.embeddings.providers.roboflow.types import (
RoboflowProviderConfig,
RoboflowProviderSpec,
)
__all__ = [
"RoboflowProvider",
"RoboflowProviderConfig",
"RoboflowProviderSpec",
]

View File

@@ -1,27 +0,0 @@
"""Roboflow embeddings provider."""
from chromadb.utils.embedding_functions.roboflow_embedding_function import (
RoboflowEmbeddingFunction,
)
from pydantic import Field
from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider
class RoboflowProvider(BaseEmbeddingsProvider[RoboflowEmbeddingFunction]):
"""Roboflow embeddings provider."""
embedding_callable: type[RoboflowEmbeddingFunction] = Field(
default=RoboflowEmbeddingFunction,
description="Roboflow embedding function class",
)
api_key: str = Field(
default="",
description="Roboflow API key",
validation_alias="EMBEDDINGS_ROBOFLOW_API_KEY",
)
api_url: str = Field(
default="https://infer.roboflow.com",
description="Roboflow API URL",
validation_alias="EMBEDDINGS_ROBOFLOW_API_URL",
)

View File

@@ -1,19 +0,0 @@
"""Type definitions for Roboflow embedding providers."""
from typing import Annotated, Literal
from typing_extensions import Required, TypedDict
class RoboflowProviderConfig(TypedDict, total=False):
"""Configuration for Roboflow provider."""
api_key: Annotated[str, ""]
api_url: Annotated[str, "https://infer.roboflow.com"]
class RoboflowProviderSpec(TypedDict):
"""Roboflow provider specification."""
provider: Required[Literal["roboflow"]]
config: RoboflowProviderConfig

View File

@@ -1,15 +0,0 @@
"""SentenceTransformer embedding providers."""
from crewai.rag.embeddings.providers.sentence_transformer.sentence_transformer_provider import (
SentenceTransformerProvider,
)
from crewai.rag.embeddings.providers.sentence_transformer.types import (
SentenceTransformerProviderConfig,
SentenceTransformerProviderSpec,
)
__all__ = [
"SentenceTransformerProvider",
"SentenceTransformerProviderConfig",
"SentenceTransformerProviderSpec",
]

Some files were not shown because too many files have changed in this diff Show More