Compare commits

..

4 Commits

Author SHA1 Message Date
Devin AI
b70c4499d7 fix: emit tool call events in provider-specific LLM streaming implementations
Fixes #3982

This commit adds tool call event emission to all provider-specific LLM
streaming implementations. Previously, only text chunks were emitted
during streaming, but tool call information was missing.

Changes:
- Update BaseLLM._emit_stream_chunk_event to infer call_type from
  tool_call presence when not explicitly provided
- Add tool call event emission in OpenAI provider streaming
- Add tool call event emission in Azure provider streaming
- Add tool call event emission in Gemini provider streaming
- Add tool call event emission in Bedrock provider streaming
- Add tool call event emission in Anthropic provider streaming
- Add comprehensive tests for tool call streaming events

The fix ensures that LLMStreamChunkEvent is emitted with:
- call_type=LLMCallType.TOOL_CALL when tool calls are received
- tool_call dict containing id, function (name, arguments), type, index
- chunk containing the tool call arguments being streamed

Co-Authored-By: João <joao@crewai.com>
2025-11-27 07:19:36 +00:00
Greyson LaLonde
2025a26fc3 fix: ensure parameters in RagTool.add, add typing, tests (#3979)
Some checks failed
Mark stale issues and pull requests / stale (push) Has been cancelled
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
Notify Downstream / notify-downstream (push) Has been cancelled
Build uv cache / build-cache (3.10) (push) Has been cancelled
Build uv cache / build-cache (3.11) (push) Has been cancelled
Build uv cache / build-cache (3.12) (push) Has been cancelled
Build uv cache / build-cache (3.13) (push) Has been cancelled
* fix: ensure parameters in RagTool.add, add typing, tests

* feat: substitute pymupdf for pypdf, better parsing performance

---------

Co-authored-by: Lorenze Jay <63378463+lorenzejay@users.noreply.github.com>
2025-11-26 22:32:43 -08:00
Greyson LaLonde
bed9a3847a fix: remove invalid param from sse client (#3980) 2025-11-26 21:37:55 -08:00
Heitor Carvalho
5239dc9859 fix: erase 'oauth2_extra' setting on 'crewai config reset' command
Some checks failed
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
Notify Downstream / notify-downstream (push) Has been cancelled
2025-11-26 18:43:44 -05:00
19 changed files with 1172 additions and 84 deletions

View File

@@ -16,9 +16,9 @@ dependencies = [
"lancedb>=0.5.4",
"tiktoken>=0.8.0",
"beautifulsoup4>=4.13.4",
"pypdf>=5.9.0",
"python-docx>=1.2.0",
"youtube-transcript-api>=1.2.2",
"pymupdf>=1.26.6",
]

View File

@@ -3,8 +3,7 @@
from __future__ import annotations
import hashlib
from pathlib import Path
from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast
from typing import TYPE_CHECKING, Any, cast
import uuid
from crewai.rag.config.types import RagConfigType
@@ -19,15 +18,13 @@ from typing_extensions import TypeIs, Unpack
from crewai_tools.rag.data_types import DataType
from crewai_tools.rag.misc import sanitize_metadata_for_chromadb
from crewai_tools.tools.rag.rag_tool import Adapter
from crewai_tools.tools.rag.types import AddDocumentParams, ContentItem
if TYPE_CHECKING:
from crewai.rag.qdrant.config import QdrantConfig
ContentItem: TypeAlias = str | Path | dict[str, Any]
def _is_qdrant_config(config: Any) -> TypeIs[QdrantConfig]:
"""Check if config is a QdrantConfig using safe duck typing.
@@ -46,19 +43,6 @@ def _is_qdrant_config(config: Any) -> TypeIs[QdrantConfig]:
return False
class AddDocumentParams(TypedDict, total=False):
"""Parameters for adding documents to the RAG system."""
data_type: DataType
metadata: dict[str, Any]
website: str
url: str
file_path: str | Path
github_url: str
youtube_url: str
directory_path: str | Path
class CrewAIRagAdapter(Adapter):
"""Adapter that uses CrewAI's native RAG system.
@@ -131,13 +115,26 @@ class CrewAIRagAdapter(Adapter):
def add(self, *args: ContentItem, **kwargs: Unpack[AddDocumentParams]) -> None:
"""Add content to the knowledge base.
This method handles various input types and converts them to documents
for the vector database. It supports the data_type parameter for
compatibility with existing tools.
Args:
*args: Content items to add (strings, paths, or document dicts)
**kwargs: Additional parameters including data_type, metadata, etc.
**kwargs: Additional parameters including:
- data_type: DataType enum or string (e.g., "file", "pdf_file", "text")
- path: Path to file or directory (alternative to positional arg)
- file_path: Alias for path
- metadata: Additional metadata to attach to documents
- url: URL to fetch content from
- website: Website URL to scrape
- github_url: GitHub repository URL
- youtube_url: YouTube video URL
- directory_path: Path to directory
Examples:
rag_tool.add("path/to/document.pdf", data_type=DataType.PDF_FILE)
rag_tool.add(path="path/to/document.pdf", data_type="file")
rag_tool.add(file_path="path/to/document.pdf", data_type="pdf_file")
rag_tool.add("path/to/document.pdf") # auto-detects PDF
"""
import os
@@ -146,10 +143,54 @@ class CrewAIRagAdapter(Adapter):
from crewai_tools.rag.source_content import SourceContent
documents: list[BaseRecord] = []
data_type: DataType | None = kwargs.get("data_type")
raw_data_type = kwargs.get("data_type")
base_metadata: dict[str, Any] = kwargs.get("metadata", {})
for arg in args:
data_type: DataType | None = None
if raw_data_type is not None:
if isinstance(raw_data_type, DataType):
if raw_data_type != DataType.FILE:
data_type = raw_data_type
elif isinstance(raw_data_type, str):
if raw_data_type != "file":
try:
data_type = DataType(raw_data_type)
except ValueError:
raise ValueError(
f"Invalid data_type: '{raw_data_type}'. "
f"Valid values are: 'file' (auto-detect), or one of: "
f"{', '.join(dt.value for dt in DataType)}"
) from None
content_items: list[ContentItem] = list(args)
path_value = kwargs.get("path") or kwargs.get("file_path")
if path_value is not None:
content_items.append(path_value)
if url := kwargs.get("url"):
content_items.append(url)
if website := kwargs.get("website"):
content_items.append(website)
if github_url := kwargs.get("github_url"):
content_items.append(github_url)
if youtube_url := kwargs.get("youtube_url"):
content_items.append(youtube_url)
if directory_path := kwargs.get("directory_path"):
content_items.append(directory_path)
file_extensions = {
".pdf",
".txt",
".csv",
".json",
".xml",
".docx",
".mdx",
".md",
}
for arg in content_items:
source_ref: str
if isinstance(arg, dict):
source_ref = str(arg.get("source", arg.get("content", "")))
@@ -157,6 +198,14 @@ class CrewAIRagAdapter(Adapter):
source_ref = str(arg)
if not data_type:
ext = os.path.splitext(source_ref)[1].lower()
is_url = source_ref.startswith(("http://", "https://", "file://"))
if (
ext in file_extensions
and not is_url
and not os.path.isfile(source_ref)
):
raise FileNotFoundError(f"File does not exist: {source_ref}")
data_type = DataTypes.from_content(source_ref)
if data_type == DataType.DIRECTORY:

View File

@@ -1,6 +1,8 @@
from enum import Enum
from importlib import import_module
import os
from pathlib import Path
from typing import cast
from urllib.parse import urlparse
from crewai_tools.rag.base_loader import BaseLoader
@@ -8,6 +10,7 @@ from crewai_tools.rag.chunkers.base_chunker import BaseChunker
class DataType(str, Enum):
FILE = "file"
PDF_FILE = "pdf_file"
TEXT_FILE = "text_file"
CSV = "csv"
@@ -15,22 +18,14 @@ class DataType(str, Enum):
XML = "xml"
DOCX = "docx"
MDX = "mdx"
# Database types
MYSQL = "mysql"
POSTGRES = "postgres"
# Repository types
GITHUB = "github"
DIRECTORY = "directory"
# Web types
WEBSITE = "website"
DOCS_SITE = "docs_site"
YOUTUBE_VIDEO = "youtube_video"
YOUTUBE_CHANNEL = "youtube_channel"
# Raw types
TEXT = "text"
def get_chunker(self) -> BaseChunker:
@@ -63,13 +58,11 @@ class DataType(str, Enum):
try:
module = import_module(module_path)
return getattr(module, class_name)()
return cast(BaseChunker, getattr(module, class_name)())
except Exception as e:
raise ValueError(f"Error loading chunker for {self}: {e}") from e
def get_loader(self) -> BaseLoader:
from importlib import import_module
loaders = {
DataType.PDF_FILE: ("pdf_loader", "PDFLoader"),
DataType.TEXT_FILE: ("text_loader", "TextFileLoader"),
@@ -98,7 +91,7 @@ class DataType(str, Enum):
module_path = f"crewai_tools.rag.loaders.{module_name}"
try:
module = import_module(module_path)
return getattr(module, class_name)()
return cast(BaseLoader, getattr(module, class_name)())
except Exception as e:
raise ValueError(f"Error loading loader for {self}: {e}") from e

View File

@@ -2,70 +2,112 @@
import os
from pathlib import Path
from typing import Any
from typing import Any, cast
from urllib.parse import urlparse
import urllib.request
from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
from crewai_tools.rag.source_content import SourceContent
class PDFLoader(BaseLoader):
"""Loader for PDF files."""
"""Loader for PDF files and URLs."""
def load(self, source: SourceContent, **kwargs) -> LoaderResult: # type: ignore[override]
"""Load and extract text from a PDF file.
@staticmethod
def _is_url(path: str) -> bool:
"""Check if the path is a URL."""
try:
parsed = urlparse(path)
return parsed.scheme in ("http", "https")
except Exception:
return False
@staticmethod
def _download_pdf(url: str) -> bytes:
"""Download PDF content from a URL.
Args:
source: The source content containing the PDF file path
url: The URL to download from.
Returns:
LoaderResult with extracted text content
The PDF content as bytes.
Raises:
FileNotFoundError: If the PDF file doesn't exist
ImportError: If required PDF libraries aren't installed
ValueError: If the download fails.
"""
try:
with urllib.request.urlopen(url, timeout=30) as response: # noqa: S310
return cast(bytes, response.read())
except Exception as e:
raise ValueError(f"Failed to download PDF from {url}: {e!s}") from e
def load(self, source: SourceContent, **kwargs: Any) -> LoaderResult: # type: ignore[override]
"""Load and extract text from a PDF file or URL.
Args:
source: The source content containing the PDF file path or URL.
Returns:
LoaderResult with extracted text content.
Raises:
FileNotFoundError: If the PDF file doesn't exist.
ImportError: If required PDF libraries aren't installed.
ValueError: If the PDF cannot be read or downloaded.
"""
try:
import pypdf
except ImportError:
try:
import PyPDF2 as pypdf # type: ignore[import-not-found,no-redef] # noqa: N813
except ImportError as e:
raise ImportError(
"PDF support requires pypdf or PyPDF2. Install with: uv add pypdf"
) from e
import pymupdf # type: ignore[import-untyped]
except ImportError as e:
raise ImportError(
"PDF support requires pymupdf. Install with: uv add pymupdf"
) from e
file_path = source.source
is_url = self._is_url(file_path)
if not os.path.isfile(file_path):
raise FileNotFoundError(f"PDF file not found: {file_path}")
if is_url:
source_name = Path(urlparse(file_path).path).name or "downloaded.pdf"
else:
source_name = Path(file_path).name
text_content = []
text_content: list[str] = []
metadata: dict[str, Any] = {
"source": str(file_path),
"file_name": Path(file_path).name,
"source": file_path,
"file_name": source_name,
"file_type": "pdf",
}
try:
with open(file_path, "rb") as file:
pdf_reader = pypdf.PdfReader(file)
metadata["num_pages"] = len(pdf_reader.pages)
if is_url:
pdf_bytes = self._download_pdf(file_path)
doc = pymupdf.open(stream=pdf_bytes, filetype="pdf")
else:
if not os.path.isfile(file_path):
raise FileNotFoundError(f"PDF file not found: {file_path}")
doc = pymupdf.open(file_path)
for page_num, page in enumerate(pdf_reader.pages, 1):
page_text = page.extract_text()
if page_text.strip():
text_content.append(f"Page {page_num}:\n{page_text}")
metadata["num_pages"] = len(doc)
for page_num, page in enumerate(doc, 1):
page_text = page.get_text()
if page_text.strip():
text_content.append(f"Page {page_num}:\n{page_text}")
doc.close()
except FileNotFoundError:
raise
except Exception as e:
raise ValueError(f"Error reading PDF file {file_path}: {e!s}") from e
raise ValueError(f"Error reading PDF from {file_path}: {e!s}") from e
if not text_content:
content = f"[PDF file with no extractable text: {Path(file_path).name}]"
content = f"[PDF file with no extractable text: {source_name}]"
else:
content = "\n\n".join(text_content)
return LoaderResult(
content=content,
source=str(file_path),
source=file_path,
metadata=metadata,
doc_id=self.generate_doc_id(source_ref=str(file_path), content=content),
doc_id=self.generate_doc_id(source_ref=file_path, content=content),
)

View File

@@ -14,9 +14,14 @@ from pydantic import (
field_validator,
model_validator,
)
from typing_extensions import Self
from typing_extensions import Self, Unpack
from crewai_tools.tools.rag.types import RagToolConfig, VectorDbConfig
from crewai_tools.tools.rag.types import (
AddDocumentParams,
ContentItem,
RagToolConfig,
VectorDbConfig,
)
def _validate_embedding_config(
@@ -72,6 +77,8 @@ def _validate_embedding_config(
class Adapter(BaseModel, ABC):
"""Abstract base class for RAG adapters."""
model_config = ConfigDict(arbitrary_types_allowed=True)
@abstractmethod
@@ -86,8 +93,8 @@ class Adapter(BaseModel, ABC):
@abstractmethod
def add(
self,
*args: Any,
**kwargs: Any,
*args: ContentItem,
**kwargs: Unpack[AddDocumentParams],
) -> None:
"""Add content to the knowledge base."""
@@ -102,7 +109,11 @@ class RagTool(BaseTool):
) -> str:
raise NotImplementedError
def add(self, *args: Any, **kwargs: Any) -> None:
def add(
self,
*args: ContentItem,
**kwargs: Unpack[AddDocumentParams],
) -> None:
raise NotImplementedError
name: str = "Knowledge base"
@@ -207,9 +218,34 @@ class RagTool(BaseTool):
def add(
self,
*args: Any,
**kwargs: Any,
*args: ContentItem,
**kwargs: Unpack[AddDocumentParams],
) -> None:
"""Add content to the knowledge base.
Args:
*args: Content items to add (strings, paths, or document dicts)
data_type: DataType enum or string (e.g., "file", "pdf_file", "text")
path: Path to file or directory, alias to positional arg
file_path: Alias for path
metadata: Additional metadata to attach to documents
url: URL to fetch content from
website: Website URL to scrape
github_url: GitHub repository URL
youtube_url: YouTube video URL
directory_path: Path to directory
Examples:
rag_tool.add("path/to/document.pdf", data_type=DataType.PDF_FILE)
# Keyword argument (documented API)
rag_tool.add(path="path/to/document.pdf", data_type="file")
rag_tool.add(file_path="path/to/document.pdf", data_type="pdf_file")
# Auto-detect type from extension
rag_tool.add("path/to/document.pdf") # auto-detects PDF
"""
self.adapter.add(*args, **kwargs)
def _run(

View File

@@ -1,10 +1,50 @@
"""Type definitions for RAG tool configuration."""
from typing import Any, Literal
from pathlib import Path
from typing import Any, Literal, TypeAlias
from crewai.rag.embeddings.types import ProviderSpec
from typing_extensions import TypedDict
from crewai_tools.rag.data_types import DataType
# String spellings accepted wherever a ``data_type`` may be passed as text
# instead of a ``DataType`` member.
# NOTE(review): this list looks intended to mirror the values of the DataType
# enum one-for-one — confirm and keep the two in sync when either changes.
DataTypeStr: TypeAlias = Literal[
"file",
"pdf_file",
"text_file",
"csv",
"json",
"xml",
"docx",
"mdx",
"mysql",
"postgres",
"github",
"directory",
"website",
"docs_site",
"youtube_video",
"youtube_channel",
"text",
]
# One piece of content handed to ``add``: a string, a filesystem path, or a
# dict with string keys.
ContentItem: TypeAlias = str | Path | dict[str, Any]
class AddDocumentParams(TypedDict, total=False):
    """Keyword parameters for adding documents to the RAG system.

    Declared with ``total=False``, so every key is optional.
    """

    # Content type, given as a DataType member or one of its string spellings.
    data_type: DataType | DataTypeStr
    # Additional metadata to attach to the added documents.
    metadata: dict[str, Any]
    # Path to a file or directory.
    path: str | Path
    # Alternative spelling of ``path``.
    file_path: str | Path
    # Website URL.
    website: str
    # Generic URL.
    url: str
    # GitHub repository URL.
    github_url: str
    # YouTube video URL.
    youtube_url: str
    # Path to a directory.
    directory_path: str | Path
class VectorDbConfig(TypedDict):
"""Configuration for vector database provider.

View File

@@ -0,0 +1,471 @@
"""Tests for RagTool.add() method with various data_type values."""
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import MagicMock, Mock, patch
import pytest
from crewai_tools.rag.data_types import DataType
from crewai_tools.tools.rag.rag_tool import RagTool
@pytest.fixture
def mock_rag_client() -> MagicMock:
    """Provide a stand-in RAG client whose operations are inert mocks.

    ``get_or_create_collection`` and ``add_documents`` return ``None``;
    ``search`` returns an empty list.
    """
    client = MagicMock()
    client.configure_mock(
        get_or_create_collection=MagicMock(return_value=None),
        add_documents=MagicMock(return_value=None),
        search=MagicMock(return_value=[]),
    )
    return client
@pytest.fixture
def rag_tool(mock_rag_client: MagicMock) -> RagTool:
    """Build a RagTool while both client factories are patched.

    ``get_rag_client`` and ``create_client`` in the adapter module are
    replaced for the duration of construction so that each returns
    ``mock_rag_client``.
    """
    client_getter = patch(
        "crewai_tools.adapters.crewai_rag_adapter.get_rag_client",
        return_value=mock_rag_client,
    )
    client_factory = patch(
        "crewai_tools.adapters.crewai_rag_adapter.create_client",
        return_value=mock_rag_client,
    )
    with client_getter, client_factory:
        tool = RagTool()
    return tool
class TestDataTypeFileAlias:
"""Tests for data_type='file' alias."""
def test_file_alias_with_existing_file(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test that data_type='file' works with existing files."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Test content for file alias.")
rag_tool.add(path=str(test_file), data_type="file")
assert mock_rag_client.add_documents.called
def test_file_alias_with_nonexistent_file_raises_error(
self, rag_tool: RagTool
) -> None:
"""Test that data_type='file' raises FileNotFoundError for missing files."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add(path="nonexistent/path/to/file.pdf", data_type="file")
def test_file_alias_with_path_keyword(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test that path keyword argument works with data_type='file'."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "document.txt"
test_file.write_text("Content via path keyword.")
rag_tool.add(data_type="file", path=str(test_file))
assert mock_rag_client.add_documents.called
def test_file_alias_with_file_path_keyword(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test that file_path keyword argument works with data_type='file'."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "document.txt"
test_file.write_text("Content via file_path keyword.")
rag_tool.add(data_type="file", file_path=str(test_file))
assert mock_rag_client.add_documents.called
class TestDataTypeStringValues:
"""Tests for data_type as string values matching DataType enum."""
def test_pdf_file_string(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type='pdf_file' with existing PDF file."""
with TemporaryDirectory() as tmpdir:
# Create a minimal valid PDF file
test_file = Path(tmpdir) / "test.pdf"
test_file.write_bytes(
b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n>>\nendobj\ntrailer\n"
b"<<\n/Root 1 0 R\n>>\n%%EOF"
)
# Mock the PDF loader to avoid actual PDF parsing
with patch(
"crewai_tools.adapters.crewai_rag_adapter.DataType.get_loader"
) as mock_loader:
mock_loader_instance = MagicMock()
mock_loader_instance.load.return_value = MagicMock(
content="PDF content", metadata={}, doc_id="test-id"
)
mock_loader.return_value = mock_loader_instance
rag_tool.add(path=str(test_file), data_type="pdf_file")
assert mock_rag_client.add_documents.called
def test_text_file_string(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type='text_file' with existing text file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Plain text content.")
rag_tool.add(path=str(test_file), data_type="text_file")
assert mock_rag_client.add_documents.called
def test_csv_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None:
"""Test data_type='csv' with existing CSV file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.csv"
test_file.write_text("name,value\nfoo,1\nbar,2")
rag_tool.add(path=str(test_file), data_type="csv")
assert mock_rag_client.add_documents.called
def test_json_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None:
"""Test data_type='json' with existing JSON file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.json"
test_file.write_text('{"key": "value", "items": [1, 2, 3]}')
rag_tool.add(path=str(test_file), data_type="json")
assert mock_rag_client.add_documents.called
def test_xml_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None:
"""Test data_type='xml' with existing XML file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.xml"
test_file.write_text('<?xml version="1.0"?><root><item>value</item></root>')
rag_tool.add(path=str(test_file), data_type="xml")
assert mock_rag_client.add_documents.called
def test_mdx_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None:
"""Test data_type='mdx' with existing MDX file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.mdx"
test_file.write_text("# Heading\n\nSome markdown content.")
rag_tool.add(path=str(test_file), data_type="mdx")
assert mock_rag_client.add_documents.called
def test_text_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None:
"""Test data_type='text' with raw text content."""
rag_tool.add("This is raw text content.", data_type="text")
assert mock_rag_client.add_documents.called
def test_directory_string(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type='directory' with existing directory."""
with TemporaryDirectory() as tmpdir:
# Create some files in the directory
(Path(tmpdir) / "file1.txt").write_text("Content 1")
(Path(tmpdir) / "file2.txt").write_text("Content 2")
rag_tool.add(path=tmpdir, data_type="directory")
assert mock_rag_client.add_documents.called
class TestDataTypeEnumValues:
"""Tests for data_type as DataType enum values."""
def test_datatype_file_enum_with_existing_file(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type=DataType.FILE with existing file (auto-detect)."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("File enum auto-detect content.")
rag_tool.add(str(test_file), data_type=DataType.FILE)
assert mock_rag_client.add_documents.called
def test_datatype_file_enum_with_nonexistent_file_raises_error(
self, rag_tool: RagTool
) -> None:
"""Test data_type=DataType.FILE raises FileNotFoundError for missing files."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add("nonexistent/file.pdf", data_type=DataType.FILE)
def test_datatype_pdf_file_enum(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type=DataType.PDF_FILE with existing file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.pdf"
test_file.write_bytes(
b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n>>\nendobj\ntrailer\n"
b"<<\n/Root 1 0 R\n>>\n%%EOF"
)
with patch(
"crewai_tools.adapters.crewai_rag_adapter.DataType.get_loader"
) as mock_loader:
mock_loader_instance = MagicMock()
mock_loader_instance.load.return_value = MagicMock(
content="PDF content", metadata={}, doc_id="test-id"
)
mock_loader.return_value = mock_loader_instance
rag_tool.add(str(test_file), data_type=DataType.PDF_FILE)
assert mock_rag_client.add_documents.called
def test_datatype_text_file_enum(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type=DataType.TEXT_FILE with existing file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Text file content.")
rag_tool.add(str(test_file), data_type=DataType.TEXT_FILE)
assert mock_rag_client.add_documents.called
def test_datatype_text_enum(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type=DataType.TEXT with raw text."""
rag_tool.add("Raw text using enum.", data_type=DataType.TEXT)
assert mock_rag_client.add_documents.called
def test_datatype_directory_enum(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type=DataType.DIRECTORY with existing directory."""
with TemporaryDirectory() as tmpdir:
(Path(tmpdir) / "file.txt").write_text("Directory file content.")
rag_tool.add(tmpdir, data_type=DataType.DIRECTORY)
assert mock_rag_client.add_documents.called
class TestInvalidDataType:
    """Tests for rejection of unrecognised ``data_type`` values."""

    def test_invalid_string_data_type_raises_error(self, rag_tool: RagTool) -> None:
        """An unknown ``data_type`` string raises ValueError."""
        with pytest.raises(ValueError, match="Invalid data_type"):
            rag_tool.add("some content", data_type="invalid_type")

    def test_invalid_data_type_error_message_contains_valid_values(
        self, rag_tool: RagTool
    ) -> None:
        """The ValueError message lists the accepted ``data_type`` values."""
        with pytest.raises(ValueError) as exc_info:
            rag_tool.add("some content", data_type="not_a_type")

        error_message = str(exc_info.value)
        # Check the quoted alias rather than the bare substring "file": the bare
        # form is also satisfied by "pdf_file" / "text_file", so it could never
        # detect the 'file' alias going missing from the message.
        assert "'file'" in error_message
        assert "pdf_file" in error_message
        assert "text_file" in error_message
class TestFileExistenceValidation:
    """Tests that adding a path which does not exist raises an error."""

    @staticmethod
    def _expect_missing_file(rag_tool: RagTool, path: str, data_type: str) -> None:
        """Assert that adding ``path`` as ``data_type`` raises FileNotFoundError."""
        with pytest.raises(FileNotFoundError, match="File does not exist"):
            rag_tool.add(path=path, data_type=data_type)

    def test_pdf_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
        """A missing PDF path raises FileNotFoundError."""
        self._expect_missing_file(rag_tool, "nonexistent.pdf", "pdf_file")

    def test_text_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
        """A missing text-file path raises FileNotFoundError."""
        self._expect_missing_file(rag_tool, "nonexistent.txt", "text_file")

    def test_csv_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
        """A missing CSV path raises FileNotFoundError."""
        self._expect_missing_file(rag_tool, "nonexistent.csv", "csv")

    def test_json_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
        """A missing JSON path raises FileNotFoundError."""
        self._expect_missing_file(rag_tool, "nonexistent.json", "json")

    def test_xml_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
        """A missing XML path raises FileNotFoundError."""
        self._expect_missing_file(rag_tool, "nonexistent.xml", "xml")

    def test_docx_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
        """A missing DOCX path raises FileNotFoundError."""
        self._expect_missing_file(rag_tool, "nonexistent.docx", "docx")

    def test_mdx_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
        """A missing MDX path raises FileNotFoundError."""
        self._expect_missing_file(rag_tool, "nonexistent.mdx", "mdx")

    def test_directory_not_found_raises_error(self, rag_tool: RagTool) -> None:
        """A missing directory raises ValueError rather than FileNotFoundError."""
        with pytest.raises(ValueError, match="Directory does not exist"):
            rag_tool.add(path="nonexistent/directory", data_type="directory")
class TestKeywordArgumentVariants:
"""Tests for different keyword argument combinations."""
def test_positional_argument_with_data_type(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test positional argument with data_type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Positional arg content.")
rag_tool.add(str(test_file), data_type="text_file")
assert mock_rag_client.add_documents.called
def test_path_keyword_with_data_type(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test path keyword argument with data_type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Path keyword content.")
rag_tool.add(path=str(test_file), data_type="text_file")
assert mock_rag_client.add_documents.called
def test_file_path_keyword_with_data_type(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test file_path keyword argument with data_type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("File path keyword content.")
rag_tool.add(file_path=str(test_file), data_type="text_file")
assert mock_rag_client.add_documents.called
def test_directory_path_keyword(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test directory_path keyword argument."""
with TemporaryDirectory() as tmpdir:
(Path(tmpdir) / "file.txt").write_text("Directory content.")
rag_tool.add(directory_path=tmpdir)
assert mock_rag_client.add_documents.called
class TestAutoDetection:
"""Tests for auto-detection of data type from content."""
def test_auto_detect_nonexistent_file_raises_error(self, rag_tool: RagTool) -> None:
"""Test that auto-detection raises FileNotFoundError for missing files."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add("path/to/document.pdf")
def test_auto_detect_txt_file(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test auto-detection of .txt file type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "auto.txt"
test_file.write_text("Auto-detected text file.")
# No data_type specified - should auto-detect
rag_tool.add(str(test_file))
assert mock_rag_client.add_documents.called
def test_auto_detect_csv_file(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test auto-detection of .csv file type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "auto.csv"
test_file.write_text("col1,col2\nval1,val2")
rag_tool.add(str(test_file))
assert mock_rag_client.add_documents.called
def test_auto_detect_json_file(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test auto-detection of .json file type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "auto.json"
test_file.write_text('{"auto": "detected"}')
rag_tool.add(str(test_file))
assert mock_rag_client.add_documents.called
def test_auto_detect_directory(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test auto-detection of directory type."""
with TemporaryDirectory() as tmpdir:
(Path(tmpdir) / "file.txt").write_text("Auto-detected directory.")
rag_tool.add(tmpdir)
assert mock_rag_client.add_documents.called
def test_auto_detect_raw_text(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test auto-detection of raw text (non-file content)."""
rag_tool.add("Just some raw text content")
assert mock_rag_client.add_documents.called
class TestMetadataHandling:
"""Tests for metadata handling with data_type."""
def test_metadata_passed_to_documents(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test that metadata is properly passed to documents."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Content with metadata.")
rag_tool.add(
path=str(test_file),
data_type="text_file",
metadata={"custom_key": "custom_value"},
)
assert mock_rag_client.add_documents.called
call_args = mock_rag_client.add_documents.call_args
documents = call_args.kwargs.get("documents", call_args.args[0] if call_args.args else [])
# Check that at least one document has the custom metadata
assert any(
doc.get("metadata", {}).get("custom_key") == "custom_value"
for doc in documents
)

View File

@@ -73,6 +73,7 @@ CLI_SETTINGS_KEYS = [
"oauth2_audience",
"oauth2_client_id",
"oauth2_domain",
"oauth2_extra",
]
# Default values for CLI settings
@@ -82,6 +83,7 @@ DEFAULT_CLI_SETTINGS = {
"oauth2_audience": CREWAI_ENTERPRISE_DEFAULT_OAUTH2_AUDIENCE,
"oauth2_client_id": CREWAI_ENTERPRISE_DEFAULT_OAUTH2_CLIENT_ID,
"oauth2_domain": CREWAI_ENTERPRISE_DEFAULT_OAUTH2_DOMAIN,
"oauth2_extra": {},
}
# Readonly settings - cannot be set by the user

View File

@@ -316,11 +316,33 @@ class BaseLLM(ABC):
from_task: Task | None = None,
from_agent: Agent | None = None,
tool_call: dict[str, Any] | None = None,
call_type: LLMCallType | None = None,
) -> None:
"""Emit stream chunk event."""
"""Emit stream chunk event.
Args:
chunk: The text content of the chunk
from_task: Optional task that initiated the call
from_agent: Optional agent that initiated the call
tool_call: Optional tool call information as a dict with keys:
- id: Tool call ID
- function: Dict with 'name' and 'arguments'
- type: Tool call type (e.g., 'function')
- index: Index of the tool call
call_type: Optional call type. If not provided, it will be inferred
from the presence of tool_call (TOOL_CALL if tool_call is present,
LLM_CALL otherwise)
"""
if not hasattr(crewai_event_bus, "emit"):
raise ValueError("crewai_event_bus does not have an emit method") from None
# Infer call_type from tool_call presence if not explicitly provided
effective_call_type = call_type
if effective_call_type is None:
effective_call_type = (
LLMCallType.TOOL_CALL if tool_call is not None else LLMCallType.LLM_CALL
)
crewai_event_bus.emit(
self,
event=LLMStreamChunkEvent(
@@ -328,6 +350,7 @@ class BaseLLM(ABC):
tool_call=tool_call,
from_task=from_task,
from_agent=from_agent,
call_type=effective_call_type,
),
)

View File

@@ -450,9 +450,14 @@ class AnthropicCompletion(BaseLLM):
# (the SDK sets it internally)
stream_params = {k: v for k, v in params.items() if k != "stream"}
# Track tool use blocks during streaming
current_tool_use: dict[str, Any] = {}
tool_use_index = 0
# Make streaming API call
with self.client.messages.stream(**stream_params) as stream:
for event in stream:
# Handle text content
if hasattr(event, "delta") and hasattr(event.delta, "text"):
text_delta = event.delta.text
full_response += text_delta
@@ -462,6 +467,55 @@ class AnthropicCompletion(BaseLLM):
from_agent=from_agent,
)
# Handle tool use start (content_block_start event with tool_use type)
if hasattr(event, "content_block") and hasattr(event.content_block, "type"):
if event.content_block.type == "tool_use":
current_tool_use = {
"id": getattr(event.content_block, "id", None),
"name": getattr(event.content_block, "name", ""),
"input": "",
"index": tool_use_index,
}
tool_use_index += 1
# Emit tool call start event
tool_call_event_data = {
"id": current_tool_use["id"],
"function": {
"name": current_tool_use["name"],
"arguments": "",
},
"type": "function",
"index": current_tool_use["index"],
}
self._emit_stream_chunk_event(
chunk="",
from_task=from_task,
from_agent=from_agent,
tool_call=tool_call_event_data,
)
# Handle tool use input delta (input_json events)
if hasattr(event, "delta") and hasattr(event.delta, "partial_json"):
partial_json = event.delta.partial_json
if current_tool_use and partial_json:
current_tool_use["input"] += partial_json
# Emit tool call delta event
tool_call_event_data = {
"id": current_tool_use["id"],
"function": {
"name": current_tool_use["name"],
"arguments": partial_json,
},
"type": "function",
"index": current_tool_use["index"],
}
self._emit_stream_chunk_event(
chunk=partial_json,
from_task=from_task,
from_agent=from_agent,
tool_call=tool_call_event_data,
)
final_message: Message = stream.get_final_message()
usage = self._extract_anthropic_token_usage(final_message)

View File

@@ -503,8 +503,10 @@ class AzureCompletion(BaseLLM):
call_id = tool_call.id or "default"
if call_id not in tool_calls:
tool_calls[call_id] = {
"id": call_id,
"name": "",
"arguments": "",
"index": getattr(tool_call, "index", 0) or 0,
}
if tool_call.function and tool_call.function.name:
@@ -514,6 +516,23 @@ class AzureCompletion(BaseLLM):
tool_call.function.arguments
)
# Emit tool call streaming event
tool_call_event_data = {
"id": tool_calls[call_id]["id"],
"function": {
"name": tool_calls[call_id]["name"],
"arguments": tool_call.function.arguments if tool_call.function and tool_call.function.arguments else "",
},
"type": "function",
"index": tool_calls[call_id]["index"],
}
self._emit_stream_chunk_event(
chunk=tool_call.function.arguments if tool_call.function and tool_call.function.arguments else "",
from_task=from_task,
from_agent=from_agent,
tool_call=tool_call_event_data,
)
# Handle completed tool calls
if tool_calls and available_functions:
for call_data in tool_calls.values():

View File

@@ -567,12 +567,31 @@ class BedrockCompletion(BaseLLM):
elif "contentBlockStart" in event:
start = event["contentBlockStart"].get("start", {})
block_index = event["contentBlockStart"].get("contentBlockIndex", 0)
if "toolUse" in start:
current_tool_use = start["toolUse"]
current_tool_use["_block_index"] = block_index
current_tool_use["_accumulated_input"] = ""
tool_use_id = current_tool_use.get("toolUseId")
logging.debug(
f"Tool use started in stream: {current_tool_use.get('name')} (ID: {tool_use_id})"
)
# Emit tool call start event
tool_call_event_data = {
"id": tool_use_id,
"function": {
"name": current_tool_use.get("name", ""),
"arguments": "",
},
"type": "function",
"index": block_index,
}
self._emit_stream_chunk_event(
chunk="",
from_task=from_task,
from_agent=from_agent,
tool_call=tool_call_event_data,
)
elif "contentBlockDelta" in event:
delta = event["contentBlockDelta"]["delta"]
@@ -589,6 +608,23 @@ class BedrockCompletion(BaseLLM):
tool_input = delta["toolUse"].get("input", "")
if tool_input:
logging.debug(f"Tool input delta: {tool_input}")
current_tool_use["_accumulated_input"] += tool_input
# Emit tool call delta event
tool_call_event_data = {
"id": current_tool_use.get("toolUseId"),
"function": {
"name": current_tool_use.get("name", ""),
"arguments": tool_input,
},
"type": "function",
"index": current_tool_use.get("_block_index", 0),
}
self._emit_stream_chunk_event(
chunk=tool_input,
from_task=from_task,
from_agent=from_agent,
tool_call=tool_call_event_data,
)
# Content block stop - end of a content block
elif "contentBlockStop" in event:

View File

@@ -1,3 +1,4 @@
import json
import logging
import os
import re
@@ -496,7 +497,7 @@ class GeminiCompletion(BaseLLM):
if hasattr(chunk, "candidates") and chunk.candidates:
candidate = chunk.candidates[0]
if candidate.content and candidate.content.parts:
for part in candidate.content.parts:
for part_index, part in enumerate(candidate.content.parts):
if hasattr(part, "function_call") and part.function_call:
call_id = part.function_call.name or "default"
if call_id not in function_calls:
@@ -505,8 +506,27 @@ class GeminiCompletion(BaseLLM):
"args": dict(part.function_call.args)
if part.function_call.args
else {},
"index": part_index,
}
# Emit tool call streaming event
args_str = json.dumps(function_calls[call_id]["args"]) if function_calls[call_id]["args"] else ""
tool_call_event_data = {
"id": call_id,
"function": {
"name": function_calls[call_id]["name"],
"arguments": args_str,
},
"type": "function",
"index": function_calls[call_id]["index"],
}
self._emit_stream_chunk_event(
chunk=args_str,
from_task=from_task,
from_agent=from_agent,
tool_call=tool_call_event_data,
)
# Handle completed function calls
if function_calls and available_functions:
for call_data in function_calls.values():

View File

@@ -510,8 +510,10 @@ class OpenAICompletion(BaseLLM):
call_id = tool_call.id or "default"
if call_id not in tool_calls:
tool_calls[call_id] = {
"id": call_id,
"name": "",
"arguments": "",
"index": tool_call.index if tool_call.index is not None else 0,
}
if tool_call.function and tool_call.function.name:
@@ -519,6 +521,23 @@ class OpenAICompletion(BaseLLM):
if tool_call.function and tool_call.function.arguments:
tool_calls[call_id]["arguments"] += tool_call.function.arguments
# Emit tool call streaming event
tool_call_event_data = {
"id": tool_calls[call_id]["id"],
"function": {
"name": tool_calls[call_id]["name"],
"arguments": tool_call.function.arguments if tool_call.function and tool_call.function.arguments else "",
},
"type": "function",
"index": tool_calls[call_id]["index"],
}
self._emit_stream_chunk_event(
chunk=tool_call.function.arguments if tool_call.function and tool_call.function.arguments else "",
from_task=from_task,
from_agent=from_agent,
tool_call=tool_call_event_data,
)
if tool_calls and available_functions:
for call_data in tool_calls.values():
function_name = call_data["name"]

View File

@@ -66,7 +66,6 @@ class SSETransport(BaseTransport):
self._transport_context = sse_client(
self.url,
headers=self.headers if self.headers else None,
terminate_on_close=True,
)
read, write = await self._transport_context.__aenter__()

View File

@@ -72,7 +72,8 @@ class TestSettings(unittest.TestCase):
@patch("crewai.cli.config.TokenManager")
def test_reset_settings(self, mock_token_manager):
user_settings = {key: f"value_for_{key}" for key in USER_SETTINGS_KEYS}
cli_settings = {key: f"value_for_{key}" for key in CLI_SETTINGS_KEYS}
cli_settings = {key: f"value_for_{key}" for key in CLI_SETTINGS_KEYS if key != "oauth2_extra"}
cli_settings["oauth2_extra"] = {"scope": "xxx", "other": "yyy"}
settings = Settings(
config_path=self.config_path, **user_settings, **cli_settings

View File

@@ -0,0 +1,22 @@
"""Tests for SSE transport."""
import pytest
from crewai.mcp.transports.sse import SSETransport
@pytest.mark.asyncio
async def test_sse_transport_connect_does_not_pass_invalid_args():
    """Ensure SSETransport.connect() fails only with a ConnectionError.

    The connection attempt is expected to raise ConnectionError, and the
    error text must not mention an unexpected keyword argument, which is
    what passing an unsupported parameter such as terminate_on_close to
    sse_client would produce.
    """
    sse_transport = SSETransport(
        url="http://localhost:9999/sse",
        headers={"Authorization": "Bearer test"},
    )

    with pytest.raises(ConnectionError) as raised:
        await sse_transport.connect()

    error_text = str(raised.value)
    assert "unexpected keyword argument" not in error_text

View File

@@ -715,3 +715,243 @@ class TestStreamingImports:
assert StreamChunk is not None
assert StreamChunkType is not None
assert ToolCallChunk is not None
class TestLLMStreamChunkEventToolCall:
    """Covers how LLMStreamChunkEvent carries tool call information."""

    def test_llm_stream_chunk_event_with_tool_call(self) -> None:
        """Build the event from a ToolCall model and read each field back."""
        from crewai.events.types.llm_events import (
            FunctionCall,
            LLMCallType,
            LLMStreamChunkEvent,
            ToolCall,
        )

        arguments_json = '{"query": "test"}'
        search_call = ToolCall(
            id="call-123",
            function=FunctionCall(name="search", arguments=arguments_json),
            type="function",
            index=0,
        )

        stream_event = LLMStreamChunkEvent(
            chunk=arguments_json,
            tool_call=search_call,
            call_type=LLMCallType.TOOL_CALL,
        )

        assert stream_event.chunk == arguments_json
        assert stream_event.tool_call is not None
        assert stream_event.tool_call.id == "call-123"
        assert stream_event.tool_call.function.name == "search"
        assert stream_event.tool_call.function.arguments == arguments_json
        assert stream_event.call_type == LLMCallType.TOOL_CALL

    def test_llm_stream_chunk_event_with_dict_tool_call(self) -> None:
        """Build the event from a plain dict and read it back via attributes."""
        from crewai.events.types.llm_events import (
            LLMCallType,
            LLMStreamChunkEvent,
        )

        arguments_json = '{"location": "NYC"}'
        # Same dict layout the provider streaming code passes as tool_call.
        weather_call = {
            "id": "call-456",
            "function": {
                "name": "get_weather",
                "arguments": arguments_json,
            },
            "type": "function",
            "index": 1,
        }

        stream_event = LLMStreamChunkEvent(
            chunk=arguments_json,
            tool_call=weather_call,
            call_type=LLMCallType.TOOL_CALL,
        )

        assert stream_event.chunk == arguments_json
        assert stream_event.tool_call is not None
        assert stream_event.tool_call.id == "call-456"
        assert stream_event.tool_call.function.name == "get_weather"
        assert stream_event.tool_call.function.arguments == arguments_json
        assert stream_event.call_type == LLMCallType.TOOL_CALL

    def test_llm_stream_chunk_event_text_only(self) -> None:
        """Build a text chunk event with no tool call attached."""
        from crewai.events.types.llm_events import (
            LLMCallType,
            LLMStreamChunkEvent,
        )

        text = "Hello, world!"
        stream_event = LLMStreamChunkEvent(
            chunk=text,
            tool_call=None,
            call_type=LLMCallType.LLM_CALL,
        )

        assert stream_event.chunk == text
        assert stream_event.tool_call is None
        assert stream_event.call_type == LLMCallType.LLM_CALL
class TestBaseLLMEmitStreamChunkEvent:
    """Tests for BaseLLM._emit_stream_chunk_event method."""

    @staticmethod
    def _emit_and_capture(**emit_kwargs: object) -> list:
        """Call _emit_stream_chunk_event on a bare BaseLLM and collect the events.

        The event bus name looked up in crewai.llms.base_llm is patched and
        its emit attribute is replaced by a recorder, so nothing is sent to
        the real bus.

        Args:
            **emit_kwargs: Forwarded unchanged to _emit_stream_chunk_event.

        Returns:
            Every LLMStreamChunkEvent handed to emit, in call order.
        """
        from unittest.mock import patch

        from crewai.events.types.llm_events import LLMStreamChunkEvent
        from crewai.llms.base_llm import BaseLLM

        # Clearing __abstractmethods__ lets the abstract base be instantiated.
        with patch.object(BaseLLM, "__abstractmethods__", set()):
            llm = BaseLLM(model="test-model")  # type: ignore

        captured_events: list[LLMStreamChunkEvent] = []

        def capture_emit(source: Any, event: Any) -> None:
            if isinstance(event, LLMStreamChunkEvent):
                captured_events.append(event)

        with patch("crewai.llms.base_llm.crewai_event_bus") as mock_bus:
            mock_bus.emit = capture_emit
            llm._emit_stream_chunk_event(**emit_kwargs)

        return captured_events

    def test_emit_stream_chunk_event_infers_tool_call_type(self) -> None:
        """Test that _emit_stream_chunk_event infers TOOL_CALL type when tool_call is present."""
        from crewai.events.types.llm_events import LLMCallType

        tool_call_dict = {
            "id": "call-789",
            "function": {
                "name": "test_tool",
                "arguments": '{"arg": "value"}',
            },
            "type": "function",
            "index": 0,
        }

        # No call_type is passed, so it has to be inferred from tool_call.
        captured_events = self._emit_and_capture(
            chunk='{"arg": "value"}',
            tool_call=tool_call_dict,
        )

        assert len(captured_events) == 1
        assert captured_events[0].call_type == LLMCallType.TOOL_CALL
        assert captured_events[0].tool_call is not None

    def test_emit_stream_chunk_event_infers_llm_call_type(self) -> None:
        """Test that _emit_stream_chunk_event infers LLM_CALL type when tool_call is None."""
        from crewai.events.types.llm_events import LLMCallType

        # No call_type and no tool_call: the inferred type should be LLM_CALL.
        captured_events = self._emit_and_capture(
            chunk="Hello, world!",
            tool_call=None,
        )

        assert len(captured_events) == 1
        assert captured_events[0].call_type == LLMCallType.LLM_CALL
        assert captured_events[0].tool_call is None

    def test_emit_stream_chunk_event_respects_explicit_call_type(self) -> None:
        """Test that _emit_stream_chunk_event respects explicitly provided call_type."""
        from crewai.events.types.llm_events import LLMCallType

        captured_events = self._emit_and_capture(
            chunk="test",
            tool_call=None,
            call_type=LLMCallType.TOOL_CALL,  # Explicitly set even though no tool_call
        )

        assert len(captured_events) == 1
        assert captured_events[0].call_type == LLMCallType.TOOL_CALL
class TestStreamingToolCallExtraction:
    """Covers _extract_tool_call_info applied to LLMStreamChunkEvent instances."""

    def test_extract_tool_call_info_from_event(self) -> None:
        """An event carrying a tool call yields TOOL_CALL plus a populated chunk."""
        from crewai.events.types.llm_events import (
            FunctionCall,
            LLMStreamChunkEvent,
            ToolCall,
        )
        from crewai.types.streaming import StreamChunkType
        from crewai.utilities.streaming import _extract_tool_call_info

        arguments_json = '{"key": "value"}'
        source_call = ToolCall(
            id="call-extract-test",
            function=FunctionCall(name="extract_test", arguments=arguments_json),
            type="function",
            index=2,
        )
        stream_event = LLMStreamChunkEvent(
            chunk=arguments_json,
            tool_call=source_call,
        )

        extracted = _extract_tool_call_info(stream_event)
        kind, extracted_call = extracted

        assert kind == StreamChunkType.TOOL_CALL
        assert extracted_call is not None
        assert extracted_call.tool_id == "call-extract-test"
        assert extracted_call.tool_name == "extract_test"
        assert extracted_call.arguments == arguments_json
        assert extracted_call.index == 2

    def test_extract_tool_call_info_returns_text_for_no_tool_call(self) -> None:
        """An event without a tool call yields TEXT and no tool call chunk."""
        from crewai.events.types.llm_events import LLMStreamChunkEvent
        from crewai.types.streaming import StreamChunkType
        from crewai.utilities.streaming import _extract_tool_call_info

        stream_event = LLMStreamChunkEvent(
            chunk="Just text content",
            tool_call=None,
        )

        extracted = _extract_tool_call_info(stream_event)
        kind, extracted_call = extracted

        assert kind == StreamChunkType.TEXT
        assert extracted_call is None

26
uv.lock generated
View File

@@ -1225,7 +1225,7 @@ dependencies = [
{ name = "crewai" },
{ name = "docker" },
{ name = "lancedb" },
{ name = "pypdf" },
{ name = "pymupdf" },
{ name = "python-docx" },
{ name = "pytube" },
{ name = "requests" },
@@ -1382,8 +1382,8 @@ requires-dist = [
{ name = "psycopg2-binary", marker = "extra == 'postgresql'", specifier = ">=2.9.10" },
{ name = "pygithub", marker = "extra == 'github'", specifier = "==1.59.1" },
{ name = "pymongo", marker = "extra == 'mongodb'", specifier = ">=4.13" },
{ name = "pymupdf", specifier = ">=1.26.6" },
{ name = "pymysql", marker = "extra == 'mysql'", specifier = ">=1.1.1" },
{ name = "pypdf", specifier = ">=5.9.0" },
{ name = "python-docx", specifier = ">=1.2.0" },
{ name = "python-docx", marker = "extra == 'rag'", specifier = ">=1.1.0" },
{ name = "pytube", specifier = ">=15.0.0" },
@@ -2224,6 +2224,8 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" },
{ url = "https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" },
{ url = "https://files.pythonhosted.org/packages/a1/8d/88f3ebd2bc96bf7747093696f4335a0a8a4c5acfcf1b757717c0d2474ba3/greenlet-3.2.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8854167e06950ca75b898b104b63cc646573aa5fef1353d4508ecdd1ee76254f", size = 1137126, upload-time = "2025-08-07T13:18:20.239Z" },
{ url = "https://files.pythonhosted.org/packages/f1/29/74242b7d72385e29bcc5563fba67dad94943d7cd03552bac320d597f29b2/greenlet-3.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f47617f698838ba98f4ff4189aef02e7343952df3a615f847bb575c3feb177a7", size = 1544904, upload-time = "2025-11-04T12:42:04.763Z" },
{ url = "https://files.pythonhosted.org/packages/c8/e2/1572b8eeab0f77df5f6729d6ab6b141e4a84ee8eb9bc8c1e7918f94eda6d/greenlet-3.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af41be48a4f60429d5cad9d22175217805098a9ef7c40bfef44f7669fb9d74d8", size = 1611228, upload-time = "2025-11-04T12:42:08.423Z" },
{ url = "https://files.pythonhosted.org/packages/d6/6f/b60b0291d9623c496638c582297ead61f43c4b72eef5e9c926ef4565ec13/greenlet-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:73f49b5368b5359d04e18d15828eecc1806033db5233397748f4ca813ff1056c", size = 298654, upload-time = "2025-08-07T13:50:00.469Z" },
{ url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" },
{ url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" },
@@ -2233,6 +2235,8 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" },
{ url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" },
{ url = "https://files.pythonhosted.org/packages/3f/cc/b07000438a29ac5cfb2194bfc128151d52f333cee74dd7dfe3fb733fc16c/greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa", size = 1142073, upload-time = "2025-08-07T13:18:21.737Z" },
{ url = "https://files.pythonhosted.org/packages/67/24/28a5b2fa42d12b3d7e5614145f0bd89714c34c08be6aabe39c14dd52db34/greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c", size = 1548385, upload-time = "2025-11-04T12:42:11.067Z" },
{ url = "https://files.pythonhosted.org/packages/6a/05/03f2f0bdd0b0ff9a4f7b99333d57b53a7709c27723ec8123056b084e69cd/greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5", size = 1613329, upload-time = "2025-11-04T12:42:12.928Z" },
{ url = "https://files.pythonhosted.org/packages/d8/0f/30aef242fcab550b0b3520b8e3561156857c94288f0332a79928c31a52cf/greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9", size = 299100, upload-time = "2025-08-07T13:44:12.287Z" },
{ url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" },
{ url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" },
@@ -2242,6 +2246,8 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" },
{ url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" },
{ url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" },
{ url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" },
{ url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" },
{ url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" },
{ url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" },
{ url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" },
@@ -2251,6 +2257,8 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" },
{ url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" },
{ url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" },
{ url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" },
{ url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" },
{ url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" },
]
@@ -5970,6 +5978,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/48/7c/42f0b6997324023e94939f8f32b9a8dd928499f4b5d7b4412905368686b5/pymongo-4.15.3-cp313-cp313-win_arm64.whl", hash = "sha256:fb384623ece34db78d445dd578a52d28b74e8319f4d9535fbaff79d0eae82b3d", size = 944300, upload-time = "2025-10-07T21:56:58.969Z" },
]
[[package]]
name = "pymupdf"
version = "1.26.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ec/d7/a6f0e03a117fa2ad79c4b898203bb212b17804f92558a6a339298faca7bb/pymupdf-1.26.6.tar.gz", hash = "sha256:a2b4531cd4ab36d6f1f794bb6d3c33b49bda22f36d58bb1f3e81cbc10183bd2b", size = 84322494, upload-time = "2025-11-05T15:20:46.786Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9e/5c/dec354eee5fe4966c715f33818ed4193e0e6c986cf8484de35b6c167fb8e/pymupdf-1.26.6-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:e46f320a136ad55e5219e8f0f4061bdf3e4c12b126d2740d5a49f73fae7ea176", size = 23178988, upload-time = "2025-11-05T14:31:19.834Z" },
{ url = "https://files.pythonhosted.org/packages/ec/a0/11adb742d18142bd623556cd3b5d64649816decc5eafd30efc9498657e76/pymupdf-1.26.6-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:6844cd2396553c0fa06de4869d5d5ecb1260e6fc3b9d85abe8fa35f14dd9d688", size = 22469764, upload-time = "2025-11-05T14:32:34.654Z" },
{ url = "https://files.pythonhosted.org/packages/e4/c8/377cf20e31f58d4c243bfcf2d3cb7466d5b97003b10b9f1161f11eb4a994/pymupdf-1.26.6-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:617ba69e02c44f0da1c0e039ea4a26cf630849fd570e169c71daeb8ac52a81d6", size = 23502227, upload-time = "2025-11-06T11:03:56.934Z" },
{ url = "https://files.pythonhosted.org/packages/4f/bf/6e02e3d84b32c137c71a0a3dcdba8f2f6e9950619a3bc272245c7c06a051/pymupdf-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:7777d0b7124c2ebc94849536b6a1fb85d158df3b9d873935e63036559391534c", size = 24115381, upload-time = "2025-11-05T14:33:54.338Z" },
{ url = "https://files.pythonhosted.org/packages/ab/9d/30f7fcb3776bfedde66c06297960debe4883b1667294a1ee9426c942e94d/pymupdf-1.26.6-cp310-abi3-win32.whl", hash = "sha256:8f3ef05befc90ca6bb0f12983200a7048d5bff3e1c1edef1bb3de60b32cb5274", size = 17203613, upload-time = "2025-11-05T17:19:47.494Z" },
{ url = "https://files.pythonhosted.org/packages/f9/e8/989f4eaa369c7166dc24f0eaa3023f13788c40ff1b96701f7047421554a8/pymupdf-1.26.6-cp310-abi3-win_amd64.whl", hash = "sha256:ce02ca96ed0d1acfd00331a4d41a34c98584d034155b06fd4ec0f051718de7ba", size = 18405680, upload-time = "2025-11-05T14:34:48.672Z" },
]
[[package]]
name = "pymysql"
version = "1.1.2"