Compare commits

..

1 Commits

Author SHA1 Message Date
Gui Vieira
413e0363d0 Document monorepo deployments 2026-06-02 18:24:11 -03:00
16 changed files with 347 additions and 745 deletions

View File

@@ -509,6 +509,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -1028,6 +1029,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -1514,6 +1516,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -1999,6 +2002,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -2484,6 +2488,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -2979,6 +2984,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -3474,6 +3480,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -3969,6 +3976,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -4464,6 +4472,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -4948,6 +4957,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -5432,6 +5442,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/update-crew",
@@ -5916,6 +5927,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/training-crews",
@@ -6402,6 +6414,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/training-crews",
@@ -6886,6 +6899,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/training-crews",
@@ -7373,6 +7387,7 @@
"en/enterprise/guides/build-crew",
"en/enterprise/guides/prepare-for-deployment",
"en/enterprise/guides/deploy-to-amp",
"en/enterprise/guides/monorepo-deployments",
"en/enterprise/guides/private-package-registry",
"en/enterprise/guides/kickoff-crew",
"en/enterprise/guides/training-crews",

View File

@@ -164,6 +164,12 @@ You need to push your crew to a GitHub repository. If you haven't created a crew
![Select Repository](/images/enterprise/select-repo.png)
</Frame>
<Tip>
If your Crew or Flow is inside a monorepo subfolder, expand **Advanced**
and set a working directory before deploying. See
[Monorepo Deployments](/en/enterprise/guides/monorepo-deployments).
</Tip>
</Step>
<Step title="Set Environment Variables">

View File

@@ -0,0 +1,225 @@
---
title: "Monorepo Deployments"
description: "Deploy a Crew or Flow from a subfolder in a larger repository"
icon: "folder-tree"
mode: "wide"
---
<Note>
Use a working directory when your Crew or Flow lives inside a larger
repository. CrewAI AMP validates, builds, tests, and runs the automation from
that subfolder instead of the repository root.
</Note>
## When to Use This
Monorepo deployments are useful when one repository contains multiple
automations, shared packages, or other application code:
```text
company-ai/
|-- uv.lock
|-- packages/
| `-- shared_tools/
`-- crews/
|-- support_agent/
| |-- pyproject.toml
| `-- src/
| `-- support_agent/
| |-- main.py
| `-- crew.py
`-- research_flow/
|-- pyproject.toml
`-- src/
`-- research_flow/
`-- main.py
```
To deploy `support_agent`, set the working directory to:
```text
crews/support_agent
```
AMP still pulls or uploads the whole repository, but it treats the selected
folder as the automation project root.
## What the Working Directory Controls
When a working directory is set, AMP uses that folder for:
- Project validation, including `pyproject.toml`, `src/`, and the Crew or Flow entry point
- Dependency installation with `uv`
- The running process working directory
- The `CREW_ROOT_DIR` environment variable
Leaving the field empty keeps the existing behavior and uses the repository
root.
## Supported Sources
You can set a working directory when creating a deployment from:
- A connected GitHub repository
- A Git repository configured in AMP
- A ZIP upload
<Info>
Configure working directories in the AMP web interface. The
`crewai deploy create` CLI flow does not prompt for this field.
</Info>
You can also add or change the working directory on an existing deployment from
the deployment's **Settings** page. The change takes effect on the next deploy.
<Warning>
Working directories and auto-deploy cannot be used together. If a deployment
has a working directory, auto-deploy is disabled for that deployment. Turn
auto-deploy off before setting a working directory.
</Warning>
## Configure a New Deployment
<Steps>
<Step title="Open Deploy from Code">
In CrewAI AMP, create a new deployment and choose your source: GitHub, Git
Repository, or ZIP upload.
</Step>
<Step title="Select the repository, branch, or ZIP file">
Choose the repository and branch that contain your monorepo, or upload a ZIP
file whose root contains the monorepo contents.
</Step>
<Step title="Open Advanced settings">
Expand the **Advanced** section in the deploy form.
</Step>
<Step title="Enter the working directory">
Enter the path from the repository root to the Crew or Flow project:
```text
crews/support_agent
```
Do not include a leading slash.
</Step>
<Step title="Deploy">
Add any required environment variables, then start the deployment.
</Step>
</Steps>
## Configure an Existing Deployment
<Steps>
<Step title="Open the deployment settings">
Go to your automation in AMP and open **Settings**.
</Step>
<Step title="Turn off auto-deploy if needed">
If auto-deploy is enabled, disable it first. The working directory field is
unavailable while auto-deploy is on.
</Step>
<Step title="Set the working directory">
In **Basic settings**, enter the subfolder path, such as:
```text
crews/support_agent
```
</Step>
<Step title="Redeploy">
Save the setting and redeploy the automation. The new working directory is
used on the next deploy.
</Step>
</Steps>
## Path Rules
The working directory must be a relative path inside the repository or ZIP root.
| Rule | Example |
|------|---------|
| Use a relative path | `crews/support_agent` |
| Do not start with `/` | `/crews/support_agent` is invalid |
| Do not use `.` or `..` path segments | `crews/../support_agent` is invalid |
| Use only letters, numbers, dashes, underscores, dots, and forward slashes | `crews/support agent` is invalid |
| Keep the path at 255 characters or fewer | Longer paths are rejected |
AMP trims leading and trailing whitespace, collapses repeated slashes, and
removes trailing slashes. A blank value uses the repository root.
## Lock Files and UV Workspaces
The selected folder must contain the automation's `pyproject.toml` and `src/`
directory. A `uv.lock` or `poetry.lock` file can live either in the selected
folder or at the repository root.
This supports both common monorepo layouts:
<Tabs>
<Tab title="Project lock file">
```text
company-ai/
`-- crews/
`-- support_agent/
|-- pyproject.toml
|-- uv.lock
`-- src/
`-- support_agent/
`-- main.py
```
</Tab>
<Tab title="Workspace lock file">
```text
company-ai/
|-- uv.lock
|-- packages/
| `-- shared_tools/
`-- crews/
`-- support_agent/
|-- pyproject.toml
`-- src/
`-- support_agent/
`-- main.py
```
</Tab>
</Tabs>
<Tip>
If your automation imports shared packages from elsewhere in the monorepo,
declare those packages in `pyproject.toml` using UV workspace, path, or source
configuration. AMP runs the automation from the selected folder, so shared
code should be installed as a dependency instead of relying on the repository
root being on the Python path.
</Tip>
## Troubleshooting
### Working Directory Not Found
Check that the path is relative to the repository or ZIP root. For ZIP uploads,
the ZIP contents must include the working directory path exactly as entered.
### Missing pyproject.toml
The working directory should point to the Crew or Flow project folder, not just
to a parent folder that contains several projects.
### Missing uv.lock or poetry.lock
Commit a lock file either in the selected project folder or in the repository
root. For UV workspaces, keeping `uv.lock` at the workspace root is supported.
### Auto-Deploy Is Unavailable
Auto-deploy is disabled while a working directory is set. Use manual redeploys
or trigger redeployments from CI/CD with the AMP API instead.
<Card title="Deploy to AMP" icon="rocket" href="/en/enterprise/guides/deploy-to-amp">
Continue with the deployment guide after choosing your monorepo working
directory.
</Card>

View File

@@ -11,10 +11,7 @@ from crewai_files.formatting.anthropic import AnthropicFormatter
from crewai_files.formatting.bedrock import BedrockFormatter
from crewai_files.formatting.gemini import GeminiFormatter
from crewai_files.formatting.openai import OpenAIFormatter, OpenAIResponsesFormatter
from crewai_files.processing.constraints import (
get_constraints_for_provider,
uses_openai_responses_api,
)
from crewai_files.processing.constraints import get_constraints_for_provider
from crewai_files.processing.processor import FileProcessor
from crewai_files.resolution.resolver import FileResolver, FileResolverConfig
from crewai_files.uploaders.factory import ProviderType
@@ -123,11 +120,9 @@ def format_multimodal_content(
if not files:
return content_blocks
constraints_key = (
"openai_responses"
if uses_openai_responses_api(provider_type, api)
else provider_type
)
constraints_key: str = provider_type
if api == "responses" and "openai" in provider_type.lower():
constraints_key = "openai_responses"
processor = FileProcessor(constraints=constraints_key)
processed_files = processor.process_files(files)
@@ -189,11 +184,9 @@ async def aformat_multimodal_content(
if not files:
return content_blocks
constraints_key = (
"openai_responses"
if uses_openai_responses_api(provider_type, api)
else provider_type
)
constraints_key: str = provider_type
if api == "responses" and "openai" in provider_type.lower():
constraints_key = "openai_responses"
processor = FileProcessor(constraints=constraints_key)
processed_files = await processor.aprocess_files(files)

View File

@@ -346,20 +346,6 @@ def get_constraints_for_provider(
return None
def uses_openai_responses_api(provider: str, api: str | None = None) -> bool:
"""Return whether provider/API should use OpenAI Responses file support."""
if api != "responses":
return False
provider_lower = provider.lower()
return (
"openai" in provider_lower
or provider_lower == "gpt"
or provider_lower.startswith("gpt-")
or "/gpt-" in provider_lower
)
def get_supported_content_types(provider: str, api: str | None = None) -> list[str]:
"""Get supported MIME type prefixes for a provider.
@@ -370,9 +356,9 @@ def get_supported_content_types(provider: str, api: str | None = None) -> list[s
Returns:
List of supported MIME type prefixes (e.g., ["image/", "application/pdf"]).
"""
lookup_key = (
"openai_responses" if uses_openai_responses_api(provider, api) else provider
)
lookup_key = provider
if api == "responses" and "openai" in provider.lower():
lookup_key = "openai_responses"
constraints = get_constraints_for_provider(lookup_key)
if not constraints:

View File

@@ -11,7 +11,6 @@ from crewai_files.processing.constraints import (
ProviderConstraints,
VideoConstraints,
get_constraints_for_provider,
get_supported_content_types,
)
import pytest
@@ -71,13 +70,6 @@ class TestPDFConstraints:
assert constraints.max_size_bytes == 1000
assert constraints.max_pages is None
@pytest.mark.parametrize("provider", ["openai", "gpt", "gpt-4o-mini"])
def test_openai_responses_supports_pdf_for_gpt_aliases(self, provider):
"""OpenAI Responses PDF support applies to concrete GPT model names."""
supported_types = get_supported_content_types(provider, api="responses")
assert "application/pdf" in supported_types
class TestAudioConstraints:
"""Tests for AudioConstraints dataclass."""

View File

@@ -93,7 +93,6 @@ from crewai.utilities.agent_utils import (
track_delegation_if_needed,
)
from crewai.utilities.constants import TRAINING_DATA_FILE
from crewai.utilities.file_store import aget_all_files, get_all_files
from crewai.utilities.i18n import I18N_DEFAULT
from crewai.utilities.planning_types import (
PlanStep,
@@ -2772,7 +2771,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
mark_cache_breakpoint(format_message_for_llm(user_prompt))
)
await self._ainject_files_from_inputs(inputs)
self._inject_files_from_inputs(inputs)
self.state.ask_for_human_input = bool(
inputs.get("ask_for_human_input", False)
@@ -2983,42 +2982,12 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
training_handler.save(training_data)
def _inject_files_from_inputs(self, inputs: dict[str, Any]) -> None:
"""Inject files into the last user message.
"""Inject files from inputs into the last user message.
Args:
inputs: Input dictionary that may contain a 'files' key.
"""
files: dict[str, Any] = {}
if self.crew and self.task:
stored_files = get_all_files(self.crew.id, self.task.id)
if stored_files:
files.update(stored_files)
if inputs.get("files"):
files.update(inputs["files"])
if not files:
return
for i in range(len(self.state.messages) - 1, -1, -1):
msg = self.state.messages[i]
if msg.get("role") == "user":
msg["files"] = files
break
async def _ainject_files_from_inputs(self, inputs: dict[str, Any]) -> None:
"""Async inject files into the last user message."""
files: dict[str, Any] = {}
if self.crew and self.task:
stored_files = await aget_all_files(self.crew.id, self.task.id)
if stored_files:
files.update(stored_files)
if inputs.get("files"):
files.update(inputs["files"])
files = inputs.get("files")
if not files:
return

View File

@@ -1,84 +1,32 @@
from __future__ import annotations
from collections.abc import Iterator
from functools import cache
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal, NamedTuple, cast
from typing import TYPE_CHECKING, Any, Literal
from urllib.parse import urlparse
from pydantic import Field, model_validator
from typing_extensions import Self
try:
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter
from docling.exceptions import ConversionError
from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker
from docling_core.types.doc.document import DoclingDocument
DOCLING_AVAILABLE = True
except ImportError:
DOCLING_AVAILABLE = False
if TYPE_CHECKING:
from docling.document_converter import DocumentConverter
from docling_core.types.doc.document import DoclingDocument
from pydantic import Field
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
from crewai.utilities.logger import Logger
if TYPE_CHECKING:
from docling.document_converter import DocumentConverter
from docling_core.types.doc.document import DoclingDocument
_DOCLING_IMPORT_ERROR = (
"The docling package is required to use CrewDoclingSource. "
"Please install it using: uv add docling"
)
class _DoclingModules(NamedTuple):
"""Lazily-imported docling symbols used by ``CrewDoclingSource``."""
input_format: Any
document_converter: Any
conversion_error: type[BaseException]
hierarchical_chunker: Any
@cache
def _import_docling() -> _DoclingModules:
"""Import docling submodules lazily and cache the result.
Raises:
ImportError: If the docling package is not installed.
"""
try:
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter
from docling.exceptions import ConversionError
from docling_core.transforms.chunker.hierarchical_chunker import (
HierarchicalChunker,
)
except ImportError as e:
raise ImportError(_DOCLING_IMPORT_ERROR) from e
return _DoclingModules(
input_format=InputFormat,
document_converter=DocumentConverter,
conversion_error=ConversionError,
hierarchical_chunker=HierarchicalChunker,
)
def _build_default_document_converter() -> DocumentConverter:
"""Construct the default ``DocumentConverter`` with crewAI's allowed formats."""
docling = _import_docling()
input_format = docling.input_format
return cast(
"DocumentConverter",
docling.document_converter(
allowed_formats=[
input_format.MD,
input_format.ASCIIDOC,
input_format.PDF,
input_format.DOCX,
input_format.HTML,
input_format.IMAGE,
input_format.XLSX,
input_format.PPTX,
]
),
)
class CrewDoclingSource(BaseKnowledgeSource):
"""Default Source class for converting documents to markdown or json.
@@ -86,11 +34,13 @@ class CrewDoclingSource(BaseKnowledgeSource):
any additional dependencies and follows the docling package as the source of truth.
"""
@model_validator(mode="before")
@classmethod
def _ensure_docling_available(cls, data: Any) -> Any:
_import_docling()
return data
def __init__(self, *args: Any, **kwargs: Any) -> None:
if not DOCLING_AVAILABLE:
raise ImportError(
"The docling package is required to use CrewDoclingSource. "
"Please install it using: uv add docling"
)
super().__init__(*args, **kwargs)
_logger: Logger = Logger(verbose=True)
@@ -99,11 +49,23 @@ class CrewDoclingSource(BaseKnowledgeSource):
file_paths: list[Path | str] = Field(default_factory=list)
chunks: list[str] = Field(default_factory=list)
safe_file_paths: list[Path | str] = Field(default_factory=list)
content: list[Any] = Field(default_factory=list)
document_converter: Any = Field(default_factory=_build_default_document_converter)
content: list[DoclingDocument] = Field(default_factory=list)
document_converter: DocumentConverter = Field(
default_factory=lambda: DocumentConverter(
allowed_formats=[
InputFormat.MD,
InputFormat.ASCIIDOC,
InputFormat.PDF,
InputFormat.DOCX,
InputFormat.HTML,
InputFormat.IMAGE,
InputFormat.XLSX,
InputFormat.PPTX,
]
)
)
@model_validator(mode="after")
def _load_sources(self) -> Self:
def model_post_init(self, _: Any) -> None:
if self.file_path:
self._logger.log(
"warning",
@@ -113,13 +75,11 @@ class CrewDoclingSource(BaseKnowledgeSource):
self.file_paths = self.file_path
self.safe_file_paths = self.validate_content()
self.content = self._load_content()
return self
def _load_content(self) -> list[DoclingDocument]:
conversion_error = _import_docling().conversion_error
try:
return self._convert_source_to_docling_documents()
except conversion_error as e:
except ConversionError as e:
self._logger.log(
"error",
f"Error loading content: {e}. Supported formats: {self.document_converter.allowed_formats}",
@@ -152,7 +112,7 @@ class CrewDoclingSource(BaseKnowledgeSource):
return [result.document for result in conv_results_iter]
def _chunk_doc(self, doc: DoclingDocument) -> Iterator[str]:
chunker = _import_docling().hierarchical_chunker()
chunker = HierarchicalChunker()
for chunk in chunker.chunk(doc):
yield chunk.text

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import ast
import os
from typing import Any, Literal
@@ -134,9 +133,6 @@ class SnowflakeCompletion(OpenAICompletion):
def _format_messages(self, messages: str | list[LLMMessage]) -> list[LLMMessage]:
formatted_messages = super()._format_messages(messages)
if self._is_claude_model():
formatted_messages = self._normalize_stringified_tool_calls(
formatted_messages
)
formatted_messages = self._remove_incomplete_claude_tool_uses(
formatted_messages
)
@@ -147,41 +143,6 @@ class SnowflakeCompletion(OpenAICompletion):
model = self.model.lower()
return model.startswith(("claude-", "anthropic."))
@staticmethod
def _normalize_stringified_tool_calls(
messages: list[LLMMessage],
) -> list[LLMMessage]:
normalized_messages: list[LLMMessage] = []
for message in messages:
tool_calls = message.get("tool_calls")
if not isinstance(tool_calls, list) or not tool_calls:
normalized_messages.append(message)
continue
normalized_tool_calls: list[Any] = []
changed = False
for tool_call in tool_calls:
if isinstance(tool_call, str):
try:
parsed_tool_call = ast.literal_eval(tool_call)
except (ValueError, SyntaxError):
normalized_tool_calls.append(tool_call)
continue
if isinstance(parsed_tool_call, dict):
normalized_tool_calls.append(parsed_tool_call)
changed = True
continue
normalized_tool_calls.append(tool_call)
if changed:
normalized_message = dict(message)
normalized_message["tool_calls"] = normalized_tool_calls
normalized_messages.append(normalized_message) # type: ignore[arg-type]
else:
normalized_messages.append(message)
return normalized_messages
@staticmethod
def _remove_incomplete_claude_tool_uses(
messages: list[LLMMessage],
@@ -201,120 +162,45 @@ class SnowflakeCompletion(OpenAICompletion):
while index < len(messages):
message = messages[index]
expected_ids = SnowflakeCompletion._extract_claude_tool_use_ids(message)
if message.get("role") != "assistant" or not expected_ids:
tool_calls = message.get("tool_calls") or []
if message.get("role") != "assistant" or not tool_calls:
sanitized.append(message)
index += 1
continue
expected_ids = {
tool_call.get("id")
for tool_call in tool_calls
if isinstance(tool_call, dict) and tool_call.get("id")
}
if not expected_ids:
sanitized.append(message)
index += 1
continue
tool_result_ids: set[str] = set()
lookahead = index + 1
while lookahead < len(
messages
) and SnowflakeCompletion._is_tool_result_message(messages[lookahead]):
tool_result_ids.update(
SnowflakeCompletion._extract_claude_tool_result_ids(
messages[lookahead]
)
)
while (
lookahead < len(messages) and messages[lookahead].get("role") == "tool"
):
tool_call_id = messages[lookahead].get("tool_call_id")
if isinstance(tool_call_id, str):
tool_result_ids.add(tool_call_id)
lookahead += 1
if expected_ids.issubset(tool_result_ids):
summary = SnowflakeCompletion._summarize_tool_results(
messages[index + 1 : lookahead], expected_ids
sanitized.append(message)
sanitized.extend(
tool_message
for tool_message in messages[index + 1 : lookahead]
if tool_message.get("role") == "tool"
and tool_message.get("tool_call_id") in expected_ids
)
if summary:
sanitized.append({"role": "user", "content": summary})
index = lookahead
return sanitized
@staticmethod
def _summarize_tool_results(
messages: list[LLMMessage], expected_ids: set[str]
) -> str:
summaries: list[str] = []
for message in messages:
result_ids = SnowflakeCompletion._extract_claude_tool_result_ids(message)
if not result_ids & expected_ids:
continue
name = message.get("name") or "tool"
content = message.get("content")
if isinstance(content, str):
summaries.append(f"{name}: {content}")
elif isinstance(content, list):
extracted_text = SnowflakeCompletion._extract_tool_result_text(content)
summaries.append(f"{name}: {extracted_text or content}")
if not summaries:
return ""
return "Tool results from previous tool calls:\n" + "\n".join(
f"- {summary}" for summary in summaries
)
@staticmethod
def _extract_tool_result_text(content: list[Any]) -> str:
texts: list[str] = []
for item in content:
if not isinstance(item, dict) or not isinstance(
item.get("toolResult"), dict
):
continue
result_content = item["toolResult"].get("content", [])
texts.extend(
str(inner["text"])
for inner in result_content
if isinstance(inner, dict) and "text" in inner
)
return " ".join(texts)
@staticmethod
def _extract_claude_tool_use_ids(message: LLMMessage) -> set[str]:
tool_calls = message.get("tool_calls") or []
ids: set[str] = set()
for tool_call in tool_calls:
if not isinstance(tool_call, dict):
continue
tool_call_id = tool_call.get("id")
if isinstance(tool_call_id, str):
ids.add(tool_call_id)
content = message.get("content")
if isinstance(content, list):
for block in content:
if isinstance(block, dict) and isinstance(block.get("toolUse"), dict):
tool_use_id = block["toolUse"].get("toolUseId")
if isinstance(tool_use_id, str):
ids.add(tool_use_id)
return ids
@staticmethod
def _extract_claude_tool_result_ids(message: LLMMessage) -> set[str]:
ids: set[str] = set()
tool_call_id = message.get("tool_call_id")
if isinstance(tool_call_id, str):
ids.add(tool_call_id)
content = message.get("content")
if isinstance(content, list):
for block in content:
if isinstance(block, dict) and isinstance(
block.get("toolResult"), dict
):
tool_use_id = block["toolResult"].get("toolUseId")
if isinstance(tool_use_id, str):
ids.add(tool_use_id)
return ids
@staticmethod
def _is_tool_result_message(message: LLMMessage) -> bool:
return message.get("role") == "tool" or bool(
SnowflakeCompletion._extract_claude_tool_result_ids(message)
)
@staticmethod
def _ensure_claude_conversation_ends_with_user(
messages: list[LLMMessage],

View File

@@ -3,7 +3,6 @@
from __future__ import annotations
import base64
from io import BytesIO
from typing import TYPE_CHECKING
from pydantic import BaseModel, Field, PrivateAttr
@@ -65,9 +64,6 @@ class ReadFileTool(BaseTool):
content_type = file_input.content_type
filename = file_input.filename or file_name
if content_type == "application/pdf":
return self._read_pdf_text(content, filename)
text_types = (
"text/",
"application/json",
@@ -80,22 +76,3 @@ class ReadFileTool(BaseTool):
encoded = base64.b64encode(content).decode("ascii")
return f"[Binary file: {filename} ({content_type})]\nBase64: {encoded}"
def _read_pdf_text(self, content: bytes, filename: str) -> str:
"""Extract text from a PDF instead of returning base64."""
try:
from pypdf import PdfReader
except ImportError:
encoded = base64.b64encode(content).decode("ascii")
return f"[Binary file: {filename} (application/pdf)]\nBase64: {encoded}"
try:
reader = PdfReader(BytesIO(content))
page_text = [text for page in reader.pages if (text := page.extract_text())]
except Exception as exc:
return f"Unable to extract text from PDF '{filename}': {exc}"
if not page_text:
return f"[PDF file with no extractable text: {filename}]"
return "\n\n".join(page_text)

View File

@@ -7,11 +7,9 @@ flow methods, routing logic, and error handling.
from __future__ import annotations
import asyncio
from types import SimpleNamespace
import time
from typing import Any
from unittest.mock import AsyncMock, Mock, patch
from uuid import uuid4
import pytest
from pydantic import BaseModel
@@ -66,8 +64,6 @@ from crewai.events.types.tool_usage_events import (
from crewai.tools.tool_types import ToolResult
from crewai.utilities.step_execution_context import StepExecutionContext
from crewai.utilities.planning_types import TodoItem
from crewai.utilities.file_store import clear_files, clear_task_files, store_files
from crewai_files import TextFile
class TestAgentExecutorState:
"""Test AgentExecutorState Pydantic model."""
@@ -116,58 +112,6 @@ class TestAgentExecutor:
class StructuredResult(BaseModel):
value: str
def test_inject_files_from_crew_task_store(self):
"""Crew-level input_files should attach to the LLM user message."""
crew_id = uuid4()
task_id = uuid4()
stored_file = TextFile(source=b"stored content")
executor = _build_executor(
crew=SimpleNamespace(id=crew_id),
task=SimpleNamespace(id=task_id),
)
executor.state.messages = [{"role": "user", "content": "Analyze this file"}]
try:
store_files(crew_id, {"document": stored_file})
executor._inject_files_from_inputs({})
finally:
clear_files(crew_id)
clear_task_files(task_id)
assert executor.state.messages[0]["files"] == {"document": stored_file}
@pytest.mark.asyncio
async def test_ainject_files_from_crew_task_store_uses_async_store(self):
"""Async file injection should not call the sync file store helper."""
crew_id = uuid4()
task_id = uuid4()
stored_file = TextFile(source=b"stored content")
local_file = TextFile(source=b"local content")
inputs = {"files": {"local": local_file}}
executor = _build_executor(
crew=SimpleNamespace(id=crew_id),
task=SimpleNamespace(id=task_id),
)
executor.state.messages = [{"role": "user", "content": "Analyze this file"}]
with (
patch(
"crewai.experimental.agent_executor.aget_all_files",
new=AsyncMock(return_value={"document": stored_file}),
) as async_get_files,
patch(
"crewai.experimental.agent_executor.get_all_files",
side_effect=AssertionError("sync file store should not be called"),
),
):
await executor._ainject_files_from_inputs(inputs)
async_get_files.assert_awaited_once_with(crew_id, task_id)
assert executor.state.messages[0]["files"] == {
"document": stored_file,
"local": local_file,
}
@pytest.fixture
def mock_dependencies(self):
"""Create mock dependencies for executor."""

View File

@@ -156,44 +156,6 @@ class TestSnowflakeRequests:
assert messages == [{"role": "user", "content": "Write a summary."}]
def test_claude_model_normalizes_stringified_tool_calls_with_results(
self, monkeypatch: pytest.MonkeyPatch
):
_snowflake_env(monkeypatch)
llm = SnowflakeCompletion(model="claude-sonnet-4-5")
messages = llm._format_messages(
[
{"role": "user", "content": "Use the tools."},
{
"role": "assistant",
"content": None,
"tool_calls": [
"{'id': 'toolu_1', 'type': 'function', 'function': {'name': \"'search_the_internet_with_serper'\", 'arguments': '\\\'{\"search_query\":\"CrewAI tools\"}\\\''}}",
"{'id': 'toolu_2', 'type': 'function', 'function': {'name': \"'search_the_internet_with_serper'\", 'arguments': '\\\'{\"search_query\":\"CrewAI demos\"}\\\''}}",
],
},
{
"role": "tool",
"tool_call_id": "toolu_1",
"name": "search_the_internet_with_serper",
"content": "result 1",
},
{
"role": "tool",
"tool_call_id": "toolu_2",
"name": "search_the_internet_with_serper",
"content": "result 2",
},
]
)
assert messages[-2] == {"role": "user", "content": "Use the tools."}
assert messages[-1]["role"] == "user"
assert "result 1" in messages[-1]["content"]
assert "result 2" in messages[-1]["content"]
assert all("tool_calls" not in message for message in messages)
def test_claude_model_removes_dangling_tool_call_without_result(
self, monkeypatch: pytest.MonkeyPatch
):
@@ -247,10 +209,14 @@ class TestSnowflakeRequests:
]
)
assert messages[-2] == {"role": "user", "content": "Use the tool."}
assert messages[-3]["role"] == "assistant"
assert messages[-3]["tool_calls"][0]["id"] == "call_1"
assert messages[-2] == {
"role": "tool",
"tool_call_id": "call_1",
"content": "result",
}
assert messages[-1]["role"] == "user"
assert "result" in messages[-1]["content"]
assert all("tool_calls" not in message for message in messages)
def test_claude_model_drops_unrelated_tool_results_from_preserved_pair(
self, monkeypatch: pytest.MonkeyPatch
@@ -285,88 +251,16 @@ class TestSnowflakeRequests:
]
)
assert messages[-2] == {"role": "user", "content": "Use the tool."}
assert messages[-1]["role"] == "user"
assert "valid result" in messages[-1]["content"]
assert "unrelated result" not in messages[-1]["content"]
assert all("tool_call_id" not in message for message in messages)
def test_claude_model_removes_dangling_tool_use_content_block(
self, monkeypatch: pytest.MonkeyPatch
):
_snowflake_env(monkeypatch)
llm = SnowflakeCompletion(model="claude-sonnet-4-5")
messages = llm._format_messages(
[
{"role": "user", "content": "Use the tool."},
{
"role": "assistant",
"content": [
{
"toolUse": {
"toolUseId": "tooluse_1",
"name": "lookup",
"input": {},
}
}
],
},
{"role": "user", "content": "Continue."},
]
)
assert messages == [
{"role": "user", "content": "Use the tool."},
{"role": "user", "content": "Continue."},
]
def test_claude_model_preserves_complete_tool_use_content_block_pair(
self, monkeypatch: pytest.MonkeyPatch
):
_snowflake_env(monkeypatch)
llm = SnowflakeCompletion(model="claude-sonnet-4-5")
messages = llm._format_messages(
[
{"role": "user", "content": "Use the tool."},
{
"role": "assistant",
"content": [
{
"toolUse": {
"toolUseId": "tooluse_1",
"name": "lookup",
"input": {},
}
}
],
},
{
"role": "user",
"content": [
{
"toolResult": {
"toolUseId": "tooluse_1",
"content": [{"text": "result"}],
}
}
],
},
]
)
assert messages[-2] == {"role": "user", "content": "Use the tool."}
assert messages[-1]["role"] == "user"
assert "result" in messages[-1]["content"]
assert "toolResult" not in messages[-1]["content"]
assert messages[-3]["role"] == "assistant"
assert messages[-2] == {
"role": "tool",
"tool_call_id": "call_1",
"content": "valid result",
}
assert all(
not (
message.get("role") == "assistant"
and isinstance(message.get("content"), list)
)
for message in messages
message.get("tool_call_id") != "unrelated_call" for message in messages
)
assert messages[-1]["role"] == "user"
def test_claude_model_maps_max_tokens_to_max_completion_tokens(
self, monkeypatch: pytest.MonkeyPatch

View File

@@ -108,16 +108,6 @@ class TestLiteLLMMultimodal:
assert result == []
def test_format_responses_pdf_with_concrete_gpt_model(self) -> None:
"""Test OpenAI Responses PDF support with an inferred GPT provider."""
files = {"doc": PDFFile(source=MINIMAL_PDF)}
result = format_multimodal_content(files, "gpt-4o-mini", api="responses")
assert len(result) == 1
assert result[0]["type"] == "input_file"
assert result[0]["file_data"].startswith("data:application/pdf;base64,")
@pytest.mark.skipif(not HAS_ANTHROPIC, reason="Anthropic SDK not installed")
class TestAnthropicMultimodal:
@@ -380,4 +370,4 @@ class TestMultipleFilesFormatting:
result = format_multimodal_content({}, llm.model)
assert result == []
assert result == []

View File

@@ -1,20 +1,11 @@
"""Unit tests for ReadFileTool."""
import base64
from pathlib import Path
from crewai.tools.agent_tools.read_file_tool import ReadFileTool
from crewai_files import ImageFile, PDFFile, TextFile
TEST_FIXTURES_DIR = (
Path(__file__).parent.parent.parent.parent.parent
/ "crewai-files"
/ "tests"
/ "fixtures"
)
class TestReadFileTool:
"""Tests for ReadFileTool."""
@@ -81,15 +72,15 @@ class TestReadFileTool:
decoded = base64.b64decode(b64_part)
assert decoded == png_bytes
def test_run_pdf_file_returns_extracted_text(self) -> None:
"""Test reading a PDF file returns extracted text instead of base64."""
pdf_bytes = (TEST_FIXTURES_DIR / "agents.pdf").read_bytes()
def test_run_pdf_file_returns_base64(self) -> None:
"""Test reading a PDF file returns base64 encoded content."""
pdf_bytes = b"%PDF-1.4 some content here"
self.tool.set_files({"doc.pdf": PDFFile(source=pdf_bytes)})
result = self.tool._run(file_name="doc.pdf")
assert "Base64:" not in result
assert "agents" in result.lower()
assert "[Binary file:" in result
assert "application/pdf" in result
def test_set_files_none(self) -> None:
"""Test setting files to None."""

View File

@@ -1,226 +0,0 @@
# ruff: noqa: T201
"""Manual runner for AGE-90 PDF input handling.
Usage examples:
uv run python scripts/age90_file_input_runner.py
uv run python scripts/age90_file_input_runner.py --mode fallback
uv run python scripts/age90_file_input_runner.py --mode payload --pdf ./sample_story.pdf
uv run python scripts/age90_file_input_runner.py --mode kickoff --pdf ./sample_story.pdf
"""
from __future__ import annotations
import argparse
from collections.abc import Mapping, Sequence
from contextlib import nullcontext
import os
from pathlib import Path
from typing import Any
from unittest.mock import patch
from crewai_files import PDFFile, format_multimodal_content, get_supported_content_types
ROOT = Path(__file__).resolve().parents[1]
DEFAULT_PDF = ROOT / "lib" / "crewai-files" / "tests" / "fixtures" / "agents.pdf"
def _content_summary(block: dict[str, Any]) -> dict[str, str]:
"""Return a compact, non-base64 summary of a content block."""
summary: dict[str, str] = {"type": str(block.get("type"))}
for key in ("file_id", "file_url", "filename", "image_url"):
if key in block:
value = str(block[key])
summary[key] = value[:100] + ("..." if len(value) > 100 else "")
if "file_data" in block:
value = str(block["file_data"])
summary["file_data"] = value[:80] + f"... ({len(value)} chars)"
return summary
def _sanitize_payload(value: Any) -> Any:
"""Shorten large fields before printing API payloads."""
if isinstance(value, Mapping):
sanitized: dict[str, Any] = {}
for key, item in value.items():
if key == "file_data" and isinstance(item, str):
sanitized[key] = item[:100] + f"... ({len(item)} chars)"
else:
sanitized[str(key)] = _sanitize_payload(item)
return sanitized
if isinstance(value, Sequence) and not isinstance(value, str | bytes):
return [_sanitize_payload(item) for item in value]
return value
def inspect_native_path(pdf_path: Path, provider: str, api: str | None) -> None:
"""Show whether the PDF is treated as a native multimodal input."""
pdf = PDFFile(source=str(pdf_path))
supported_types = get_supported_content_types(provider, api=api)
blocks = format_multimodal_content(
{"document": pdf},
provider=provider,
api=api,
text="Summarize this PDF.",
)
print("\n== Native File Formatting ==")
print(f"PDF: {pdf_path}")
print(f"Provider/API: {provider} / {api or 'default'}")
print(f"Supported content types: {supported_types}")
print(f"Content block count: {len(blocks)}")
for index, block in enumerate(blocks, start=1):
print(f" {index}. {_content_summary(block)}")
has_pdf_block = any(block.get("type") == "input_file" for block in blocks)
print(f"PDF native input_file block: {'YES' if has_pdf_block else 'NO'}")
def inspect_fallback_tool(pdf_path: Path) -> None:
"""Show what read_file returns if a PDF falls back to the tool path."""
from crewai.tools.agent_tools.read_file_tool import ReadFileTool
tool = ReadFileTool()
tool.set_files({"document": PDFFile(source=str(pdf_path))})
result = tool._run("document")
print("\n== read_file Fallback ==")
print(f"Returned {len(result)} chars")
print(f"Contains Base64 marker: {'YES' if 'Base64:' in result else 'NO'}")
print("\nPreview:")
print(result[:1200])
if len(result) > 1200:
print("...")
def run_crew_kickoff(
pdf_path: Path,
model: str,
api: str | None,
prompt: str,
*,
payload_only: bool = False,
) -> None:
"""Run a real Crew kickoff against the supplied model."""
from crewai import LLM, Agent, Crew, Task
if model.startswith("openai/") and not os.getenv("OPENAI_API_KEY") and not payload_only:
raise SystemExit(
"OPENAI_API_KEY is not set. Export it before running --mode kickoff."
)
kwargs: dict[str, Any] = {"model": model, "temperature": 0}
if api:
kwargs["api"] = api
llm = LLM(**kwargs)
agent = Agent(
role="PDF Analyst",
goal="Read the provided PDF and answer accurately from its contents",
backstory="You inspect uploaded files carefully and avoid guessing.",
llm=llm,
verbose=True,
)
task = Task(
description=prompt,
expected_output="A concise answer grounded in the uploaded PDF.",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task], verbose=True)
print("\n== Crew Kickoff ==")
print(f"Model/API: {model} / {api or 'default'}")
print(f"PDF: {pdf_path}")
context = nullcontext()
if payload_only:
from crewai.llms.providers.openai.completion import OpenAICompletion
def print_payload_and_stop(
self: OpenAICompletion,
params: dict[str, Any],
*_args: Any,
**_kwargs: Any,
) -> str:
print("\n== Sanitized Responses Payload ==")
print(_sanitize_payload(params))
return "Payload debug complete."
context = patch.object(
OpenAICompletion,
"_handle_responses",
print_payload_and_stop,
)
with context:
result = crew.kickoff(input_files={"document": PDFFile(source=str(pdf_path))})
print("\n== Final Output ==")
print(result.raw)
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--mode",
choices=("inspect", "fallback", "payload", "kickoff", "all"),
default="inspect",
help="What to run. 'inspect', 'fallback', and 'payload' do not call an LLM.",
)
parser.add_argument(
"--pdf",
type=Path,
default=DEFAULT_PDF,
help="PDF file to test.",
)
parser.add_argument(
"--provider",
default="gpt-4o-mini",
help="Provider/model string for file formatting inspection.",
)
parser.add_argument(
"--model",
default="openai/gpt-4o-mini",
help="CrewAI model for real kickoff mode.",
)
parser.add_argument(
"--api",
default="responses",
help="API variant. Use '' to omit.",
)
parser.add_argument(
"--prompt",
default="Summarize the uploaded PDF in 3 bullet points. Do not guess.",
help="Task prompt for kickoff mode.",
)
return parser.parse_args()
def main() -> None:
args = parse_args()
pdf_path = args.pdf.expanduser().resolve()
api = args.api or None
if not pdf_path.exists():
raise SystemExit(f"PDF not found: {pdf_path}")
if args.mode in ("inspect", "all"):
inspect_native_path(pdf_path, args.provider, api)
if args.mode in ("fallback", "all"):
inspect_fallback_tool(pdf_path)
if args.mode == "payload":
run_crew_kickoff(pdf_path, args.model, api, args.prompt, payload_only=True)
if args.mode in ("kickoff", "all"):
run_crew_kickoff(
pdf_path,
args.model,
api,
args.prompt,
payload_only=args.mode == "all",
)
if __name__ == "__main__":
main()

2
uv.lock generated
View File

@@ -13,7 +13,7 @@ resolution-markers = [
]
[options]
exclude-newer = "2026-05-30T15:40:20.821639605Z"
exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values.
exclude-newer-span = "P3D"
[manifest]