mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-07-03 06:08:15 +00:00
fix filesystem
This commit is contained in:
@@ -11,7 +11,10 @@ from crewai_files.formatting.anthropic import AnthropicFormatter
|
||||
from crewai_files.formatting.bedrock import BedrockFormatter
|
||||
from crewai_files.formatting.gemini import GeminiFormatter
|
||||
from crewai_files.formatting.openai import OpenAIFormatter, OpenAIResponsesFormatter
|
||||
from crewai_files.processing.constraints import get_constraints_for_provider
|
||||
from crewai_files.processing.constraints import (
|
||||
get_constraints_for_provider,
|
||||
uses_openai_responses_api,
|
||||
)
|
||||
from crewai_files.processing.processor import FileProcessor
|
||||
from crewai_files.resolution.resolver import FileResolver, FileResolverConfig
|
||||
from crewai_files.uploaders.factory import ProviderType
|
||||
@@ -120,9 +123,11 @@ def format_multimodal_content(
|
||||
if not files:
|
||||
return content_blocks
|
||||
|
||||
constraints_key: str = provider_type
|
||||
if api == "responses" and "openai" in provider_type.lower():
|
||||
constraints_key = "openai_responses"
|
||||
constraints_key = (
|
||||
"openai_responses"
|
||||
if uses_openai_responses_api(provider_type, api)
|
||||
else provider_type
|
||||
)
|
||||
|
||||
processor = FileProcessor(constraints=constraints_key)
|
||||
processed_files = processor.process_files(files)
|
||||
@@ -184,9 +189,11 @@ async def aformat_multimodal_content(
|
||||
if not files:
|
||||
return content_blocks
|
||||
|
||||
constraints_key: str = provider_type
|
||||
if api == "responses" and "openai" in provider_type.lower():
|
||||
constraints_key = "openai_responses"
|
||||
constraints_key = (
|
||||
"openai_responses"
|
||||
if uses_openai_responses_api(provider_type, api)
|
||||
else provider_type
|
||||
)
|
||||
|
||||
processor = FileProcessor(constraints=constraints_key)
|
||||
processed_files = await processor.aprocess_files(files)
|
||||
|
||||
@@ -346,6 +346,20 @@ def get_constraints_for_provider(
|
||||
return None
|
||||
|
||||
|
||||
def uses_openai_responses_api(provider: str, api: str | None = None) -> bool:
    """Decide whether OpenAI Responses file handling applies.

    Args:
        provider: Provider name or model identifier; compared
            case-insensitively.
        api: API variant requested by the caller. Only the exact string
            "responses" can yield True.

    Returns:
        True when ``api`` is "responses" and ``provider`` contains "openai",
        equals "gpt", starts with "gpt-", or contains "/gpt-"; otherwise
        False.
    """
    if api == "responses":
        name = provider.lower()
        # Bare GPT identifiers: "gpt" on its own or a "gpt-..." model name.
        if name == "gpt" or name.startswith("gpt-"):
            return True
        # Explicit OpenAI mention, or a prefixed model such as "<prefix>/gpt-...".
        return "openai" in name or "/gpt-" in name
    return False
|
||||
|
||||
|
||||
def get_supported_content_types(provider: str, api: str | None = None) -> list[str]:
|
||||
"""Get supported MIME type prefixes for a provider.
|
||||
|
||||
@@ -356,9 +370,9 @@ def get_supported_content_types(provider: str, api: str | None = None) -> list[s
|
||||
Returns:
|
||||
List of supported MIME type prefixes (e.g., ["image/", "application/pdf"]).
|
||||
"""
|
||||
lookup_key = provider
|
||||
if api == "responses" and "openai" in provider.lower():
|
||||
lookup_key = "openai_responses"
|
||||
lookup_key = (
|
||||
"openai_responses" if uses_openai_responses_api(provider, api) else provider
|
||||
)
|
||||
|
||||
constraints = get_constraints_for_provider(lookup_key)
|
||||
if not constraints:
|
||||
|
||||
@@ -11,6 +11,7 @@ from crewai_files.processing.constraints import (
|
||||
ProviderConstraints,
|
||||
VideoConstraints,
|
||||
get_constraints_for_provider,
|
||||
get_supported_content_types,
|
||||
)
|
||||
import pytest
|
||||
|
||||
@@ -70,6 +71,13 @@ class TestPDFConstraints:
|
||||
assert constraints.max_size_bytes == 1000
|
||||
assert constraints.max_pages is None
|
||||
|
||||
@pytest.mark.parametrize("provider", ["openai", "gpt", "gpt-4o-mini"])
def test_openai_responses_supports_pdf_for_gpt_aliases(self, provider):
    """PDF must be a supported type for each alias under the Responses API."""
    # Each parametrized provider is looked up with api="responses".
    assert "application/pdf" in get_supported_content_types(
        provider, api="responses"
    )
|
||||
|
||||
|
||||
class TestAudioConstraints:
|
||||
"""Tests for AudioConstraints dataclass."""
|
||||
|
||||
@@ -93,6 +93,7 @@ from crewai.utilities.agent_utils import (
|
||||
track_delegation_if_needed,
|
||||
)
|
||||
from crewai.utilities.constants import TRAINING_DATA_FILE
|
||||
from crewai.utilities.file_store import get_all_files
|
||||
from crewai.utilities.i18n import I18N_DEFAULT
|
||||
from crewai.utilities.planning_types import (
|
||||
PlanStep,
|
||||
@@ -2982,12 +2983,21 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
|
||||
training_handler.save(training_data)
|
||||
|
||||
def _inject_files_from_inputs(self, inputs: dict[str, Any]) -> None:
|
||||
"""Inject files from inputs into the last user message.
|
||||
"""Inject files into the last user message.
|
||||
|
||||
Args:
|
||||
inputs: Input dictionary that may contain a 'files' key.
|
||||
"""
|
||||
files = inputs.get("files")
|
||||
files: dict[str, Any] = {}
|
||||
|
||||
if self.crew and self.task:
|
||||
stored_files = get_all_files(self.crew.id, self.task.id)
|
||||
if stored_files:
|
||||
files.update(stored_files)
|
||||
|
||||
if inputs.get("files"):
|
||||
files.update(inputs["files"])
|
||||
|
||||
if not files:
|
||||
return
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from pydantic import BaseModel, Field, PrivateAttr
|
||||
@@ -64,6 +65,9 @@ class ReadFileTool(BaseTool):
|
||||
content_type = file_input.content_type
|
||||
filename = file_input.filename or file_name
|
||||
|
||||
if content_type == "application/pdf":
|
||||
return self._read_pdf_text(content, filename)
|
||||
|
||||
text_types = (
|
||||
"text/",
|
||||
"application/json",
|
||||
@@ -76,3 +80,22 @@ class ReadFileTool(BaseTool):
|
||||
|
||||
encoded = base64.b64encode(content).decode("ascii")
|
||||
return f"[Binary file: {filename} ({content_type})]\nBase64: {encoded}"
|
||||
|
||||
def _read_pdf_text(self, content: bytes, filename: str) -> str:
    """Extract text from a PDF instead of returning base64.

    Args:
        content: Raw bytes of the PDF file.
        filename: Name used in the returned messages.

    Returns:
        The text of all pages that yielded non-blank text, joined by blank
        lines. Falls back to a descriptive string when:
        - ``pypdf`` cannot be imported: the base64 "[Binary file: ...]" form;
        - parsing or extraction raises: an "Unable to extract text" message;
        - no page yields non-blank text: a "[PDF file with no extractable
          text: ...]" marker.
    """
    try:
        from pypdf import PdfReader
    except ImportError:
        # Without pypdf, fall back to the base64 representation.
        encoded = base64.b64encode(content).decode("ascii")
        return f"[Binary file: {filename} (application/pdf)]\nBase64: {encoded}"

    try:
        reader = PdfReader(BytesIO(content))
        # Skip pages whose extracted text is empty or only whitespace, so a
        # PDF without real text reaches the explicit "no extractable text"
        # marker below instead of returning a blank string.
        page_text = [
            text
            for page in reader.pages
            if (text := page.extract_text()) and text.strip()
        ]
    except Exception as exc:
        # Deliberately broad: any parsing failure is reported as a message
        # string rather than raised to the caller.
        return f"Unable to extract text from PDF '{filename}': {exc}"

    if not page_text:
        return f"[PDF file with no extractable text: {filename}]"

    return "\n\n".join(page_text)
|
||||
|
||||
@@ -7,9 +7,11 @@ flow methods, routing logic, and error handling.
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
import time
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, Mock, patch
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
from pydantic import BaseModel
|
||||
@@ -64,6 +66,8 @@ from crewai.events.types.tool_usage_events import (
|
||||
from crewai.tools.tool_types import ToolResult
|
||||
from crewai.utilities.step_execution_context import StepExecutionContext
|
||||
from crewai.utilities.planning_types import TodoItem
|
||||
from crewai.utilities.file_store import clear_files, clear_task_files, store_files
|
||||
from crewai_files import TextFile
|
||||
|
||||
class TestAgentExecutorState:
|
||||
"""Test AgentExecutorState Pydantic model."""
|
||||
@@ -112,6 +116,26 @@ class TestAgentExecutor:
|
||||
class StructuredResult(BaseModel):
|
||||
value: str
|
||||
|
||||
def test_inject_files_from_crew_task_store(self):
    """Files stored under the crew id are attached to the user message."""
    crew_id, task_id = uuid4(), uuid4()
    attachment = TextFile(source=b"stored content")
    crew_stub = SimpleNamespace(id=crew_id)
    task_stub = SimpleNamespace(id=task_id)
    agent_executor = _build_executor(crew=crew_stub, task=task_stub)
    agent_executor.state.messages = [
        {"role": "user", "content": "Analyze this file"}
    ]

    try:
        store_files(crew_id, {"document": attachment})
        # Empty inputs: the files can only come from the store.
        agent_executor._inject_files_from_inputs({})
    finally:
        # Always clear the store entries, even if injection fails.
        clear_files(crew_id)
        clear_task_files(task_id)

    first_message = agent_executor.state.messages[0]
    assert first_message["files"] == {"document": attachment}
|
||||
|
||||
@pytest.fixture
|
||||
def mock_dependencies(self):
|
||||
"""Create mock dependencies for executor."""
|
||||
|
||||
@@ -108,6 +108,16 @@ class TestLiteLLMMultimodal:
|
||||
|
||||
assert result == []
|
||||
|
||||
def test_format_responses_pdf_with_concrete_gpt_model(self) -> None:
    """A bare GPT model name with api="responses" yields an input_file block."""
    blocks = format_multimodal_content(
        {"doc": PDFFile(source=MINIMAL_PDF)},
        "gpt-4o-mini",
        api="responses",
    )

    assert len(blocks) == 1
    pdf_block = blocks[0]
    assert pdf_block["type"] == "input_file"
    assert pdf_block["file_data"].startswith("data:application/pdf;base64,")
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_ANTHROPIC, reason="Anthropic SDK not installed")
|
||||
class TestAnthropicMultimodal:
|
||||
@@ -370,4 +380,4 @@ class TestMultipleFilesFormatting:
|
||||
|
||||
result = format_multimodal_content({}, llm.model)
|
||||
|
||||
assert result == []
|
||||
assert result == []
|
||||
|
||||
@@ -1,11 +1,20 @@
|
||||
"""Unit tests for ReadFileTool."""
|
||||
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
from crewai.tools.agent_tools.read_file_tool import ReadFileTool
|
||||
from crewai_files import ImageFile, PDFFile, TextFile
|
||||
|
||||
|
||||
TEST_FIXTURES_DIR = (
|
||||
Path(__file__).parent.parent.parent.parent.parent
|
||||
/ "crewai-files"
|
||||
/ "tests"
|
||||
/ "fixtures"
|
||||
)
|
||||
|
||||
|
||||
class TestReadFileTool:
|
||||
"""Tests for ReadFileTool."""
|
||||
|
||||
@@ -72,15 +81,15 @@ class TestReadFileTool:
|
||||
decoded = base64.b64decode(b64_part)
|
||||
assert decoded == png_bytes
|
||||
|
||||
def test_run_pdf_file_returns_base64(self) -> None:
|
||||
"""Test reading a PDF file returns base64 encoded content."""
|
||||
pdf_bytes = b"%PDF-1.4 some content here"
|
||||
def test_run_pdf_file_returns_extracted_text(self) -> None:
|
||||
"""Test reading a PDF file returns extracted text instead of base64."""
|
||||
pdf_bytes = (TEST_FIXTURES_DIR / "agents.pdf").read_bytes()
|
||||
self.tool.set_files({"doc.pdf": PDFFile(source=pdf_bytes)})
|
||||
|
||||
result = self.tool._run(file_name="doc.pdf")
|
||||
|
||||
assert "[Binary file:" in result
|
||||
assert "application/pdf" in result
|
||||
assert "Base64:" not in result
|
||||
assert "agents" in result.lower()
|
||||
|
||||
def test_set_files_none(self) -> None:
|
||||
"""Test setting files to None."""
|
||||
|
||||
Reference in New Issue
Block a user