mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-03 00:02:36 +00:00
fix: ensure token usage recording, validate response model on stream
Some checks failed
Build uv cache / build-cache (3.10) (push) Has been cancelled
Build uv cache / build-cache (3.11) (push) Has been cancelled
Build uv cache / build-cache (3.12) (push) Has been cancelled
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
Notify Downstream / notify-downstream (push) Has been cancelled
Check Documentation Broken Links / Check broken links (push) Has been cancelled
Build uv cache / build-cache (3.13) (push) Has been cancelled
@@ -10,9 +10,9 @@ from crewai.agent import Agent
 from crewai.task import Task
 
 
-@pytest.fixture(autouse=True)
+@pytest.fixture
 def mock_azure_credentials():
-    """Automatically mock Azure credentials for all tests in this module."""
+    """Mock Azure credentials for tests that need them."""
     with patch.dict(os.environ, {
         "AZURE_API_KEY": "test-key",
         "AZURE_ENDPOINT": "https://test.openai.azure.com"
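
For context on the hunk above: dropping `autouse=True` means the fixture no longer patches the environment for every test in the module; each test must now opt in, which is exactly what the `@pytest.mark.usefixtures("mock_azure_credentials")` markers added in the following hunks do. A minimal standalone sketch of the opt-in pattern (the test name here is illustrative, not from the repo):

import os
from unittest.mock import patch

import pytest


@pytest.fixture
def mock_azure_credentials():
    """Mock Azure credentials for tests that need them."""
    with patch.dict(os.environ, {
        "AZURE_API_KEY": "test-key",
        "AZURE_ENDPOINT": "https://test.openai.azure.com",
    }):
        yield


@pytest.mark.usefixtures("mock_azure_credentials")
def test_reads_mocked_credentials():
    # The patched variables are visible only while the fixture is active;
    # patch.dict restores the original environment on exit.
    assert os.environ["AZURE_API_KEY"] == "test-key"
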
@@ -20,6 +20,7 @@ def mock_azure_credentials():
         yield
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_is_used_when_azure_provider():
     """
     Test that AzureCompletion from completion.py is used when LLM uses provider 'azure'
@@ -31,6 +32,7 @@ def test_azure_completion_is_used_when_azure_provider():
     assert llm.model == "gpt-4"
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_is_used_when_azure_openai_provider():
     """
     Test that AzureCompletion is used when provider is 'azure_openai'
@@ -101,6 +103,7 @@ def test_azure_tool_use_conversation_flow():
         assert mock_complete.called
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_module_is_imported():
     """
     Test that the completion module is properly imported when using Azure provider
@@ -189,6 +192,7 @@ def test_azure_specific_parameters():
     assert llm.api_version == "2024-02-01"
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_call():
     """
     Test that AzureCompletion call method works
@@ -203,6 +207,7 @@ def test_azure_completion_call():
         mock_call.assert_called_once_with("Hello, how are you?")
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_called_during_crew_execution():
     """
     Test that AzureCompletion.call is actually invoked when running a crew
@@ -235,6 +240,7 @@ def test_azure_completion_called_during_crew_execution():
         assert "14 million" in str(result)
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_call_arguments():
     """
     Test that AzureCompletion.call is invoked with correct arguments
@@ -661,38 +667,17 @@ def test_azure_http_error_handling():
         llm.call("Hello")
 
 
+@pytest.mark.vcr()
 def test_azure_streaming_completion():
     """
     Test that streaming completions work properly
     """
-    from crewai.llms.providers.azure.completion import AzureCompletion
-    from azure.ai.inference.models import StreamingChatCompletionsUpdate
-
-    llm = LLM(model="azure/gpt-4", stream=True)
-
-    # Mock streaming response
-    with patch.object(llm.client, 'complete') as mock_complete:
-        # Create mock streaming updates with proper type
-        mock_updates = []
-        for chunk in ["Hello", " ", "world", "!"]:
-            mock_delta = MagicMock()
-            mock_delta.content = chunk
-            mock_delta.tool_calls = None
-
-            mock_choice = MagicMock()
-            mock_choice.delta = mock_delta
-
-            # Create mock update as StreamingChatCompletionsUpdate instance
-            mock_update = MagicMock(spec=StreamingChatCompletionsUpdate)
-            mock_update.choices = [mock_choice]
-            mock_updates.append(mock_update)
-
-        mock_complete.return_value = iter(mock_updates)
-
-        result = llm.call("Say hello")
-
-        # Verify the full response was assembled
-        assert result == "Hello world!"
+    llm = LLM(model="azure/gpt-4o-mini", stream=True)
+    result = llm.call("Say hello")
+
+    assert result is not None
+    assert isinstance(result, str)
+    assert len(result) > 0
 
 
 def test_azure_api_version_default():
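
The rewritten streaming test above trades the hand-built mocks for a VCR cassette recorded against a real model. What the deleted mock was emulating, for reference, is the `azure-ai-inference` streaming shape: `complete(stream=True)` yields `StreamingChatCompletionsUpdate` objects whose `choices[0].delta.content` fragments get concatenated into the final string. A hedged sketch outside crewAI (the endpoint, key, and deployment name are placeholders, not values from this repo):

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import UserMessage
from azure.core.credentials import AzureKeyCredential

client = ChatCompletionsClient(
    endpoint="https://test.openai.azure.com",  # placeholder endpoint
    credential=AzureKeyCredential("test-key"),  # placeholder key
)

parts: list[str] = []
for update in client.complete(
    messages=[UserMessage(content="Say hello")],
    model="gpt-4o-mini",  # placeholder deployment
    stream=True,
):
    # Each update may carry a text fragment in choices[0].delta.content.
    if update.choices and update.choices[0].delta.content:
        parts.append(update.choices[0].delta.content)

result = "".join(parts)
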
@@ -1112,4 +1097,33 @@ def test_azure_completion_params_preparation_with_drop_params():
     messages = [{"role": "user", "content": "Hello"}]
     params = llm._prepare_completion_params(messages)
 
     assert params.get('stop') == None
+
+
+@pytest.mark.vcr()
+def test_azure_streaming_returns_usage_metrics():
+    """
+    Test that Azure streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of Spain",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="azure/gpt-4o-mini", stream=True),
+        verbose=True,
+    )
+
+    task = Task(
+        description="What is the capital of Spain?",
+        expected_output="The capital of Spain",
+        agent=agent,
+    )
+
+    crew = Crew(agents=[agent], tasks=[task])
+    result = crew.kickoff()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1
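
This new test exercises the "ensure token usage recording" half of the commit title: with `stream=True` there is no single response object carrying usage, so the provider has to capture the totals while iterating the stream, where they typically appear only on the final chunk. A hedged sketch of that pattern, reusing the illustrative `client` from the sketch above and assuming the streaming update exposes a `CompletionsUsage` via `.usage`, as the non-streaming response does:

totals = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

for update in client.complete(
    messages=[UserMessage(content="What is the capital of Spain?")],
    model="gpt-4o-mini",  # placeholder deployment
    stream=True,
):
    # Usage is usually populated only on the last update of the stream;
    # silently dropping it there is the bug class this test guards against.
    if update.usage:
        totals["prompt_tokens"] += update.usage.prompt_tokens
        totals["completion_tokens"] += update.usage.completion_tokens
        totals["total_tokens"] += update.usage.total_tokens
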
@@ -3,6 +3,7 @@
 import pytest
+import tiktoken
 
 from crewai import Agent, Task, Crew
 from crewai.llm import LLM
 
 
@@ -114,3 +115,33 @@ async def test_azure_async_conversation():
 
     assert result is not None
     assert isinstance(result, str)
+
+
+@pytest.mark.vcr()
+@pytest.mark.asyncio
+async def test_azure_async_streaming_returns_usage_metrics():
+    """
+    Test that Azure async streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of Germany",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="azure/gpt-4o-mini", stream=True),
+        verbose=True,
+    )
+
+    task = Task(
+        description="What is the capital of Germany?",
+        expected_output="The capital of Germany",
+        agent=agent,
+    )
+
+    crew = Crew(agents=[agent], tasks=[task])
+    result = await crew.kickoff_async()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1
@@ -698,3 +698,33 @@ def test_gemini_stop_sequences_sent_to_api():
         assert hasattr(config, 'stop_sequences') or 'stop_sequences' in config.__dict__
         if hasattr(config, 'stop_sequences'):
             assert config.stop_sequences == ["\nObservation:", "\nThought:"]
+
+
+@pytest.mark.vcr()
+@pytest.mark.skip(reason="VCR cannot replay SSE streaming responses")
+def test_google_streaming_returns_usage_metrics():
+    """
+    Test that Google Gemini streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of Japan",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="gemini/gemini-2.0-flash-exp", stream=True),
+        verbose=True,
+    )
+
+    task = Task(
+        description="What is the capital of Japan?",
+        expected_output="The capital of Japan",
+        agent=agent,
+    )
+
+    crew = Crew(agents=[agent], tasks=[task])
+    result = crew.kickoff()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1
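
The `skip` marker on this Gemini test is notable: VCR records plain HTTP request/response pairs and cannot faithfully replay the server-sent-events stream Gemini uses, so the test only runs meaningfully against the live API. For reference, a hedged sketch of streaming token usage straight from the `google-genai` SDK (the client setup is illustrative; field names follow the SDK's `usage_metadata`):

from google import genai

client = genai.Client()  # expects GOOGLE_API_KEY / GEMINI_API_KEY in the environment

parts: list[str] = []
usage = None
for chunk in client.models.generate_content_stream(
    model="gemini-2.0-flash-exp",
    contents="What is the capital of Japan?",
):
    if chunk.text:
        parts.append(chunk.text)
    # usage_metadata is populated as the stream progresses; keep the last seen value.
    if chunk.usage_metadata is not None:
        usage = chunk.usage_metadata

print("".join(parts))
if usage is not None:
    print(usage.prompt_token_count, usage.candidates_token_count, usage.total_token_count)
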
@@ -3,6 +3,7 @@
 import pytest
+import tiktoken
 
 from crewai import Agent, Task, Crew
 from crewai.llm import LLM
 from crewai.llms.providers.gemini.completion import GeminiCompletion
 
@@ -112,3 +113,34 @@ async def test_gemini_async_with_parameters():
 
     assert result is not None
     assert isinstance(result, str)
+
+
+@pytest.mark.vcr()
+@pytest.mark.asyncio
+@pytest.mark.skip(reason="VCR cannot replay SSE streaming responses")
+async def test_google_async_streaming_returns_usage_metrics():
+    """
+    Test that Google Gemini async streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of Canada",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="gemini/gemini-2.0-flash-exp", stream=True),
+        verbose=True,
+    )
+
+    task = Task(
+        description="What is the capital of Canada?",
+        expected_output="The capital of Canada",
+        agent=agent,
+    )
+
+    crew = Crew(agents=[agent], tasks=[task])
+    result = await crew.kickoff_async()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1
@@ -592,3 +592,32 @@ def test_openai_response_format_none():
 
     assert isinstance(result, str)
     assert len(result) > 0
+
+
+@pytest.mark.vcr()
+def test_openai_streaming_returns_usage_metrics():
+    """
+    Test that OpenAI streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of France",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="gpt-4o-mini", stream=True),
+        verbose=True,
+    )
+
+    task = Task(
+        description="What is the capital of France?",
+        expected_output="The capital of France",
+        agent=agent,
+    )
+
+    crew = Crew(agents=[agent], tasks=[task])
+    result = crew.kickoff()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1
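
For the OpenAI provider there is a wrinkle worth noting: the Chat Completions API only reports usage on a stream when the request asks for it, via `stream_options={"include_usage": True}`, and the totals then arrive on a final chunk whose `choices` list is empty. Whether crewAI sets this flag internally is not visible in this diff, but the raw pattern the test depends on looks like this:

from openai import OpenAI

client = OpenAI()  # expects OPENAI_API_KEY in the environment

stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    stream=True,
    stream_options={"include_usage": True},
)

parts: list[str] = []
usage = None
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        parts.append(chunk.choices[0].delta.content)
    if chunk.usage is not None:  # only set on the final, choices-less chunk
        usage = chunk.usage

print("".join(parts))
if usage is not None:
    print(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens)
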
@@ -3,6 +3,7 @@
 import pytest
+import tiktoken
 
 from crewai import Agent, Task, Crew
 from crewai.llm import LLM
 
 
@@ -137,3 +138,33 @@ async def test_openai_async_with_parameters():
 
     assert result is not None
     assert isinstance(result, str)
+
+
+@pytest.mark.vcr()
+@pytest.mark.asyncio
+async def test_openai_async_streaming_returns_usage_metrics():
+    """
+    Test that OpenAI async streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of Italy",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="gpt-4o-mini", stream=True),
+        verbose=True,
+    )
+
+    task = Task(
+        description="What is the capital of Italy?",
+        expected_output="The capital of Italy",
+        agent=agent,
+    )
+
+    crew = Crew(agents=[agent], tasks=[task])
+    result = await crew.kickoff_async()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1
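
The async variant asserts the same metrics through `crew.kickoff_async()`. At the SDK level the only differences from the synchronous sketch above are `AsyncOpenAI` and `async for`; a hedged sketch:

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # expects OPENAI_API_KEY in the environment
    stream = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "What is the capital of Italy?"}],
        stream=True,
        stream_options={"include_usage": True},
    )
    async for chunk in stream:
        if chunk.usage is not None:  # the final chunk carries the totals
            print(chunk.usage.total_tokens)


asyncio.run(main())
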