fix: ensure token usage recording, validate response model on stream

commit bdafe0fac7 (parent 8e99d490b0)
Author: Greyson LaLonde, 2025-12-10 20:32:10 -05:00, committed via GitHub
312 changed files with 7846 additions and 33124 deletions
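For context on the streaming half of this fix: with OpenAI-style chat APIs, streamed responses carry no token usage unless the client requests it, and the usage block arrives only on the final chunk. A minimal sketch of that pattern, assuming the OpenAI Python SDK (model and prompt are illustrative, not taken from this commit):

    # Sketch: capturing token usage from a streamed chat completion.
    from openai import OpenAI

    client = OpenAI()
    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
        # Without this, the streamed response carries no usage block at all.
        stream_options={"include_usage": True},
    )

    parts, usage = [], None
    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            parts.append(chunk.choices[0].delta.content)
        if chunk.usage is not None:  # final chunk: empty choices, usage set
            usage = chunk.usage

    text = "".join(parts)

A provider integration that never requests or drops that final chunk reports zero tokens for every streamed call, which is what the new tests below guard against.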


@@ -10,9 +10,9 @@ from crewai.agent import Agent
 from crewai.task import Task
 
 
-@pytest.fixture(autouse=True)
+@pytest.fixture
 def mock_azure_credentials():
-    """Automatically mock Azure credentials for all tests in this module."""
+    """Mock Azure credentials for tests that need them."""
     with patch.dict(os.environ, {
         "AZURE_API_KEY": "test-key",
         "AZURE_ENDPOINT": "https://test.openai.azure.com"
@@ -20,6 +20,7 @@ def mock_azure_credentials():
         yield
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_is_used_when_azure_provider():
     """
     Test that AzureCompletion from completion.py is used when LLM uses provider 'azure'
@@ -31,6 +32,7 @@ def test_azure_completion_is_used_when_azure_provider():
     assert llm.model == "gpt-4"
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_is_used_when_azure_openai_provider():
     """
     Test that AzureCompletion is used when provider is 'azure_openai'
@@ -101,6 +103,7 @@ def test_azure_tool_use_conversation_flow():
     assert mock_complete.called
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_module_is_imported():
     """
     Test that the completion module is properly imported when using Azure provider
@@ -189,6 +192,7 @@ def test_azure_specific_parameters():
     assert llm.api_version == "2024-02-01"
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_call():
     """
     Test that AzureCompletion call method works
@@ -203,6 +207,7 @@ def test_azure_completion_call():
     mock_call.assert_called_once_with("Hello, how are you?")
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_called_during_crew_execution():
     """
     Test that AzureCompletion.call is actually invoked when running a crew
@@ -235,6 +240,7 @@ def test_azure_completion_called_during_crew_execution():
     assert "14 million" in str(result)
 
 
+@pytest.mark.usefixtures("mock_azure_credentials")
 def test_azure_completion_call_arguments():
     """
     Test that AzureCompletion.call is invoked with correct arguments
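The recurring change above replaces the autouse credentials fixture with explicit opt-in via @pytest.mark.usefixtures, so the patched environment only applies to tests that declare it. Reduced to a self-contained sketch (fixture and test names here are illustrative):

    import os
    from unittest.mock import patch

    import pytest


    @pytest.fixture
    def mock_credentials():
        # Opt-in: unlike autouse=True, only tests that request this
        # fixture see the patched environment.
        with patch.dict(os.environ, {"AZURE_API_KEY": "test-key"}):
            yield


    @pytest.mark.usefixtures("mock_credentials")
    def test_reads_patched_env():
        assert os.environ["AZURE_API_KEY"] == "test-key"

One likely motivation: an autouse fixture would clobber the real credentials needed when recording VCR cassettes against the live API.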
@@ -661,38 +667,17 @@ def test_azure_http_error_handling():
         llm.call("Hello")
 
 
+@pytest.mark.vcr()
 def test_azure_streaming_completion():
     """
     Test that streaming completions work properly
     """
-    from crewai.llms.providers.azure.completion import AzureCompletion
-    from azure.ai.inference.models import StreamingChatCompletionsUpdate
-
-    llm = LLM(model="azure/gpt-4", stream=True)
-
-    # Mock streaming response
-    with patch.object(llm.client, 'complete') as mock_complete:
-        # Create mock streaming updates with proper type
-        mock_updates = []
-        for chunk in ["Hello", " ", "world", "!"]:
-            mock_delta = MagicMock()
-            mock_delta.content = chunk
-            mock_delta.tool_calls = None
-            mock_choice = MagicMock()
-            mock_choice.delta = mock_delta
-            # Create mock update as StreamingChatCompletionsUpdate instance
-            mock_update = MagicMock(spec=StreamingChatCompletionsUpdate)
-            mock_update.choices = [mock_choice]
-            mock_updates.append(mock_update)
-        mock_complete.return_value = iter(mock_updates)
-
-        result = llm.call("Say hello")
-
-        # Verify the full response was assembled
-        assert result == "Hello world!"
+    llm = LLM(model="azure/gpt-4o-mini", stream=True)
+    result = llm.call("Say hello")
+
+    assert result is not None
+    assert isinstance(result, str)
+    assert len(result) > 0
 
 
 def test_azure_api_version_default():
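The streaming test above now relies on @pytest.mark.vcr() instead of hand-built StreamingChatCompletionsUpdate mocks: real HTTP traffic is recorded to a cassette once, then replayed on later runs. Assuming the pytest-recording plugin (the usual provider of this marker), the workflow is roughly:

    import pytest


    @pytest.mark.vcr()  # replays cassettes/<test_name>.yaml next to the test
    def test_streaming_completion():
        llm = LLM(model="azure/gpt-4o-mini", stream=True)  # as in the diff
        result = llm.call("Say hello")
        assert isinstance(result, str) and len(result) > 0

    # First run records against the live API; later runs replay offline:
    #   pytest --record-mode=once tests/test_streaming.py

The trade-off is looser assertions: a recorded response can't be pinned down the way the old "Hello world!" mock could, hence the shift to type and non-emptiness checks.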
@@ -1112,4 +1097,33 @@ def test_azure_completion_params_preparation_with_drop_params():
     messages = [{"role": "user", "content": "Hello"}]
     params = llm._prepare_completion_params(messages)
     assert params.get('stop') == None
     assert params.get('stop') == None
+
+
+@pytest.mark.vcr()
+def test_azure_streaming_returns_usage_metrics():
+    """
+    Test that Azure streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of Spain",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="azure/gpt-4o-mini", stream=True),
+        verbose=True,
+    )
+    task = Task(
+        description="What is the capital of Spain?",
+        expected_output="The capital of Spain",
+        agent=agent,
+    )
+    crew = Crew(agents=[agent], tasks=[task])
+
+    result = crew.kickoff()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1
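The assertions above exercise the aggregated token_usage object on the crew result: total_tokens, prompt_tokens, completion_tokens, and successful_requests. As a hypothetical reduction of the bookkeeping this commit has to keep intact (field names mirror the assertions; the class is illustrative, not CrewAI's implementation):

    from dataclasses import dataclass


    @dataclass
    class UsageTally:
        total_tokens: int = 0
        prompt_tokens: int = 0
        completion_tokens: int = 0
        successful_requests: int = 0

        def record(self, prompt_tokens: int, completion_tokens: int) -> None:
            # Must run once per completed LLM call, streamed or not; the
            # failure mode targeted here is streamed calls that never
            # report their usage.
            self.prompt_tokens += prompt_tokens
            self.completion_tokens += completion_tokens
            self.total_tokens += prompt_tokens + completion_tokens
            self.successful_requests += 1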


@@ -3,6 +3,7 @@
 import pytest
 import tiktoken
 
 from crewai import Agent, Task, Crew
+from crewai.llm import LLM
@@ -114,3 +115,33 @@ async def test_azure_async_conversation():
 
     assert result is not None
     assert isinstance(result, str)
+
+
+@pytest.mark.vcr()
+@pytest.mark.asyncio
+async def test_azure_async_streaming_returns_usage_metrics():
+    """
+    Test that Azure async streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of Germany",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="azure/gpt-4o-mini", stream=True),
+        verbose=True,
+    )
+    task = Task(
+        description="What is the capital of Germany?",
+        expected_output="The capital of Germany",
+        agent=agent,
+    )
+    crew = Crew(agents=[agent], tasks=[task])
+
+    result = await crew.kickoff_async()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1
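The async variant routes through kickoff_async but checks the same counters, so usage has to be captured on the async streaming path too. A hedged sketch of that consumption loop, assuming OpenAI-shaped chunks with the usage block on the final one:

    from typing import Any, AsyncIterator


    async def consume_stream(stream: AsyncIterator[Any]) -> tuple[str, Any]:
        """Collect text deltas and the trailing usage block, if any."""
        parts: list[str] = []
        usage = None
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                parts.append(chunk.choices[0].delta.content)
            if getattr(chunk, "usage", None) is not None:
                usage = chunk.usage
        return "".join(parts), usage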


@@ -698,3 +698,33 @@ def test_gemini_stop_sequences_sent_to_api():
     assert hasattr(config, 'stop_sequences') or 'stop_sequences' in config.__dict__
     if hasattr(config, 'stop_sequences'):
         assert config.stop_sequences == ["\nObservation:", "\nThought:"]
+
+
+@pytest.mark.vcr()
+@pytest.mark.skip(reason="VCR cannot replay SSE streaming responses")
+def test_google_streaming_returns_usage_metrics():
+    """
+    Test that Google Gemini streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of Japan",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="gemini/gemini-2.0-flash-exp", stream=True),
+        verbose=True,
+    )
+    task = Task(
+        description="What is the capital of Japan?",
+        expected_output="The capital of Japan",
+        agent=agent,
+    )
+    crew = Crew(agents=[agent], tasks=[task])
+
+    result = crew.kickoff()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1
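The skip reason deserves a note: Gemini streams over server-sent events, a line-oriented framing the recording layer here evidently cannot replay. For reference, a minimal parser for the data: lines of an SSE stream (simplified; it ignores event:/id: fields and multi-line data):

    import json
    from typing import Iterator


    def iter_sse_payloads(lines: Iterator[str]) -> Iterator[dict]:
        """Yield parsed JSON payloads from the text lines of an SSE stream."""
        for line in lines:
            line = line.strip()
            if line.startswith("data:"):
                payload = line[len("data:"):].strip()
                if payload and payload != "[DONE]":
                    yield json.loads(payload)

A faithful replay would have to reproduce this incremental framing byte-for-byte, which is why the Gemini streaming tests are recorded but skipped.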


@@ -3,6 +3,7 @@
 import pytest
 import tiktoken
 
 from crewai import Agent, Task, Crew
+from crewai.llm import LLM
 from crewai.llms.providers.gemini.completion import GeminiCompletion
@@ -112,3 +113,34 @@ async def test_gemini_async_with_parameters():
 
     assert result is not None
     assert isinstance(result, str)
+
+
+@pytest.mark.vcr()
+@pytest.mark.asyncio
+@pytest.mark.skip(reason="VCR cannot replay SSE streaming responses")
+async def test_google_async_streaming_returns_usage_metrics():
+    """
+    Test that Google Gemini async streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of Canada",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="gemini/gemini-2.0-flash-exp", stream=True),
+        verbose=True,
+    )
+    task = Task(
+        description="What is the capital of Canada?",
+        expected_output="The capital of Canada",
+        agent=agent,
+    )
+    crew = Crew(agents=[agent], tasks=[task])
+
+    result = await crew.kickoff_async()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1


@@ -592,3 +592,32 @@ def test_openai_response_format_none():
 
     assert isinstance(result, str)
     assert len(result) > 0
+
+
+@pytest.mark.vcr()
+def test_openai_streaming_returns_usage_metrics():
+    """
+    Test that OpenAI streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of France",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="gpt-4o-mini", stream=True),
+        verbose=True,
+    )
+    task = Task(
+        description="What is the capital of France?",
+        expected_output="The capital of France",
+        agent=agent,
+    )
+    crew = Crew(agents=[agent], tasks=[task])
+
+    result = crew.kickoff()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1


@@ -3,6 +3,7 @@
 import pytest
 import tiktoken
 
 from crewai import Agent, Task, Crew
+from crewai.llm import LLM
@@ -137,3 +138,33 @@ async def test_openai_async_with_parameters():
 
     assert result is not None
     assert isinstance(result, str)
+
+
+@pytest.mark.vcr()
+@pytest.mark.asyncio
+async def test_openai_async_streaming_returns_usage_metrics():
+    """
+    Test that OpenAI async streaming calls return proper token usage metrics.
+    """
+    agent = Agent(
+        role="Research Assistant",
+        goal="Find information about the capital of Italy",
+        backstory="You are a helpful research assistant.",
+        llm=LLM(model="gpt-4o-mini", stream=True),
+        verbose=True,
+    )
+    task = Task(
+        description="What is the capital of Italy?",
+        expected_output="The capital of Italy",
+        agent=agent,
+    )
+    crew = Crew(agents=[agent], tasks=[task])
+
+    result = await crew.kickoff_async()
+
+    assert result.token_usage is not None
+    assert result.token_usage.total_tokens > 0
+    assert result.token_usage.prompt_tokens > 0
+    assert result.token_usage.completion_tokens > 0
+    assert result.token_usage.successful_requests >= 1