Merge branch 'main' into gl/feat/native-multimodal-files

Greyson LaLonde
2026-01-22 20:47:35 -05:00
3 changed files with 73 additions and 22 deletions

View File

@@ -28,4 +28,4 @@ class KickoffInputs(TypedDict, total=False):
         files: Named file inputs accessible to tasks during execution.
     """
-    files: dict[str, FileInput]
+    files: dict[str, FileInput]
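
Since KickoffInputs is declared with total=False, the files entry stays optional at kickoff time. A minimal caller-side sketch, assuming a FileInput that wraps a local path; the FileInput constructor shown here is an assumption, not something this diff confirms:

# Hypothetical usage sketch; FileInput's constructor arguments are assumed.
inputs: KickoffInputs = {
    "files": {
        "report": FileInput(path="data/report.pdf"),  # assumed constructor
    },
}
result = crew.kickoff(inputs=inputs)  # "crew" is an existing Crew instance

Omitting "files" entirely is equally valid, since total=False makes every key in the TypedDict optional.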

View File

@@ -0,0 +1,64 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "Hello"}], "role": "user"}], "generationConfig":
+      {}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '86'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.49.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-001:generateContent
+  response:
+    body:
+      string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
+        [\n {\n \"text\": \"Hi there! How can I help you today?\\n\"\n
+        \ }\n ],\n \"role\": \"model\"\n },\n \"finishReason\":
+        \"STOP\",\n \"avgLogprobs\": -0.13875236294486307\n }\n ],\n \"usageMetadata\":
+        {\n \"promptTokenCount\": 1,\n \"candidatesTokenCount\": 11,\n \"totalTokenCount\":
+        12,\n \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n
+        \ \"tokenCount\": 1\n }\n ],\n \"candidatesTokensDetails\":
+        [\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 11\n
+        \ }\n ]\n },\n \"modelVersion\": \"gemini-2.0-flash-001\",\n \"responseId\":
+        \"RM9yabK9GeLCjMcPivCRyA4\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Fri, 23 Jan 2026 01:30:44 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=469
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+version: 1
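
The cassette above already has credential and machine-specific headers scrubbed (X-GOOG-API-KEY-XXX and friends), which vcrpy supports at record time via filter_headers. A minimal conftest sketch that would produce this kind of scrubbing, assuming the pytest-recording plugin's vcr_config fixture; it is not the repository's actual configuration, and the replacement tokens simply mirror the cassette:

# conftest.py — a sketch, not the project's real config.
import pytest

@pytest.fixture(scope="module")
def vcr_config():
    return {
        # Replace sensitive or machine-specific headers with fixed tokens.
        "filter_headers": [
            ("x-goog-api-key", "X-GOOG-API-KEY-XXX"),
            ("user-agent", "X-USER-AGENT-XXX"),
            ("accept-encoding", "ACCEPT-ENCODING-XXX"),
        ],
        # Match replayed requests on method and URI so the recorded
        # generateContent call above is found on playback.
        "match_on": ["method", "uri"],
    }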

View File

@@ -617,35 +617,22 @@ def test_gemini_environment_variable_api_key():
     assert llm.api_key == "test-google-key"


 @pytest.mark.vcr()
 def test_gemini_token_usage_tracking():
     """
     Test that token usage is properly tracked for Gemini responses
     """
     llm = LLM(model="google/gemini-2.0-flash-001")

-    # Mock the Gemini response with usage information
-    with patch.object(llm.client.models, 'generate_content') as mock_generate:
-        mock_response = MagicMock()
-        mock_response.text = "test response"
-        mock_response.candidates = []
-        mock_response.usage_metadata = MagicMock(
-            prompt_token_count=50,
-            candidates_token_count=25,
-            total_token_count=75
-        )
-        mock_generate.return_value = mock_response
-        result = llm.call("Hello")
+    result = llm.call("Hello")
+    assert result.strip() == "Hi there! How can I help you today?"

-        # Verify the response
-        assert result == "test response"
-        # Verify token usage was extracted
-        usage = llm._extract_token_usage(mock_response)
-        assert usage["prompt_token_count"] == 50
-        assert usage["candidates_token_count"] == 25
-        assert usage["total_token_count"] == 75
-        assert usage["total_tokens"] == 75
+    usage = llm.get_token_usage_summary()
+    assert usage.successful_requests == 1
+    assert usage.prompt_tokens > 0
+    assert usage.completion_tokens > 0
+    assert usage.total_tokens > 0


 def test_gemini_stop_sequences_sync():
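
For reference, the rewritten test asserts against an aggregated summary object rather than a per-response dict. A rough sketch of the accumulator shape those four assertions imply; only the attribute names come from the test itself, while the class name and the record() hook are assumptions for illustration:

from dataclasses import dataclass

# Sketch only: attribute names are grounded in the assertions above,
# everything else is assumed.
@dataclass
class TokenUsageSummary:
    successful_requests: int = 0
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

    def record(self, prompt_tokens: int, completion_tokens: int) -> None:
        # Would be called once per successful LLM response.
        self.successful_requests += 1
        self.prompt_tokens += prompt_tokens
        self.completion_tokens += completion_tokens
        self.total_tokens += prompt_tokens + completion_tokens

With the cassette above (promptTokenCount 1, candidatesTokenCount 11), a single record(1, 11) call lines up with the recorded totalTokenCount of 12 and satisfies every assertion in the test.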