Brandon Hancock
2025-03-05 10:01:11 -05:00
parent 9e240e3ea4
commit 3df5278ee9
4 changed files with 77 additions and 344 deletions

View File

@@ -295,6 +295,13 @@ class LLM:
last_chunk = None
chunk_count = 0
debug_info = []
aggregated_usage = {
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0,
"successful_requests": 0,
"cached_prompt_tokens": 0,
}
# --- 2) Make sure stream is set to True
params["stream"] = True
@@ -314,6 +321,13 @@ class LLM:
# Handle ModelResponse objects
if isinstance(chunk, ModelResponse):
debug_info.append("Chunk is ModelResponse")
# Capture and aggregate usage information from the chunk if available
chunk_usage = getattr(chunk, "usage", None)
if isinstance(chunk_usage, dict):
for key in aggregated_usage:
aggregated_usage[key] += chunk_usage.get(key, 0)
choices = getattr(chunk, "choices", [])
if choices and len(choices) > 0:
choice = choices[0]
@@ -378,6 +392,12 @@ class LLM:
if last_chunk is not None:
# Try to extract any content from the last chunk
if isinstance(last_chunk, ModelResponse):
# Capture and aggregate usage information from the last chunk if available
chunk_usage = getattr(last_chunk, "usage", None)
if isinstance(chunk_usage, dict):
for key in aggregated_usage:
aggregated_usage[key] += chunk_usage.get(key, 0)
choices = getattr(last_chunk, "choices", [])
if choices and len(choices) > 0:
choice = choices[0]
@@ -406,6 +426,12 @@ class LLM:
# --- 7) Check for tool calls in the final response
if isinstance(last_chunk, ModelResponse):
# Capture and aggregate usage information from the last chunk if available
chunk_usage = getattr(last_chunk, "usage", None)
if isinstance(chunk_usage, dict):
for key in aggregated_usage:
aggregated_usage[key] += chunk_usage.get(key, 0)
choices = getattr(last_chunk, "choices", [])
if choices and len(choices) > 0:
choice = choices[0]
@@ -418,7 +444,23 @@ class LLM:
if tool_result is not None:
return tool_result
# --- 8) Emit completion event and return response
# --- 8) Log token usage if available
# Use aggregated usage if any tokens were counted
if any(value > 0 for value in aggregated_usage.values()):
logging.info(
f"Aggregated token usage from streaming response: {aggregated_usage}"
)
if self.callbacks and len(self.callbacks) > 0:
for callback in self.callbacks:
if hasattr(callback, "log_success_event"):
callback.log_success_event(
kwargs=params,
response_obj={"usage": aggregated_usage},
start_time=0,
end_time=0,
)
# --- 9) Emit completion event and return response
self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL)
return full_response
@@ -465,6 +507,16 @@ class LLM:
"""
# --- 1) Make the completion call
response = litellm.completion(**params)
# Extract usage info; if none is provided, default to zero counts
usage_info = getattr(response, "usage", None)
if usage_info is None:
usage_info = {
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0,
"successful_requests": 0,
"cached_prompt_tokens": 0,
}
# --- 2) Extract response message and content
response_message = cast(Choices, cast(ModelResponse, response).choices)[
@@ -472,15 +524,29 @@ class LLM:
0
].message
text_response = response_message.content or ""
# --- 3) Check for tool calls
# --- 3) Handle callbacks with usage info
if self.callbacks and len(self.callbacks) > 0:
for callback in self.callbacks:
if hasattr(callback, "log_success_event"):
logging.info(
f"Token usage from non-streaming response: {usage_info}"
)
callback.log_success_event(
kwargs=params,
response_obj={"usage": usage_info},
start_time=0,
end_time=0,
)
# --- 4) Check for tool calls
tool_calls = getattr(response_message, "tool_calls", [])
# --- 4) Handle tool calls if present
# --- 5) Handle tool calls if present
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 5) Emit completion event and return response
# --- 6) Emit completion event and return response
self._handle_emit_call_events(text_response, LLMCallType.LLM_CALL)
return text_response
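
For context, the streaming path above accumulates per-chunk usage into one dict and replays it to registered callbacks exactly once, after the stream ends. Below is a minimal standalone sketch of that pattern; the function name and free-standing form are illustrative, while the five usage keys, the isinstance guard, and the log_success_event hook come straight from the diff:

import logging
from typing import Any, Dict, Iterable, List

USAGE_KEYS = (
    "prompt_tokens",
    "completion_tokens",
    "total_tokens",
    "successful_requests",
    "cached_prompt_tokens",
)

def aggregate_stream_usage(chunks: Iterable[Any], callbacks: List[Any]) -> Dict[str, int]:
    """Sum per-chunk usage dicts, then notify callbacks once at stream end."""
    aggregated = {key: 0 for key in USAGE_KEYS}
    for chunk in chunks:
        # Providers attach usage to only some chunks (often just the last),
        # so treat it as optional on every chunk.
        usage = getattr(chunk, "usage", None)
        if isinstance(usage, dict):
            for key in aggregated:
                aggregated[key] += usage.get(key, 0)
    if any(value > 0 for value in aggregated.values()):
        logging.info(f"Aggregated token usage from streaming response: {aggregated}")
        for callback in callbacks:
            if hasattr(callback, "log_success_event"):
                callback.log_success_event(
                    kwargs={},
                    response_obj={"usage": aggregated},
                    start_time=0,
                    end_time=0,
                )
    return aggregated

The four-argument log_success_event call matches litellm's CustomLogger interface, which is presumably why the diff wraps usage in response_obj["usage"] and stubs the timestamps to zero.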

View File

@@ -1,338 +0,0 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are dog Researcher. You
have a lot of experience with dog.\nYour personal goal is: Express hot takes
on dog.\nTo give my best complete final answer to the task use the exact following
format:\n\nThought: I now can give a great answer\nFinal Answer: Your final
answer must be the great and the most complete as possible, it must be outcome
described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user",
"content": "\nCurrent Task: Give me an analysis around dog.\n\nThis is the expect
criteria for your final answer: 1 bullet point about dog that''s under 15 words.\nyou
MUST return the actual complete content as the final answer, not a summary.\n\nBegin!
This is VERY important to you, use the tools available and give your best Final
Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
["\nObservation:"], "stream": false}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '919'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AcdAr57gPSeXoBUvpM7ihR5ocSg8K\",\n \"object\":
\"chat.completion\",\n \"created\": 1733770413,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"I now can give a great answer \\nFinal
Answer: Dogs are loyal companions, enhancing human lives with love and joy.\",\n
\ \"refusal\": null\n },\n \"logprobs\": null,\n \"finish_reason\":
\"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 175,\n \"completion_tokens\":
24,\n \"total_tokens\": 199,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_bba3c8e70b\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8ef732d93c754554-ATL
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 09 Dec 2024 18:53:33 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=zftDGzMKnU559gRET72hds3.GZV1di4sti_Q8aIdqPg-1733770413-1.0.1.1-AXqWYLVe2ClCqIFObmsZXfjYEbJ8Ahbl74TpjGzyxfP1UsSB3HisukLyoXLq52raWViSlB3tLosiLnNEWwWMdw;
path=/; expires=Mon, 09-Dec-24 19:23:33 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=eff7OIkJ0zWRunpA6z67LHqscmSe6XjNxXiPw1R3xCc-1733770413538-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '476'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999793'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_0d2fbea0013ad1ad0768fcdd457f5be3
http_version: HTTP/1.1
status_code: 200
- request:
body: '{"messages": [{"role": "system", "content": "You are cat Researcher. You
have a lot of experience with cat.\nYour personal goal is: Express hot takes
on cat.\nTo give my best complete final answer to the task use the exact following
format:\n\nThought: I now can give a great answer\nFinal Answer: Your final
answer must be the great and the most complete as possible, it must be outcome
described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user",
"content": "\nCurrent Task: Give me an analysis around cat.\n\nThis is the expect
criteria for your final answer: 1 bullet point about cat that''s under 15 words.\nyou
MUST return the actual complete content as the final answer, not a summary.\n\nBegin!
This is VERY important to you, use the tools available and give your best Final
Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
["\nObservation:"], "stream": false}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '919'
content-type:
- application/json
cookie:
- __cf_bm=zftDGzMKnU559gRET72hds3.GZV1di4sti_Q8aIdqPg-1733770413-1.0.1.1-AXqWYLVe2ClCqIFObmsZXfjYEbJ8Ahbl74TpjGzyxfP1UsSB3HisukLyoXLq52raWViSlB3tLosiLnNEWwWMdw;
_cfuvid=eff7OIkJ0zWRunpA6z67LHqscmSe6XjNxXiPw1R3xCc-1733770413538-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AcdArzlm4vKRhN7P4mtNE7X3UrCb3\",\n \"object\":
\"chat.completion\",\n \"created\": 1733770413,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"I now can give a great answer \\nFinal
Answer: Cats are independent yet affectionate, making them unique companions
for humans.\",\n \"refusal\": null\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
175,\n \"completion_tokens\": 24,\n \"total_tokens\": 199,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_818c284075\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8ef732dcfadf4554-ATL
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 09 Dec 2024 18:53:34 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '418'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999793'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_483f4fc624bef1034c884b5d0e847aee
http_version: HTTP/1.1
status_code: 200
- request:
body: '{"messages": [{"role": "system", "content": "You are apple Researcher.
You have a lot of experience with apple.\nYour personal goal is: Express hot
takes on apple.\nTo give my best complete final answer to the task use the exact
following format:\n\nThought: I now can give a great answer\nFinal Answer: Your
final answer must be the great and the most complete as possible, it must be
outcome described.\n\nI MUST use these formats, my job depends on it!"}, {"role":
"user", "content": "\nCurrent Task: Give me an analysis around apple.\n\nThis
is the expect criteria for your final answer: 1 bullet point about apple that''s
under 15 words.\nyou MUST return the actual complete content as the final answer,
not a summary.\n\nBegin! This is VERY important to you, use the tools available
and give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
"gpt-4o-mini", "stop": ["\nObservation:"], "stream": false}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '929'
content-type:
- application/json
cookie:
- __cf_bm=zftDGzMKnU559gRET72hds3.GZV1di4sti_Q8aIdqPg-1733770413-1.0.1.1-AXqWYLVe2ClCqIFObmsZXfjYEbJ8Ahbl74TpjGzyxfP1UsSB3HisukLyoXLq52raWViSlB3tLosiLnNEWwWMdw;
_cfuvid=eff7OIkJ0zWRunpA6z67LHqscmSe6XjNxXiPw1R3xCc-1733770413538-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.52.1
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.52.1
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AcdAsHWkJ3K1sBl3O6XYMZZ3BhHx4\",\n \"object\":
\"chat.completion\",\n \"created\": 1733770414,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"I now can give a great answer \\nFinal
Answer: Apple consistently innovates, leading the tech industry with flagship
products.\",\n \"refusal\": null\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
175,\n \"completion_tokens\": 24,\n \"total_tokens\": 199,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
\"fp_818c284075\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8ef732e028194554-ATL
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 09 Dec 2024 18:53:34 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '393'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999791'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_01df88b62376d0b63fb3fa2761bc9c2b
http_version: HTTP/1.1
status_code: 200
version: 1

View File

@@ -948,7 +948,7 @@ def test_api_calls_throttling(capsys):
moveon.assert_called()
@pytest.mark.vcr(filter_headers=["authorization"])
# @pytest.mark.vcr(filter_headers=["authorization"])
def test_crew_kickoff_usage_metrics():
inputs = [
{"topic": "dog"},
@@ -960,6 +960,7 @@ def test_crew_kickoff_usage_metrics():
role="{topic} Researcher",
goal="Express hot takes on {topic}.",
backstory="You have a lot of experience with {topic}.",
llm=LLM(model="gpt-4o"),
)
task = Task(
@@ -968,12 +969,13 @@ def test_crew_kickoff_usage_metrics():
agent=agent,
)
# Use real LLM calls instead of mocking
crew = Crew(agents=[agent], tasks=[task])
results = crew.kickoff_for_each(inputs=inputs)
assert len(results) == len(inputs)
for result in results:
# Assert that all required keys are in usage_metrics and their values are not None
# Assert that all required keys are in usage_metrics and their values are greater than 0
assert result.token_usage.total_tokens > 0
assert result.token_usage.prompt_tokens > 0
assert result.token_usage.completion_tokens > 0
@@ -3973,3 +3975,5 @@ def test_crew_with_knowledge_sources_works_with_copy():
assert crew_copy.knowledge_sources == crew.knowledge_sources
assert len(crew_copy.agents) == len(crew.agents)
assert len(crew_copy.tasks) == len(crew.tasks)
assert len(crew_copy.tasks) == len(crew.tasks)
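
With the mocks removed, the test above drives real completions so token accounting can be verified end to end. A hedged sketch of the same consumer-side flow: the Agent wiring and assertions are taken from the diff, while the Task fields are assumptions reconstructed from the prompts in the deleted cassette:

from crewai import LLM, Agent, Crew, Task

agent = Agent(
    role="{topic} Researcher",
    goal="Express hot takes on {topic}.",
    backstory="You have a lot of experience with {topic}.",
    llm=LLM(model="gpt-4o"),
)
task = Task(
    description="Give me an analysis around {topic}.",  # inferred from cassette prompts
    expected_output="1 bullet point about {topic} that's under 15 words.",
    agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
results = crew.kickoff_for_each(
    inputs=[{"topic": "dog"}, {"topic": "cat"}, {"topic": "apple"}]
)

for result in results:
    # Non-zero counts prove usage now propagates up from the LLM layer.
    assert result.token_usage.total_tokens > 0
    assert result.token_usage.prompt_tokens > 0
    assert result.token_usage.completion_tokens > 0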

View File

@@ -285,6 +285,7 @@ def test_o3_mini_reasoning_effort_medium():
assert isinstance(result, str)
assert "Paris" in result
def test_context_window_validation():
"""Test that context window validation works correctly."""
# Test valid window size