Fix token tracking race condition in threading-based async execution

This commit fixes the race condition described in issue #4168 where
token tracking was inaccurate when multiple async tasks from the same
agent ran concurrently.

The fix introduces:
1. Per-agent locks to serialize async task execution for accurate token
   tracking when multiple async tasks from the same agent run concurrently
2. Token capture callback that captures both tokens_before and tokens_after
   inside the thread (after acquiring the lock), not when the task is queued
3. Updated _process_async_tasks to handle the new result type of
   execute_async, whose Future now resolves to a
   (TaskOutput, tokens_before, tokens_after) tuple when a token capture
   callback is supplied (and to a plain TaskOutput otherwise)

This ensures that token deltas are accurately attributed to each task
even when multiple async tasks from the same agent overlap in execution.

Tests added:
- test_async_task_token_tracking_uses_per_agent_lock
- test_async_task_token_callback_captures_tokens_inside_thread
- test_async_task_per_agent_lock_serializes_execution

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2026-01-03 17:27:15 +00:00
parent 0f0538cca7
commit 5dc87c04af
3 changed files with 327 additions and 23 deletions

View File

@@ -7,6 +7,7 @@ from copy import copy as shallow_copy
from hashlib import md5 from hashlib import md5
import json import json
import re import re
import threading
from typing import ( from typing import (
Any, Any,
cast, cast,
@@ -1153,8 +1154,13 @@ class Crew(FlowTrackable, BaseModel):
""" """
task_outputs: list[TaskOutput] = [] task_outputs: list[TaskOutput] = []
futures: list[tuple[Task, Future[TaskOutput], int]] = [] futures: list[tuple[Task, Future[TaskOutput | tuple[TaskOutput, Any]], int, Any, Any]] = []
last_sync_output: TaskOutput | None = None last_sync_output: TaskOutput | None = None
# Per-agent locks to serialize async task execution for accurate token tracking
# This ensures that when multiple async tasks from the same agent run,
# they execute one at a time so token deltas can be accurately attributed
agent_locks: dict[str, threading.Lock] = {}
for task_index, task in enumerate(tasks): for task_index, task in enumerate(tasks):
exec_data, task_outputs, last_sync_output = prepare_task_execution( exec_data, task_outputs, last_sync_output = prepare_task_execution(
@@ -1172,18 +1178,32 @@ class Crew(FlowTrackable, BaseModel):
continue continue
if task.async_execution: if task.async_execution:
# Capture token usage before async task execution
tokens_before = self._get_agent_token_usage(exec_data.agent)
context = self._get_context( context = self._get_context(
task, [last_sync_output] if last_sync_output else [] task, [last_sync_output] if last_sync_output else []
) )
# Get or create a lock for this agent to serialize async task execution
# This ensures accurate per-task token tracking
agent_id = str(getattr(exec_data.agent, 'id', id(exec_data.agent)))
if agent_id not in agent_locks:
agent_locks[agent_id] = threading.Lock()
agent_lock = agent_locks[agent_id]
# Token capture callback invoked inside the worker thread, once before and
# once after the task runs, both while the per-agent lock is held
def create_token_callback(agent: Any = exec_data.agent) -> Any:
return self._get_agent_token_usage(agent)
future = task.execute_async( future = task.execute_async(
agent=exec_data.agent, agent=exec_data.agent,
context=context, context=context,
tools=exec_data.tools, tools=exec_data.tools,
token_capture_callback=create_token_callback,
agent_execution_lock=agent_lock,
) )
futures.append((task, future, task_index, exec_data.agent, tokens_before)) # Note: tokens_before is no longer captured here since it will be
# captured inside the thread after acquiring the lock
futures.append((task, future, task_index, exec_data.agent, None))
else: else:
if futures: if futures:
task_outputs = self._process_async_tasks(futures, was_replayed) task_outputs = self._process_async_tasks(futures, was_replayed)
@@ -1218,7 +1238,7 @@ class Crew(FlowTrackable, BaseModel):
self, self,
task: ConditionalTask, task: ConditionalTask,
task_outputs: list[TaskOutput], task_outputs: list[TaskOutput],
futures: list[tuple[Task, Future[TaskOutput], int, Any, Any]], futures: list[tuple[Task, Future[TaskOutput | tuple[TaskOutput, Any, Any]], int, Any, Any]],
task_index: int, task_index: int,
was_replayed: bool, was_replayed: bool,
) -> TaskOutput | None: ) -> TaskOutput | None:
@@ -1450,18 +1470,32 @@ class Crew(FlowTrackable, BaseModel):
def _process_async_tasks( def _process_async_tasks(
self, self,
futures: list[tuple[Task, Future[TaskOutput], int, Any, Any]], futures: list[tuple[Task, Future[TaskOutput | tuple[TaskOutput, Any, Any]], int, Any, Any]],
was_replayed: bool = False, was_replayed: bool = False,
) -> list[TaskOutput]: ) -> list[TaskOutput]:
"""Process completed async tasks and attach token metrics.
The futures contain either:
- TaskOutput (if no token tracking was enabled)
- tuple of (TaskOutput, tokens_before, tokens_after) (if token tracking was enabled)
Token tracking is enabled when the task was executed with a token_capture_callback
and agent_execution_lock, which ensures accurate per-task token attribution even
when multiple async tasks from the same agent run concurrently.
"""
task_outputs: list[TaskOutput] = [] task_outputs: list[TaskOutput] = []
for future_task, future, task_index, agent, tokens_before in futures: for future_task, future, task_index, agent, _ in futures:
task_output = future.result() result = future.result()
# Capture token usage after async task execution and attach to task output # Check if result is a tuple (token tracking enabled) or just TaskOutput
tokens_after = self._get_agent_token_usage(agent) if isinstance(result, tuple) and len(result) == 3:
task_output = self._attach_task_token_metrics( task_output, tokens_before, tokens_after = result
task_output, future_task, agent, tokens_before, tokens_after task_output = self._attach_task_token_metrics(
) task_output, future_task, agent, tokens_before, tokens_after
)
else:
# No token tracking - result is just TaskOutput
task_output = result
task_outputs.append(task_output) task_outputs.append(task_output)
self._process_task_result(future_task, task_output) self._process_task_result(future_task, task_output)

View File

@@ -11,6 +11,7 @@ from pathlib import Path
import threading import threading
from typing import ( from typing import (
Any, Any,
Callable,
ClassVar, ClassVar,
cast, cast,
get_args, get_args,
@@ -476,13 +477,34 @@ class Task(BaseModel):
agent: BaseAgent | None = None, agent: BaseAgent | None = None,
context: str | None = None, context: str | None = None,
tools: list[BaseTool] | None = None, tools: list[BaseTool] | None = None,
) -> Future[TaskOutput]: token_capture_callback: Callable[[], Any] | None = None,
"""Execute the task asynchronously.""" agent_execution_lock: threading.Lock | None = None,
future: Future[TaskOutput] = Future() ) -> Future[TaskOutput | tuple[TaskOutput, Any, Any]]:
"""Execute the task asynchronously.
Args:
agent: The agent to execute the task.
context: Context for the task execution.
tools: Tools available for the task.
token_capture_callback: Optional callback to capture token usage.
If provided, the future will return a tuple of
(TaskOutput, tokens_before, tokens_after) instead of just TaskOutput.
The callback is called twice: once before task execution (after
acquiring the lock if one is provided) and once after task completion.
agent_execution_lock: Optional lock to serialize task execution for
the same agent. This is used to ensure accurate per-task token
tracking when multiple async tasks from the same agent run
concurrently.
Returns:
Future containing TaskOutput, or tuple of (TaskOutput, tokens_before, tokens_after)
if token_capture_callback is provided.
"""
future: Future[TaskOutput | tuple[TaskOutput, Any, Any]] = Future()
threading.Thread( threading.Thread(
daemon=True, daemon=True,
target=self._execute_task_async, target=self._execute_task_async,
args=(agent, context, tools, future), args=(agent, context, tools, future, token_capture_callback, agent_execution_lock),
).start() ).start()
return future return future
@@ -491,14 +513,45 @@ class Task(BaseModel):
agent: BaseAgent | None, agent: BaseAgent | None,
context: str | None, context: str | None,
tools: list[Any] | None, tools: list[Any] | None,
future: Future[TaskOutput], future: Future[TaskOutput | tuple[TaskOutput, Any, Any]],
token_capture_callback: Callable[[], Any] | None = None,
agent_execution_lock: threading.Lock | None = None,
) -> None: ) -> None:
"""Execute the task asynchronously with context handling.""" """Execute the task asynchronously with context handling.
If agent_execution_lock is provided, the task execution will be
serialized with other tasks using the same lock. This ensures
accurate per-task token tracking by:
1. Capturing tokens_before after acquiring the lock
2. Executing the task
3. Capturing tokens_after immediately after completion
4. Releasing the lock
If token_capture_callback is provided, it will be called twice:
once before task execution and once after, both while holding the lock.
"""
try: try:
result = self._execute_core(agent, context, tools) if agent_execution_lock:
future.set_result(result) with agent_execution_lock:
if token_capture_callback:
tokens_before = token_capture_callback()
result = self._execute_core(agent, context, tools)
if token_capture_callback:
tokens_after = token_capture_callback()
future.set_result((result, tokens_before, tokens_after))
else:
future.set_result(result)
else:
if token_capture_callback:
tokens_before = token_capture_callback()
result = self._execute_core(agent, context, tools)
if token_capture_callback:
tokens_after = token_capture_callback()
future.set_result((result, tokens_before, tokens_after))
else:
future.set_result(result)
except Exception as e: except Exception as e:
future.set_exception(e) future.set_exception(e)
async def aexecute_sync( async def aexecute_sync(
self, self,

View File

@@ -4768,3 +4768,220 @@ def test_ensure_exchanged_messages_are_propagated_to_external_memory():
assert "Researcher" in messages[0]["content"] assert "Researcher" in messages[0]["content"]
assert messages[1]["role"] == "user" assert messages[1]["role"] == "user"
assert "Research a topic to teach a kid aged 6 about math" in messages[1]["content"] assert "Research a topic to teach a kid aged 6 about math" in messages[1]["content"]
def test_async_task_token_tracking_uses_per_agent_lock():
    """Kick off a crew with two async tasks and one sync task on one agent.

    Regression test for issue #4168: when several ``async_execution=True``
    tasks share an agent, token usage must still be attributed per task.
    ``Task._execute_core`` is replaced with a stub so no LLM call is made,
    and ``Crew._get_agent_token_usage`` is replaced with a fixed sequence
    of usage snapshots.

    The test checks that all three tasks ran and produced an output, and
    that any ``usage_metrics`` attached to an output is a
    ``TaskTokenMetrics`` instance.
    """
    from crewai.types.usage_metrics import TaskTokenMetrics

    agent = Agent(
        role="Researcher",
        goal="Research topics",
        backstory="You are a researcher",
        allow_delegation=False,
    )

    task1 = Task(
        description="Research topic 1",
        expected_output="Research output 1",
        agent=agent,
        async_execution=True,
    )
    task2 = Task(
        description="Research topic 2",
        expected_output="Research output 2",
        agent=agent,
        async_execution=True,
    )
    task3 = Task(
        description="Summarize research",
        expected_output="Summary",
        agent=agent,
        async_execution=False,
    )

    crew = Crew(agents=[agent], tasks=[task1, task2, task3])

    mock_output = TaskOutput(
        description="Test output",
        raw="Test result",
        agent="Researcher",
    )

    # Records which tasks actually reached the (stubbed) core execution.
    execution_order = []

    def mock_execute_core(self, agent, context, tools):
        execution_order.append(self.description)
        return mock_output

    # Six snapshots -- presumably one before/after pair per task; the sync
    # path is not visible from this test, so confirm against Crew.
    token_snapshots = [
        UsageMetrics(total_tokens=100, prompt_tokens=80, completion_tokens=20, successful_requests=1),
        UsageMetrics(total_tokens=150, prompt_tokens=120, completion_tokens=30, successful_requests=2),
        UsageMetrics(total_tokens=150, prompt_tokens=120, completion_tokens=30, successful_requests=2),
        UsageMetrics(total_tokens=200, prompt_tokens=160, completion_tokens=40, successful_requests=3),
        UsageMetrics(total_tokens=200, prompt_tokens=160, completion_tokens=40, successful_requests=3),
        UsageMetrics(total_tokens=250, prompt_tokens=200, completion_tokens=50, successful_requests=4),
    ]

    with patch.object(Task, "_execute_core", mock_execute_core):
        with patch.object(
            crew,
            "_get_agent_token_usage",
            side_effect=token_snapshots,
        ):
            result = crew.kickoff()

    # Every task must have gone through the stubbed core exactly once.
    assert len(execution_order) == 3
    assert len(result.tasks_output) == 3

    for task_output in result.tasks_output:
        if hasattr(task_output, 'usage_metrics') and task_output.usage_metrics:
            assert isinstance(task_output.usage_metrics, TaskTokenMetrics)
def test_async_task_token_callback_captures_tokens_inside_thread():
    """Check that ``execute_async`` invokes the token callback in its worker thread.

    ``Task._execute_core`` is stubbed out, and ``execute_async`` is called
    directly with a token callback and a lock. The future must resolve to
    a ``(TaskOutput, tokens_before, tokens_after)`` tuple, and the callback
    must have run exactly twice, both times on the same non-main thread.
    This shows the snapshots are taken where the task executes rather than
    where it was queued.
    """
    agent = Agent(
        role="Researcher",
        goal="Research topics",
        backstory="You are a researcher",
        allow_delegation=False,
    )

    task = Task(
        description="Research topic",
        expected_output="Research output",
        agent=agent,
    )

    # Thread ident of every callback invocation, in call order.
    callback_thread_ids = []
    main_thread_id = threading.current_thread().ident

    def token_callback():
        callback_thread_ids.append(threading.current_thread().ident)
        return UsageMetrics(total_tokens=100, prompt_tokens=80, completion_tokens=20, successful_requests=1)

    mock_output = TaskOutput(
        description="Test output",
        raw="Test result",
        agent="Researcher",
    )

    with patch.object(Task, "_execute_core", return_value=mock_output):
        lock = threading.Lock()
        future = task.execute_async(
            agent=agent,
            context=None,
            tools=None,
            token_capture_callback=token_callback,
            agent_execution_lock=lock,
        )
        result = future.result(timeout=10)

    assert isinstance(result, tuple)
    assert len(result) == 3

    # The tuple must carry the stubbed output plus both callback snapshots.
    task_output, tokens_before, tokens_after = result
    assert task_output is mock_output
    assert tokens_before.total_tokens == 100
    assert tokens_after.total_tokens == 100

    # One call before execution and one after -- no more, no fewer.
    assert len(callback_thread_ids) == 2

    for thread_id in callback_thread_ids:
        assert thread_id != main_thread_id

    assert callback_thread_ids[0] == callback_thread_ids[1]
def test_async_task_per_agent_lock_serializes_execution():
    """Check that two async tasks sharing one lock never overlap in time.

    ``Task._execute_core`` is replaced with a stub that sleeps briefly and
    records its own start/end timestamps. Two tasks are started through
    ``execute_async`` with the same lock; the recorded intervals must be
    disjoint, whichever task ran first.
    """
    import time

    agent = Agent(
        role="Researcher",
        goal="Research topics",
        backstory="You are a researcher",
        allow_delegation=False,
    )

    task1 = Task(
        description="Research topic 1",
        expected_output="Research output 1",
        agent=agent,
    )
    task2 = Task(
        description="Research topic 2",
        expected_output="Research output 2",
        agent=agent,
    )

    # (start, end) pair per stubbed execution, appended from worker threads.
    execution_times = []
    mock_output = TaskOutput(
        description="Test output",
        raw="Test result",
        agent="Researcher",
    )

    def slow_execute_core(self, agent, context, tools):
        # Monotonic clock: interval ordering must not be affected by
        # wall-clock adjustments made while the test is running.
        start_time = time.monotonic()
        time.sleep(0.1)
        end_time = time.monotonic()
        execution_times.append((start_time, end_time))
        return mock_output

    with patch.object(Task, "_execute_core", slow_execute_core):
        lock = threading.Lock()

        def token_callback():
            return UsageMetrics(total_tokens=100, prompt_tokens=80, completion_tokens=20, successful_requests=1)

        future1 = task1.execute_async(
            agent=agent,
            context=None,
            tools=None,
            token_capture_callback=token_callback,
            agent_execution_lock=lock,
        )
        future2 = task2.execute_async(
            agent=agent,
            context=None,
            tools=None,
            token_capture_callback=token_callback,
            agent_execution_lock=lock,
        )

        # Wait for both workers; result() re-raises any worker exception.
        future1.result(timeout=10)
        future2.result(timeout=10)

    assert len(execution_times) == 2

    # Order the intervals by start time so a single comparison covers
    # both possible scheduling orders.
    (_, first_end), (second_start, _) = sorted(execution_times)
    assert first_end <= second_start, "Tasks should not overlap when using the same lock"