mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-06-11 19:28:20 +00:00
Compare commits
5 Commits
1.14.7
...
luzk/flow-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a64b41dd42 | ||
|
|
8565713a1a | ||
|
|
b720139eca | ||
|
|
540f5df767 | ||
|
|
c4476366ff |
@@ -226,6 +226,48 @@ counter=2 message='Hello from first_method - updated by second_method'
|
||||
من خلال ضمان إعادة مخرجات الدالة الأخيرة وتوفير الوصول إلى الحالة، تجعل تدفقات CrewAI من السهل دمج نتائج سير عمل الذكاء الاصطناعي في التطبيقات أو الأنظمة الأكبر،
|
||||
مع الحفاظ على الوصول إلى الحالة طوال تنفيذ التدفق.
|
||||
|
||||
## مقاييس استخدام التدفق
|
||||
|
||||
بعد اكتمال تنفيذ التدفق، يمكنك الوصول إلى الخاصية `usage_metrics` لعرض إجمالي استخدام التوكنات عبر **كل استدعاء لنموذج اللغة** يتم خلال التشغيل — بما في ذلك الاستدعاءات من كل فريق (Crew) ينظمه التدفق، والاستدعاءات داخل أدوات الـ Agents، والاستدعاءات المباشرة لـ `LLM.call(...)` من دوال التدفق. هذا هو المكافئ على جانب الـ SDK للإجماليات المعروضة في واجهة CrewAI Enterprise.
|
||||
|
||||
```python Code
|
||||
from crewai import LLM
|
||||
from crewai.flow.flow import Flow, listen, start
|
||||
|
||||
class UsageMetricsFlow(Flow):
|
||||
@start()
|
||||
def run_first_crew(self):
|
||||
self.state.first_result = FirstCrew().crew().kickoff()
|
||||
|
||||
@listen(run_first_crew)
|
||||
def call_llm_directly(self):
|
||||
# استدعاء مباشر لنموذج اللغة — يُحسب أيضًا ضمن flow.usage_metrics
|
||||
llm = LLM(model="openai/gpt-4o-mini")
|
||||
self.state.summary = llm.call("لخّص النقاط الرئيسية.")
|
||||
|
||||
@listen(call_llm_directly)
|
||||
def run_second_crew(self):
|
||||
self.state.second_result = SecondCrew().crew().kickoff()
|
||||
|
||||
flow = UsageMetricsFlow()
|
||||
flow.kickoff()
|
||||
|
||||
print(flow.usage_metrics)
|
||||
# UsageMetrics(total_tokens=8579, prompt_tokens=6210, completion_tokens=2369,
|
||||
# cached_prompt_tokens=0, reasoning_tokens=0,
|
||||
# cache_creation_tokens=0, successful_requests=5)
|
||||
```
|
||||
|
||||
<Note>
|
||||
`flow.usage_metrics` **ليست** نفس `flow.kickoff().token_usage`. هذه الأخيرة
|
||||
ترجع فقط `CrewOutput.token_usage` لـ **آخر** دالة `@listen` أعادت
|
||||
`CrewOutput`، مما يعني أنها تعكس فقط الفريق الأخير وتتجاهل الفرق السابقة
|
||||
وكذلك أي استدعاءات مباشرة لـ `LLM.call(...)`. استخدم `flow.usage_metrics`
|
||||
كلما احتجت إلى الإجمالي **الكامل** للتوكنات لتنفيذ التدفق.
|
||||
</Note>
|
||||
|
||||
كل حقل في [`UsageMetrics`](https://github.com/crewAIInc/crewAI/blob/main/lib/crewai/src/crewai/types/usage_metrics.py) المُعاد هو مجموع جميع استدعاءات نموذج اللغة التي حدثت خلال استدعاء واحد لـ `flow.kickoff()`. تتم إعادة تعيين العدادات عند الاستدعاء التالي لـ `kickoff()` (وفي كل تكرار من `kickoff_for_each`)، لذلك لن تتكرر العدّات عبر التشغيلات المتتالية. يمكن قراءة هذه الخاصية بأمان في أي وقت بعد اكتمال `kickoff()`؛ قراءتها أثناء التنفيذ تُرجع المجموع الجزئي المتراكم حتى تلك اللحظة.
|
||||
|
||||
## إدارة حالة التدفق
|
||||
|
||||
إدارة الحالة بفعالية أمر بالغ الأهمية لبناء سير عمل ذكاء اصطناعي موثوق وقابل للصيانة. توفر تدفقات CrewAI آليات قوية لإدارة الحالة غير المهيكلة والمهيكلة،
|
||||
|
||||
@@ -226,6 +226,49 @@ After the Flow has run, you can access the final state to see the updates made b
|
||||
By ensuring that the final method's output is returned and providing access to the state, CrewAI Flows make it easy to integrate the results of your AI workflows into larger applications or systems,
|
||||
while also maintaining and accessing the state throughout the Flow's execution.
|
||||
|
||||
## Flow Usage Metrics
|
||||
|
||||
After a Flow execution completes, you can access the `usage_metrics` property to view aggregated token usage across **every LLM call** made during the run — including calls from every Crew the Flow orchestrated, calls inside Agent tools, and bare `LLM.call(...)` invocations from Flow methods. This is the SDK-side equivalent of the totals shown in the CrewAI Enterprise UI.
|
||||
|
||||
```python Code
|
||||
from crewai import LLM
|
||||
from crewai.flow.flow import Flow, listen, start
|
||||
|
||||
class UsageMetricsFlow(Flow):
|
||||
@start()
|
||||
def run_first_crew(self):
|
||||
self.state.first_result = FirstCrew().crew().kickoff()
|
||||
|
||||
@listen(run_first_crew)
|
||||
def call_llm_directly(self):
|
||||
# Bare LLM call — still counted by flow.usage_metrics
|
||||
llm = LLM(model="openai/gpt-4o-mini")
|
||||
self.state.summary = llm.call("Summarize the key takeaways.")
|
||||
|
||||
@listen(call_llm_directly)
|
||||
def run_second_crew(self):
|
||||
self.state.second_result = SecondCrew().crew().kickoff()
|
||||
|
||||
flow = UsageMetricsFlow()
|
||||
flow.kickoff()
|
||||
|
||||
print(flow.usage_metrics)
|
||||
# UsageMetrics(total_tokens=8579, prompt_tokens=6210, completion_tokens=2369,
|
||||
# cached_prompt_tokens=0, reasoning_tokens=0,
|
||||
# cache_creation_tokens=0, successful_requests=5)
|
||||
```
|
||||
|
||||
<Note>
|
||||
`flow.usage_metrics` is **not** the same as `flow.kickoff().token_usage`. The
|
||||
latter returns the `CrewOutput.token_usage` of the **last** `@listen` method
|
||||
that returned a `CrewOutput`, which means it only reflects the final Crew and
|
||||
ignores prior Crews and bare `LLM.call(...)` invocations entirely. Use
|
||||
`flow.usage_metrics` whenever you need the **full** token rollup for the Flow
|
||||
execution.
|
||||
</Note>
|
||||
|
||||
Each entry in the returned [`UsageMetrics`](https://github.com/crewAIInc/crewAI/blob/main/lib/crewai/src/crewai/types/usage_metrics.py) is the sum across all LLM calls made within a single `flow.kickoff()` invocation. Counters reset on the next `kickoff()` call (or on each iteration of `kickoff_for_each`), so successive runs don't double-count. The property is safe to read at any point after `kickoff()` completes; reading it during execution returns the partial total accumulated so far.
|
||||
|
||||
## Flow State Management
|
||||
|
||||
Managing state effectively is crucial for building reliable and maintainable AI workflows. CrewAI Flows provides robust mechanisms for both unstructured and structured state management,
|
||||
|
||||
@@ -221,6 +221,48 @@ Flow가 실행된 후, 이러한 메소드들에 의해 수행된 업데이트
|
||||
최종 메소드의 출력이 반환되고 상태에 접근할 수 있도록 함으로써, CrewAI Flow는 AI 워크플로우의 결과를 더 큰 애플리케이션이나 시스템에 쉽게 통합할 수 있게 하며,
|
||||
Flow 실행 과정 전반에 걸쳐 상태를 유지하고 접근하면서도 이를 용이하게 만듭니다.
|
||||
|
||||
## 플로우 사용 메트릭
|
||||
|
||||
Flow 실행이 완료된 후, `usage_metrics` 속성에 접근하여 실행 동안 발생한 **모든 LLM 호출**의 토큰 사용량 집계를 확인할 수 있습니다. 여기에는 Flow가 오케스트레이션한 모든 Crew의 호출, Agent의 도구 내부에서 발생한 호출, 그리고 Flow 메서드에서 직접 호출한 `LLM.call(...)`이 모두 포함됩니다. 이는 CrewAI Enterprise UI에 표시되는 총량과 동등한 SDK 측 값입니다.
|
||||
|
||||
```python Code
|
||||
from crewai import LLM
|
||||
from crewai.flow.flow import Flow, listen, start
|
||||
|
||||
class UsageMetricsFlow(Flow):
|
||||
@start()
|
||||
def run_first_crew(self):
|
||||
self.state.first_result = FirstCrew().crew().kickoff()
|
||||
|
||||
@listen(run_first_crew)
|
||||
def call_llm_directly(self):
|
||||
# 직접 LLM 호출 — flow.usage_metrics에서도 집계됩니다
|
||||
llm = LLM(model="openai/gpt-4o-mini")
|
||||
self.state.summary = llm.call("핵심 내용을 요약해 주세요.")
|
||||
|
||||
@listen(call_llm_directly)
|
||||
def run_second_crew(self):
|
||||
self.state.second_result = SecondCrew().crew().kickoff()
|
||||
|
||||
flow = UsageMetricsFlow()
|
||||
flow.kickoff()
|
||||
|
||||
print(flow.usage_metrics)
|
||||
# UsageMetrics(total_tokens=8579, prompt_tokens=6210, completion_tokens=2369,
|
||||
# cached_prompt_tokens=0, reasoning_tokens=0,
|
||||
# cache_creation_tokens=0, successful_requests=5)
|
||||
```
|
||||
|
||||
<Note>
|
||||
`flow.usage_metrics`는 `flow.kickoff().token_usage`와 **동일하지 않습니다**.
|
||||
후자는 `CrewOutput`을 반환한 **마지막** `@listen` 메서드의
|
||||
`CrewOutput.token_usage`만 반환하므로, 이전에 실행된 Crew들과 Flow 메서드에서
|
||||
직접 호출한 `LLM.call(...)`은 전혀 포함되지 않습니다. Flow 실행에 대한
|
||||
**전체** 토큰 집계가 필요할 때는 항상 `flow.usage_metrics`를 사용하십시오.
|
||||
</Note>
|
||||
|
||||
반환되는 [`UsageMetrics`](https://github.com/crewAIInc/crewAI/blob/main/lib/crewai/src/crewai/types/usage_metrics.py)의 각 항목은 단일 `flow.kickoff()` 실행 동안 발생한 모든 LLM 호출의 합계입니다. 다음 `kickoff()` 호출(및 `kickoff_for_each`의 각 반복)에서 카운터가 초기화되므로 연속 실행이 이중으로 집계되지 않습니다. 이 속성은 `kickoff()` 완료 후 언제든지 안전하게 읽을 수 있으며, 실행 중에 읽으면 그 시점까지 누적된 부분 합계를 반환합니다.
|
||||
|
||||
## 플로우 상태 관리
|
||||
|
||||
상태를 효과적으로 관리하는 것은 신뢰할 수 있고 유지 보수가 용이한 AI 워크플로를 구축하는 데 매우 중요합니다. CrewAI 플로우는 비정형 및 정형 상태 관리를 위한 강력한 메커니즘을 제공하여, 개발자가 자신의 애플리케이션에 가장 적합한 접근 방식을 선택할 수 있도록 합니다.
|
||||
|
||||
@@ -219,6 +219,49 @@ Após o término da execução, é possível acessar o estado final e observar a
|
||||
Ao garantir que a saída do método final seja retornada e oferecer acesso ao estado, o CrewAI Flows facilita a integração dos resultados dos seus workflows de IA em aplicações maiores,
|
||||
além de permitir o gerenciamento e o acesso ao estado durante toda a execução do Flow.
|
||||
|
||||
## Métricas de Uso do Flow
|
||||
|
||||
Após a execução de um Flow, você pode acessar a propriedade `usage_metrics` para visualizar o consumo agregado de tokens em **todas as chamadas de LLM** realizadas durante a execução — incluindo chamadas das Crews orquestradas pelo Flow, chamadas dentro de tools de Agents, e invocações diretas de `LLM.call(...)` feitas a partir de métodos do Flow. Esse é o equivalente, do lado do SDK, ao total exibido na interface do CrewAI Enterprise.
|
||||
|
||||
```python Code
|
||||
from crewai import LLM
|
||||
from crewai.flow.flow import Flow, listen, start
|
||||
|
||||
class UsageMetricsFlow(Flow):
|
||||
@start()
|
||||
def run_first_crew(self):
|
||||
self.state.first_result = FirstCrew().crew().kickoff()
|
||||
|
||||
@listen(run_first_crew)
|
||||
def call_llm_directly(self):
|
||||
# Chamada direta de LLM — também contabilizada por flow.usage_metrics
|
||||
llm = LLM(model="openai/gpt-4o-mini")
|
||||
self.state.summary = llm.call("Resuma os principais pontos.")
|
||||
|
||||
@listen(call_llm_directly)
|
||||
def run_second_crew(self):
|
||||
self.state.second_result = SecondCrew().crew().kickoff()
|
||||
|
||||
flow = UsageMetricsFlow()
|
||||
flow.kickoff()
|
||||
|
||||
print(flow.usage_metrics)
|
||||
# UsageMetrics(total_tokens=8579, prompt_tokens=6210, completion_tokens=2369,
|
||||
# cached_prompt_tokens=0, reasoning_tokens=0,
|
||||
# cache_creation_tokens=0, successful_requests=5)
|
||||
```
|
||||
|
||||
<Note>
|
||||
`flow.usage_metrics` **não** é o mesmo que `flow.kickoff().token_usage`. Este
|
||||
último retorna apenas o `CrewOutput.token_usage` do **último** método
|
||||
`@listen` que retornou um `CrewOutput`, ou seja, reflete somente a Crew
|
||||
final e ignora completamente as Crews anteriores e quaisquer chamadas
|
||||
diretas de `LLM.call(...)`. Use `flow.usage_metrics` sempre que precisar do
|
||||
rollup **completo** de tokens da execução do Flow.
|
||||
</Note>
|
||||
|
||||
Cada campo do [`UsageMetrics`](https://github.com/crewAIInc/crewAI/blob/main/lib/crewai/src/crewai/types/usage_metrics.py) retornado representa a soma de todas as chamadas de LLM feitas em uma única invocação de `flow.kickoff()`. Os contadores são resetados a cada novo `kickoff()` (e em cada iteração de `kickoff_for_each`), de modo que execuções sucessivas não duplicam o total. A propriedade é segura para ser lida em qualquer momento após o `kickoff()`; lê-la durante a execução retorna o total parcial acumulado até aquele instante.
|
||||
|
||||
## Gerenciamento de Estado em Flows
|
||||
|
||||
Gerenciar o estado de forma eficaz é fundamental para construir fluxos de trabalho de IA confiáveis e de fácil manutenção. O CrewAI Flows oferece mecanismos robustos para o gerenciamento de estado tanto não estruturado quanto estruturado,
|
||||
|
||||
@@ -84,6 +84,7 @@ from crewai.events.types.flow_events import (
|
||||
MethodExecutionPausedEvent,
|
||||
MethodExecutionStartedEvent,
|
||||
)
|
||||
from crewai.events.types.llm_events import LLMCallCompletedEvent
|
||||
from crewai.flow.dsl._utils import build_flow_definition
|
||||
from crewai.flow.flow_context import (
|
||||
current_flow_defer_trace_finalization,
|
||||
@@ -129,6 +130,7 @@ if TYPE_CHECKING:
|
||||
|
||||
from crewai.flow.visualization import build_flow_structure, render_interactive
|
||||
from crewai.types.streaming import CrewStreamingOutput, FlowStreamingOutput
|
||||
from crewai.types.usage_metrics import UsageMetrics
|
||||
from crewai.utilities.env import get_env_context
|
||||
from crewai.utilities.streaming import (
|
||||
TaskInfo,
|
||||
@@ -153,6 +155,32 @@ ExecutionContext = Any # type: ignore[assignment,misc]
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _usage_dict_to_metrics(usage: dict[str, Any] | None) -> UsageMetrics | None:
|
||||
if not usage:
|
||||
return None
|
||||
|
||||
def _int(key: str) -> int:
|
||||
value = usage.get(key)
|
||||
try:
|
||||
return int(value) if value is not None else 0
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
|
||||
prompt_tokens = _int("prompt_tokens")
|
||||
completion_tokens = _int("completion_tokens")
|
||||
total_tokens = _int("total_tokens") or (prompt_tokens + completion_tokens)
|
||||
|
||||
return UsageMetrics(
|
||||
total_tokens=total_tokens,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
cached_prompt_tokens=_int("cached_prompt_tokens"),
|
||||
reasoning_tokens=_int("reasoning_tokens"),
|
||||
cache_creation_tokens=_int("cache_creation_tokens"),
|
||||
successful_requests=1,
|
||||
)
|
||||
|
||||
|
||||
def _condition_branches(
|
||||
condition: dict[str, Any],
|
||||
) -> tuple[Literal["and", "or"], list[FlowDefinitionCondition]]:
|
||||
@@ -905,6 +933,20 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta):
|
||||
_input_history: list[InputHistoryEntry] = PrivateAttr(default_factory=list)
|
||||
_state: Any = PrivateAttr(default=None)
|
||||
_deferred_flow_started_event_id: str | None = PrivateAttr(default=None)
|
||||
_aggregated_usage_metrics: UsageMetrics = PrivateAttr(default_factory=UsageMetrics)
|
||||
# Serializes mutations and snapshot reads on `_aggregated_usage_metrics`.
|
||||
# The bus dispatches sync handlers from a `ThreadPoolExecutor`, so two
|
||||
# concurrent `LLMCallCompletedEvent`s can race the read-modify-write
|
||||
# inside `add_usage_metrics`.
|
||||
_usage_metrics_lock: threading.Lock = PrivateAttr(default_factory=threading.Lock)
|
||||
_flow_match_id: str | None = PrivateAttr(default=None)
|
||||
_usage_aggregation_handler: Callable[..., Any] | None = PrivateAttr(default=None)
|
||||
# Incremented on every kickoff that takes ownership of usage aggregation.
|
||||
# The listener closure snapshots the epoch at attach time; a stale
|
||||
# handler still queued in the bus thread pool from a prior kickoff
|
||||
# compares its snapshot against the current value and bails out so it
|
||||
# cannot contaminate a later kickoff's accumulator.
|
||||
_usage_epoch: int = PrivateAttr(default=0)
|
||||
|
||||
def __class_getitem__(cls: type[Flow[T]], item: type[T]) -> type[Flow[T]]: # type: ignore[override]
|
||||
class _FlowGeneric(cls): # type: ignore[valid-type,misc]
|
||||
@@ -967,6 +1009,47 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta):
|
||||
method = method.__get__(self, self.__class__)
|
||||
self._methods[FlowMethodName(method_name)] = method
|
||||
|
||||
def _attach_usage_aggregation_listener(self) -> None:
    """Register the ``LLMCallCompletedEvent`` usage listener for this flow.

    Does nothing when a handler is already stored on the instance.
    Otherwise a closure is registered on the event bus and remembered in
    ``_usage_aggregation_handler`` so it can be unregistered later.
    """
    if self._usage_aggregation_handler is not None:
        return

    owner = self
    # Snapshot of the epoch at attach time; compared on every event below.
    epoch_at_attach = self._usage_epoch

    def _accumulate(source: Any, event: LLMCallCompletedEvent) -> None:
        # A handler queued by an earlier run may still fire after a later
        # run has bumped the epoch and reset the accumulator; such stale
        # invocations must not write into the new totals. Events emitted
        # under a different flow id are ignored as well.
        is_stale = epoch_at_attach != owner._usage_epoch
        if is_stale or current_flow_id.get() != owner._flow_match_id:
            return

        delta = _usage_dict_to_metrics(event.usage)
        if delta is not None:
            # The add is a read-modify-write, so it runs under the lock.
            with owner._usage_metrics_lock:
                owner._aggregated_usage_metrics.add_usage_metrics(delta)

    crewai_event_bus.on(LLMCallCompletedEvent)(_accumulate)
    self._usage_aggregation_handler = _accumulate
|
||||
|
||||
def _detach_usage_aggregation_listener(self) -> None:
    """Unregister the usage listener from the event bus, if one is attached.

    Clears ``_usage_aggregation_handler`` afterwards; calling this with no
    handler stored is a no-op.
    """
    registered = self._usage_aggregation_handler
    if registered is not None:
        crewai_event_bus.off(LLMCallCompletedEvent, registered)
        self._usage_aggregation_handler = None
|
||||
|
||||
@property
def usage_metrics(self) -> UsageMetrics:
    """Return a copy of the usage totals aggregated on this flow so far.

    The copy is taken while holding ``_usage_metrics_lock`` so it cannot
    interleave with a concurrent update made under the same lock.
    """
    with self._usage_metrics_lock:
        snapshot = self._aggregated_usage_metrics.model_copy()
    return snapshot
|
||||
|
||||
def recall(self, query: str, **kwargs: Any) -> Any:
|
||||
"""Recall relevant memories. Delegates to this flow's memory.
|
||||
|
||||
@@ -1261,6 +1344,14 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta):
|
||||
instance._initialize_state(state_data)
|
||||
instance._pending_feedback_context = pending_context
|
||||
instance._is_execution_resuming = True
|
||||
# Seed the usage-aggregation match id so `resume_async` can wire its
|
||||
# listener and restore `current_flow_id` correctly. Without this,
|
||||
# a restored flow has a None match id and the handler would either
|
||||
# ignore its own LLM calls or absorb unrelated ones from sibling
|
||||
# flows. The accumulator itself starts at zero — any usage from
|
||||
# before the pause was only observable on the original kickoff
|
||||
# instance.
|
||||
instance._flow_match_id = instance.flow_id
|
||||
|
||||
return instance
|
||||
|
||||
@@ -1359,201 +1450,227 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta):
|
||||
"No pending feedback context. Use from_pending() to restore a paused flow."
|
||||
)
|
||||
|
||||
if get_current_parent_id() is None:
|
||||
reset_emission_counter()
|
||||
reset_last_event_id()
|
||||
|
||||
if not self.suppress_flow_events:
|
||||
future = crewai_event_bus.emit(
|
||||
self,
|
||||
FlowStartedEvent(
|
||||
type="flow_started",
|
||||
flow_name=self.name or self.__class__.__name__,
|
||||
inputs=None,
|
||||
),
|
||||
)
|
||||
if future and isinstance(future, Future):
|
||||
try:
|
||||
await asyncio.wrap_future(future)
|
||||
except Exception:
|
||||
logger.warning("FlowStartedEvent handler failed", exc_info=True)
|
||||
|
||||
get_env_context()
|
||||
|
||||
context = self._pending_feedback_context
|
||||
emit = context.emit
|
||||
default_outcome = context.default_outcome
|
||||
|
||||
# Try to get the live LLM from the re-imported decorator first.
|
||||
# This preserves the fully-configured object (credentials, safety_settings, etc.)
|
||||
# for same-process resume. For cross-process resume, fall back to the
|
||||
# serialized context.llm which is now a dict with full config (or a legacy string).
|
||||
from crewai.flow.human_feedback import _deserialize_llm_from_context
|
||||
|
||||
llm = None
|
||||
method = self._methods.get(FlowMethodName(context.method_name))
|
||||
if method is not None:
|
||||
live_llm = getattr(method, "_human_feedback_llm", None)
|
||||
if live_llm is not None:
|
||||
from crewai.llms.base_llm import BaseLLM as BaseLLMClass
|
||||
|
||||
if isinstance(live_llm, BaseLLMClass):
|
||||
llm = live_llm
|
||||
|
||||
if llm is None:
|
||||
llm = _deserialize_llm_from_context(context.llm)
|
||||
|
||||
collapsed_outcome: str | None = None
|
||||
|
||||
if not feedback.strip():
|
||||
if default_outcome:
|
||||
collapsed_outcome = default_outcome
|
||||
elif emit:
|
||||
collapsed_outcome = emit[0]
|
||||
elif emit:
|
||||
if llm is not None:
|
||||
collapsed_outcome = self._collapse_to_outcome(
|
||||
feedback=feedback,
|
||||
outcomes=emit,
|
||||
llm=llm,
|
||||
)
|
||||
else:
|
||||
collapsed_outcome = emit[0]
|
||||
|
||||
result = HumanFeedbackResult(
|
||||
output=context.method_output,
|
||||
feedback=feedback,
|
||||
outcome=collapsed_outcome,
|
||||
timestamp=datetime.now(),
|
||||
method_name=context.method_name,
|
||||
metadata=context.metadata,
|
||||
)
|
||||
|
||||
self.human_feedback_history.append(result)
|
||||
self.last_human_feedback = result
|
||||
|
||||
self._completed_methods.add(FlowMethodName(context.method_name))
|
||||
|
||||
self._pending_feedback_context = None
|
||||
|
||||
if self.persistence is not None:
|
||||
self.persistence.clear_pending_feedback(context.flow_id)
|
||||
|
||||
if not self.suppress_flow_events:
|
||||
crewai_event_bus.emit(
|
||||
self,
|
||||
MethodExecutionFinishedEvent(
|
||||
type="method_execution_finished",
|
||||
flow_name=self.name or self.__class__.__name__,
|
||||
method_name=context.method_name,
|
||||
result=collapsed_outcome if emit else result,
|
||||
state=self._state,
|
||||
),
|
||||
)
|
||||
|
||||
# Clear resumption flag before triggering listeners
|
||||
# This allows methods to re-execute in loops (e.g., implement_changes → suggest_changes → implement_changes)
|
||||
self._is_execution_resuming = False
|
||||
|
||||
if emit and collapsed_outcome is None:
|
||||
collapsed_outcome = default_outcome or emit[0]
|
||||
result.outcome = collapsed_outcome
|
||||
# Wire usage aggregation for the resume phase. Two cases:
|
||||
# 1. We inherited an attached listener from a `kickoff_async`
|
||||
# that paused — keep counting into the same accumulator.
|
||||
# 2. The instance came from `from_pending` (fresh) — attach
|
||||
# a new listener.
|
||||
# In both cases we restore `current_flow_id` so the handler's
|
||||
# filter passes for LLM calls made during outcome collapsing and
|
||||
# downstream listener execution.
|
||||
flow_id_token = None
|
||||
if current_flow_id.get() is None and self._flow_match_id is not None:
|
||||
flow_id_token = current_flow_id.set(self._flow_match_id)
|
||||
self._attach_usage_aggregation_listener()
|
||||
|
||||
try:
|
||||
if emit and collapsed_outcome:
|
||||
self._method_outputs.append(collapsed_outcome)
|
||||
await self._execute_listeners(
|
||||
FlowMethodName(collapsed_outcome),
|
||||
result,
|
||||
)
|
||||
else:
|
||||
await self._execute_listeners(
|
||||
FlowMethodName(context.method_name),
|
||||
result,
|
||||
)
|
||||
except Exception as e:
|
||||
# Check if flow was paused again for human feedback (loop case)
|
||||
from crewai.flow.async_feedback.types import HumanFeedbackPending
|
||||
if get_current_parent_id() is None:
|
||||
reset_emission_counter()
|
||||
reset_last_event_id()
|
||||
|
||||
if isinstance(e, HumanFeedbackPending):
|
||||
self._pending_feedback_context = e.context
|
||||
|
||||
if self.persistence is None:
|
||||
from crewai.flow.persistence.factory import default_flow_persistence
|
||||
|
||||
self.persistence = default_flow_persistence()
|
||||
|
||||
state_data = (
|
||||
self._state
|
||||
if isinstance(self._state, dict)
|
||||
else self._state.model_dump()
|
||||
)
|
||||
self.persistence.save_pending_feedback(
|
||||
flow_uuid=e.context.flow_id,
|
||||
context=e.context,
|
||||
state_data=state_data,
|
||||
)
|
||||
|
||||
crewai_event_bus.emit(
|
||||
if not self.suppress_flow_events:
|
||||
future = crewai_event_bus.emit(
|
||||
self,
|
||||
FlowPausedEvent(
|
||||
type="flow_paused",
|
||||
FlowStartedEvent(
|
||||
type="flow_started",
|
||||
flow_name=self.name or self.__class__.__name__,
|
||||
flow_id=e.context.flow_id,
|
||||
method_name=e.context.method_name,
|
||||
state=self._copy_and_serialize_state(),
|
||||
message=e.context.message,
|
||||
emit=e.context.emit,
|
||||
inputs=None,
|
||||
),
|
||||
)
|
||||
return e
|
||||
raise
|
||||
if future and isinstance(future, Future):
|
||||
try:
|
||||
await asyncio.wrap_future(future)
|
||||
except Exception:
|
||||
logger.warning("FlowStartedEvent handler failed", exc_info=True)
|
||||
|
||||
final_result = self._method_outputs[-1] if self._method_outputs else result
|
||||
get_env_context()
|
||||
|
||||
if self._event_futures:
|
||||
await asyncio.gather(
|
||||
*[
|
||||
asyncio.wrap_future(f)
|
||||
for f in self._event_futures
|
||||
if isinstance(f, Future)
|
||||
]
|
||||
)
|
||||
self._event_futures.clear()
|
||||
context = self._pending_feedback_context
|
||||
emit = context.emit
|
||||
default_outcome = context.default_outcome
|
||||
|
||||
if (
|
||||
not self.suppress_flow_events
|
||||
and not self._should_defer_trace_finalization()
|
||||
):
|
||||
future = crewai_event_bus.emit(
|
||||
self,
|
||||
FlowFinishedEvent(
|
||||
type="flow_finished",
|
||||
flow_name=self.name or self.__class__.__name__,
|
||||
result=final_result,
|
||||
state=self._copy_and_serialize_state(),
|
||||
),
|
||||
)
|
||||
if future and isinstance(future, Future):
|
||||
try:
|
||||
await asyncio.wrap_future(future)
|
||||
except Exception:
|
||||
logger.warning("FlowFinishedEvent handler failed", exc_info=True)
|
||||
# Try to get the live LLM from the re-imported decorator first.
|
||||
# This preserves the fully-configured object (credentials, safety_settings, etc.)
|
||||
# for same-process resume. For cross-process resume, fall back to the
|
||||
# serialized context.llm which is now a dict with full config (or a legacy string).
|
||||
from crewai.flow.human_feedback import _deserialize_llm_from_context
|
||||
|
||||
trace_listener = TraceCollectionListener()
|
||||
if (
|
||||
trace_listener.batch_manager.batch_owner_type == "flow"
|
||||
and current_flow_id.get() == self.flow_id
|
||||
and not trace_listener.batch_manager.defer_session_finalization
|
||||
and not current_flow_defer_trace_finalization.get()
|
||||
):
|
||||
if trace_listener.first_time_handler.is_first_time:
|
||||
trace_listener.first_time_handler.mark_events_collected()
|
||||
trace_listener.first_time_handler.handle_execution_completion()
|
||||
llm = None
|
||||
method = self._methods.get(FlowMethodName(context.method_name))
|
||||
if method is not None:
|
||||
live_llm = getattr(method, "_human_feedback_llm", None)
|
||||
if live_llm is not None:
|
||||
from crewai.llms.base_llm import BaseLLM as BaseLLMClass
|
||||
|
||||
if isinstance(live_llm, BaseLLMClass):
|
||||
llm = live_llm
|
||||
|
||||
if llm is None:
|
||||
llm = _deserialize_llm_from_context(context.llm)
|
||||
|
||||
collapsed_outcome: str | None = None
|
||||
|
||||
if not feedback.strip():
|
||||
if default_outcome:
|
||||
collapsed_outcome = default_outcome
|
||||
elif emit:
|
||||
collapsed_outcome = emit[0]
|
||||
elif emit:
|
||||
if llm is not None:
|
||||
collapsed_outcome = self._collapse_to_outcome(
|
||||
feedback=feedback,
|
||||
outcomes=emit,
|
||||
llm=llm,
|
||||
)
|
||||
else:
|
||||
trace_listener.batch_manager.finalize_batch()
|
||||
collapsed_outcome = emit[0]
|
||||
|
||||
return final_result
|
||||
result = HumanFeedbackResult(
|
||||
output=context.method_output,
|
||||
feedback=feedback,
|
||||
outcome=collapsed_outcome,
|
||||
timestamp=datetime.now(),
|
||||
method_name=context.method_name,
|
||||
metadata=context.metadata,
|
||||
)
|
||||
|
||||
self.human_feedback_history.append(result)
|
||||
self.last_human_feedback = result
|
||||
|
||||
self._completed_methods.add(FlowMethodName(context.method_name))
|
||||
|
||||
self._pending_feedback_context = None
|
||||
|
||||
if self.persistence is not None:
|
||||
self.persistence.clear_pending_feedback(context.flow_id)
|
||||
|
||||
if not self.suppress_flow_events:
|
||||
crewai_event_bus.emit(
|
||||
self,
|
||||
MethodExecutionFinishedEvent(
|
||||
type="method_execution_finished",
|
||||
flow_name=self.name or self.__class__.__name__,
|
||||
method_name=context.method_name,
|
||||
result=collapsed_outcome if emit else result,
|
||||
state=self._state,
|
||||
),
|
||||
)
|
||||
|
||||
# Clear resumption flag before triggering listeners
|
||||
# This allows methods to re-execute in loops (e.g., implement_changes → suggest_changes → implement_changes)
|
||||
self._is_execution_resuming = False
|
||||
|
||||
if emit and collapsed_outcome is None:
|
||||
collapsed_outcome = default_outcome or emit[0]
|
||||
result.outcome = collapsed_outcome
|
||||
|
||||
try:
|
||||
if emit and collapsed_outcome:
|
||||
self._method_outputs.append(collapsed_outcome)
|
||||
await self._execute_listeners(
|
||||
FlowMethodName(collapsed_outcome),
|
||||
result,
|
||||
)
|
||||
else:
|
||||
await self._execute_listeners(
|
||||
FlowMethodName(context.method_name),
|
||||
result,
|
||||
)
|
||||
except Exception as e:
|
||||
# Check if flow was paused again for human feedback (loop case)
|
||||
from crewai.flow.async_feedback.types import HumanFeedbackPending
|
||||
|
||||
if isinstance(e, HumanFeedbackPending):
|
||||
self._pending_feedback_context = e.context
|
||||
|
||||
if self.persistence is None:
|
||||
from crewai.flow.persistence.factory import (
|
||||
default_flow_persistence,
|
||||
)
|
||||
|
||||
self.persistence = default_flow_persistence()
|
||||
|
||||
state_data = (
|
||||
self._state
|
||||
if isinstance(self._state, dict)
|
||||
else self._state.model_dump()
|
||||
)
|
||||
self.persistence.save_pending_feedback(
|
||||
flow_uuid=e.context.flow_id,
|
||||
context=e.context,
|
||||
state_data=state_data,
|
||||
)
|
||||
|
||||
crewai_event_bus.emit(
|
||||
self,
|
||||
FlowPausedEvent(
|
||||
type="flow_paused",
|
||||
flow_name=self.name or self.__class__.__name__,
|
||||
flow_id=e.context.flow_id,
|
||||
method_name=e.context.method_name,
|
||||
state=self._copy_and_serialize_state(),
|
||||
message=e.context.message,
|
||||
emit=e.context.emit,
|
||||
),
|
||||
)
|
||||
return e
|
||||
raise
|
||||
|
||||
final_result = self._method_outputs[-1] if self._method_outputs else result
|
||||
|
||||
if self._event_futures:
|
||||
await asyncio.gather(
|
||||
*[
|
||||
asyncio.wrap_future(f)
|
||||
for f in self._event_futures
|
||||
if isinstance(f, Future)
|
||||
]
|
||||
)
|
||||
self._event_futures.clear()
|
||||
|
||||
if (
|
||||
not self.suppress_flow_events
|
||||
and not self._should_defer_trace_finalization()
|
||||
):
|
||||
future = crewai_event_bus.emit(
|
||||
self,
|
||||
FlowFinishedEvent(
|
||||
type="flow_finished",
|
||||
flow_name=self.name or self.__class__.__name__,
|
||||
result=final_result,
|
||||
state=self._copy_and_serialize_state(),
|
||||
),
|
||||
)
|
||||
if future and isinstance(future, Future):
|
||||
try:
|
||||
await asyncio.wrap_future(future)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"FlowFinishedEvent handler failed", exc_info=True
|
||||
)
|
||||
|
||||
trace_listener = TraceCollectionListener()
|
||||
if (
|
||||
trace_listener.batch_manager.batch_owner_type == "flow"
|
||||
and current_flow_id.get() == self.flow_id
|
||||
and not trace_listener.batch_manager.defer_session_finalization
|
||||
and not current_flow_defer_trace_finalization.get()
|
||||
):
|
||||
if trace_listener.first_time_handler.is_first_time:
|
||||
trace_listener.first_time_handler.mark_events_collected()
|
||||
trace_listener.first_time_handler.handle_execution_completion()
|
||||
else:
|
||||
trace_listener.batch_manager.finalize_batch()
|
||||
|
||||
return final_result
|
||||
finally:
|
||||
# If we re-paused for human feedback, leave the listener
|
||||
# attached so the next `resume_async` can take over.
|
||||
# Otherwise (completion or unexpected error), release it.
|
||||
if self._pending_feedback_context is None:
|
||||
self._detach_usage_aggregation_listener()
|
||||
if flow_id_token is not None:
|
||||
current_flow_id.reset(flow_id_token)
|
||||
|
||||
def _create_initial_state(self) -> T:
|
||||
"""Create and initialize flow state with UUID and default values.
|
||||
@@ -2056,6 +2173,31 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta):
|
||||
request_id_token = current_flow_request_id.set(self.flow_id)
|
||||
|
||||
runtime_scope = crewai_event_bus._enter_runtime_scope()
|
||||
|
||||
# Guard against a reentrant kickoff on the same Flow instance: only
|
||||
# the outermost call captures `_flow_match_id`, resets the accumulator,
|
||||
# and owns the listener lifecycle. An inner reentrant call passes
|
||||
# through so it doesn't wipe outer's state or detach the shared handler.
|
||||
owns_usage_aggregation = self._usage_aggregation_handler is None
|
||||
if owns_usage_aggregation:
|
||||
# Capture the flow id seen by `FlowTrackable._set_flow_context` so
|
||||
# we can match LLM call events back to this flow even if `state.id`
|
||||
# gets overwritten later by `inputs["id"]`.
|
||||
self._flow_match_id = current_flow_id.get()
|
||||
self._aggregated_usage_metrics = UsageMetrics()
|
||||
# Bump the epoch BEFORE attaching so any in-flight handler from
|
||||
# a prior kickoff queued in the bus thread pool sees its stale
|
||||
# snapshot and bails out instead of writing into the fresh
|
||||
# accumulator.
|
||||
self._usage_epoch += 1
|
||||
self._attach_usage_aggregation_listener()
|
||||
|
||||
# Flips in the `HumanFeedbackPending` branch so `finally` keeps the
|
||||
# listener attached. Late LLM events during the pause window and
|
||||
# the subsequent `resume_async` call continue to accumulate into
|
||||
# this run's `flow.usage_metrics`.
|
||||
paused_for_feedback = False
|
||||
|
||||
try:
|
||||
# Reset flow state for fresh execution unless restoring from persistence
|
||||
is_restoring = (
|
||||
@@ -2240,6 +2382,7 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta):
|
||||
from crewai.flow.async_feedback.types import HumanFeedbackPending
|
||||
|
||||
if isinstance(e, HumanFeedbackPending):
|
||||
paused_for_feedback = True
|
||||
# Auto-save pending feedback (create default persistence if needed)
|
||||
if self.persistence is None:
|
||||
from crewai.flow.persistence.factory import (
|
||||
@@ -2345,6 +2488,12 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta):
|
||||
# Ensure all background memory saves complete before returning
|
||||
if self.memory is not None and hasattr(self.memory, "drain_writes"):
|
||||
self.memory.drain_writes()
|
||||
# On pause keep the listener attached so events during the
|
||||
# pause-to-resume window still count and `resume_async` can
|
||||
# take over the same accumulator. Otherwise (completion or
|
||||
# unexpected error) release it.
|
||||
if owns_usage_aggregation and not paused_for_feedback:
|
||||
self._detach_usage_aggregation_listener()
|
||||
if request_id_token is not None:
|
||||
current_flow_request_id.reset(request_id_token)
|
||||
if flow_defer_trace_finalization_token is not None:
|
||||
|
||||
395
lib/crewai/tests/test_flow_usage_metrics.py
Normal file
395
lib/crewai/tests/test_flow_usage_metrics.py
Normal file
@@ -0,0 +1,395 @@
|
||||
"""Tests for flow-level token usage aggregation.
|
||||
|
||||
``flow.usage_metrics`` listens to ``LLMCallCompletedEvent`` for the duration
|
||||
of ``kickoff_async`` so it covers every LLM call inside the flow — crew-led,
|
||||
tool-led, AND bare ``LLM.call(...)`` from a flow method. We exercise the
|
||||
aggregator end-to-end through the real event bus with fabricated events and
|
||||
explicit contextvar control; no live LLM provider is required.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextvars
|
||||
import os
|
||||
import tempfile
|
||||
from typing import Any, Callable
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.events.event_bus import crewai_event_bus
|
||||
from crewai.events.types.llm_events import LLMCallCompletedEvent, LLMCallType
|
||||
from crewai.flow.async_feedback.types import PendingFeedbackContext
|
||||
from crewai.flow.flow import Flow, listen, start
|
||||
from crewai.flow.flow_context import current_flow_id
|
||||
from crewai.flow.persistence.sqlite import SQLiteFlowPersistence
|
||||
from crewai.flow.runtime import _usage_dict_to_metrics
|
||||
from crewai.types.usage_metrics import UsageMetrics
|
||||
|
||||
|
||||
def _emit_llm_call(
    *,
    flow_id: str | None,
    prompt_tokens: int = 0,
    completion_tokens: int = 0,
    cached_prompt_tokens: int = 0,
    reasoning_tokens: int = 0,
    cache_creation_tokens: int = 0,
) -> None:
    """Publish a single fabricated ``LLMCallCompletedEvent`` on the event bus.

    The emit happens inside a freshly copied context in which
    ``current_flow_id`` is set to ``flow_id``, so the caller's own context is
    left untouched. If the bus hands back a future, this helper blocks on it
    (up to 5 seconds) before returning.

    ``total_tokens`` is always ``prompt_tokens + completion_tokens``. The
    extended counters are added to the usage payload only when non-zero.
    """
    extended_counts = {
        "cached_prompt_tokens": cached_prompt_tokens,
        "reasoning_tokens": reasoning_tokens,
        "cache_creation_tokens": cache_creation_tokens,
    }
    usage: dict[str, Any] = {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
        # Falsy (zero) extended counters are left out of the payload.
        **{name: count for name, count in extended_counts.items() if count},
    }
    event = LLMCallCompletedEvent(
        call_id=str(uuid4()),
        model="gpt-4o-mini",
        response="ok",
        call_type=LLMCallType.LLM_CALL,
        usage=usage,
    )

    def _pinned_emit() -> None:
        # Runs inside the copied context: the contextvar change stays local.
        current_flow_id.set(flow_id)
        pending = crewai_event_bus.emit(object(), event)
        if pending is None:
            return
        pending.result(timeout=5.0)

    contextvars.copy_context().run(_pinned_emit)
|
||||
|
||||
|
||||
class _ScriptedFlow(Flow):
    """Flow with a single ``@start`` step that runs a per-instance script.

    Tests assign a callable to ``flow._script``; the start method invokes it
    with the flow itself. When no script has been assigned the step is a
    no-op, which lets one subclass serve every scenario.
    """

    @start()
    def run(self) -> None:
        try:
            scenario = self._script
        except AttributeError:
            # No script attached: nothing to do for this run.
            return
        scenario(self)
|
||||
|
||||
|
||||
def _run(script: Callable[[Flow], None] = lambda _f: None) -> Flow:
    """Create a ``_ScriptedFlow``, give it ``script``, and kick it off.

    The flow is returned after ``kickoff()`` so the caller can inspect it.
    """
    scripted = _ScriptedFlow()
    scripted._script = script
    scripted.kickoff()
    return scripted
|
||||
|
||||
|
||||
class TestUsageDictToMetrics:
    """Checks how ``_usage_dict_to_metrics`` normalizes raw usage payloads."""

    @pytest.mark.parametrize(
        "usage, expected",
        [
            pytest.param(None, None, id="none"),
            pytest.param({}, None, id="empty"),
            pytest.param(
                {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
                UsageMetrics(
                    prompt_tokens=10,
                    completion_tokens=20,
                    total_tokens=30,
                    successful_requests=1,
                ),
                id="all_keys",
            ),
            # Without total_tokens the total is prompt + completion.
            pytest.param(
                {"prompt_tokens": 4, "completion_tokens": 6},
                UsageMetrics(
                    prompt_tokens=4,
                    completion_tokens=6,
                    total_tokens=10,
                    successful_requests=1,
                ),
                id="no_total",
            ),
            # Extended provider-specific counters survive normalization.
            pytest.param(
                {
                    "prompt_tokens": 100,
                    "completion_tokens": 80,
                    "total_tokens": 180,
                    "cached_prompt_tokens": 40,
                    "reasoning_tokens": 25,
                    "cache_creation_tokens": 10,
                },
                UsageMetrics(
                    prompt_tokens=100,
                    completion_tokens=80,
                    total_tokens=180,
                    cached_prompt_tokens=40,
                    reasoning_tokens=25,
                    cache_creation_tokens=10,
                    successful_requests=1,
                ),
                id="extended_keys",
            ),
            # Non-integer junk is expected to become 0 rather than raise.
            pytest.param(
                {"prompt_tokens": "n/a", "completion_tokens": None, "total_tokens": 7},
                UsageMetrics(
                    prompt_tokens=0,
                    completion_tokens=0,
                    total_tokens=7,
                    successful_requests=1,
                ),
                id="garbage",
            ),
        ],
    )
    def test_normalization(
        self, usage: dict[str, Any] | None, expected: UsageMetrics | None
    ) -> None:
        """Each raw payload must normalize to exactly the expected metrics."""
        normalized = _usage_dict_to_metrics(usage)
        assert normalized == expected
|
||||
|
||||
|
||||
class TestFlowUsageAggregation:
    """End-to-end tests driving the listener through the real event bus."""

    def test_sums_every_llm_call_in_the_flow(self) -> None:
        """Multiple LLM calls — including bare ``LLM.call(...)`` made outside
        any crew — accumulate; ``successful_requests`` tracks the call count."""

        def script(flow: Flow) -> None:
            _emit_llm_call(
                flow_id=flow._flow_match_id, prompt_tokens=300, completion_tokens=300
            )
            _emit_llm_call(
                flow_id=flow._flow_match_id, prompt_tokens=200, completion_tokens=100
            )
            _emit_llm_call(
                flow_id=flow._flow_match_id, prompt_tokens=20, completion_tokens=20
            )

        flow = _run(script)

        assert flow.usage_metrics.total_tokens == 940
        assert flow.usage_metrics.prompt_tokens == 520
        assert flow.usage_metrics.completion_tokens == 420
        assert flow.usage_metrics.successful_requests == 3

    def test_returns_zero_when_no_calls_happen(self) -> None:
        """A run that emits no LLM events reports an all-default ``UsageMetrics``."""
        flow = _run()
        assert flow.usage_metrics == UsageMetrics()

    def test_ignores_events_from_other_flows(self) -> None:
        """Concurrent flow runs share the singleton bus, so the listener must
        scope itself to its own flow via the contextvar match."""

        def script(flow: Flow) -> None:
            _emit_llm_call(
                flow_id=flow._flow_match_id, prompt_tokens=50, completion_tokens=50
            )
            _emit_llm_call(
                flow_id="some-other-flow",
                prompt_tokens=49_000,
                completion_tokens=50_999,
            )

        flow = _run(script)

        assert flow.usage_metrics.total_tokens == 100
        assert flow.usage_metrics.successful_requests == 1

    def test_resets_between_kickoffs(self) -> None:
        """Two identical kickoffs on one instance report a single run's usage,
        i.e. the accumulator is reset rather than carried over."""
        flow = _ScriptedFlow()
        flow._script = lambda f: _emit_llm_call(
            flow_id=f._flow_match_id, prompt_tokens=250, completion_tokens=250
        )

        flow.kickoff()
        flow.kickoff()

        assert flow.usage_metrics.total_tokens == 500
        assert flow.usage_metrics.successful_requests == 1

    def test_usage_metrics_returns_independent_copy(self) -> None:
        """``usage_metrics`` must return a copy, not the internal instance —
        otherwise callers can clobber the in-flight accumulator."""

        flow = _run(
            lambda f: _emit_llm_call(
                flow_id=f._flow_match_id, prompt_tokens=50, completion_tokens=50
            )
        )

        snapshot = flow.usage_metrics
        snapshot.total_tokens = 999_999

        assert flow.usage_metrics.total_tokens == 100

    def test_handler_is_unregistered_after_kickoff(self) -> None:
        """Long-lived workers (Celery, devkit) must not leak one handler per
        kickoff on the singleton bus, on either the success or failure path."""

        def handler_count() -> int:
            return len(
                crewai_event_bus._sync_handlers.get(LLMCallCompletedEvent, frozenset())
            )

        before = handler_count()

        # Success path: repeated kickoffs leave the handler count unchanged.
        flow = _ScriptedFlow()
        flow._script = lambda f: _emit_llm_call(
            flow_id=f._flow_match_id, prompt_tokens=5, completion_tokens=5
        )
        for _ in range(3):
            flow.kickoff()

        assert handler_count() == before

        # Failure path: a raising flow method must not leave a handler behind.
        def boom(_f: Flow) -> None:
            raise RuntimeError("boom")

        failing = _ScriptedFlow()
        failing._script = boom

        with pytest.raises(RuntimeError, match="boom"):
            failing.kickoff()

        assert handler_count() == before

    def test_stale_handler_from_prior_kickoff_does_not_contaminate(self) -> None:
        """The bus dispatches sync handlers on a thread pool that ``emit``
        does not wait on. A handler still queued from a prior kickoff
        must not write into a later kickoff's accumulator — the epoch
        snapshot in the handler closure bails out on mismatch."""

        captured: dict[str, Any] = {}

        def script(flow: Flow) -> None:
            _emit_llm_call(
                flow_id=flow._flow_match_id, prompt_tokens=10, completion_tokens=10
            )
            # Keep the first kickoff's handler so it can be replayed later.
            captured["handler"] = flow._usage_aggregation_handler
            captured["match_id"] = flow._flow_match_id

        flow = _run(script)
        first_total = flow.usage_metrics.total_tokens
        assert first_total == 20

        # A second kickoff bumps the epoch and resets the accumulator.
        flow._script = lambda f: None
        flow.kickoff()
        assert flow.usage_metrics.total_tokens == 0

        stale_handler = captured["handler"]
        assert stale_handler is not None

        stale_event = LLMCallCompletedEvent(
            call_id=str(uuid4()),
            model="gpt-4o-mini",
            response="ok",
            call_type=LLMCallType.LLM_CALL,
            usage={"prompt_tokens": 999, "completion_tokens": 999, "total_tokens": 1998},
        )

        def _invoke_stale_handler() -> None:
            # Call the old handler directly with the flow id it would match.
            current_flow_id.set(captured["match_id"])
            stale_handler(object(), stale_event)

        contextvars.copy_context().run(_invoke_stale_handler)

        # Stale handler bailed: second kickoff's accumulator is still zero.
        assert flow.usage_metrics.total_tokens == 0

    def test_listener_persists_after_pause(self) -> None:
        """When ``kickoff_async`` pauses for human feedback, the listener
        must stay attached so late LLM events (queued in the bus thread
        pool by pre-pause LLM calls that emit but don't wait on their
        handler future) still count for this run. Otherwise the pause's
        ``finally`` would detach the listener and silently drop them."""

        from crewai.flow.async_feedback.types import HumanFeedbackPending

        captured: dict[str, Any] = {}

        class _PausingFlow(Flow):
            @start()
            def begin(self) -> None:
                _emit_llm_call(
                    flow_id=self._flow_match_id,
                    prompt_tokens=10,
                    completion_tokens=20,
                )
                captured["pre_pause_total"] = self.usage_metrics.total_tokens
                raise HumanFeedbackPending(
                    context=PendingFeedbackContext(
                        flow_id=self.flow_id,
                        flow_class="_PausingFlow",
                        method_name="begin",
                        method_output="content",
                        message="Review:",
                    )
                )

        with tempfile.TemporaryDirectory() as tmpdir:
            persistence = SQLiteFlowPersistence(os.path.join(tmpdir, "f.db"))
            flow = _PausingFlow(persistence=persistence)
            try:
                result = flow.kickoff()

                assert isinstance(result, HumanFeedbackPending)
                assert captured["pre_pause_total"] == 30
                assert flow._usage_aggregation_handler is not None

                # Simulate a late LLM event arriving after the pause — without
                # the keep-on-pause fix this would be dropped silently.
                _emit_llm_call(
                    flow_id=flow._flow_match_id,
                    prompt_tokens=2,
                    completion_tokens=3,
                )
                assert flow.usage_metrics.total_tokens == 35
            finally:
                # The paused flow keeps its listener on the singleton bus;
                # release it even when an assertion above fails so the handler
                # cannot leak into other tests.
                if flow._usage_aggregation_handler is not None:
                    flow._detach_usage_aggregation_listener()

    def test_aggregates_resume_after_from_pending(self) -> None:
        """A flow restored via ``from_pending`` is a fresh instance with no
        ``_flow_match_id``; without seeding it, the listener attached in
        ``resume_async`` either ignores its own LLM calls or absorbs unrelated
        ones. ``from_pending`` must seed the match id so the resume-phase
        aggregator counts our own calls and only our own calls."""

        class _ResumeFlow(Flow):
            @start()
            def begin(self) -> str:
                return "content"

            @listen(begin)
            def on_begin(self, _feedback: Any) -> str:
                # One call attributed to this flow, one to an unrelated flow.
                _emit_llm_call(
                    flow_id=self._flow_match_id,
                    prompt_tokens=100,
                    completion_tokens=50,
                )
                _emit_llm_call(
                    flow_id="some-other-flow",
                    prompt_tokens=9_999,
                    completion_tokens=9_999,
                )
                return "done"

        with tempfile.TemporaryDirectory() as tmpdir:
            persistence = SQLiteFlowPersistence(os.path.join(tmpdir, "f.db"))
            flow_id = "usage-resume-test"
            persistence.save_pending_feedback(
                flow_uuid=flow_id,
                context=PendingFeedbackContext(
                    flow_id=flow_id,
                    flow_class="_ResumeFlow",
                    method_name="begin",
                    method_output="content",
                    message="Review:",
                ),
                state_data={"id": flow_id},
            )

            flow = _ResumeFlow.from_pending(flow_id, persistence)
            assert flow._flow_match_id == flow.flow_id

            flow.resume("ok")

            assert flow.usage_metrics.total_tokens == 150
            assert flow.usage_metrics.prompt_tokens == 100
            assert flow.usage_metrics.completion_tokens == 50
            assert flow.usage_metrics.successful_requests == 1
|
||||
Reference in New Issue
Block a user