From 126d0010ba7e4dcc8760924030194ceae129f261 Mon Sep 17 00:00:00 2001 From: Joao Moura Date: Thu, 14 May 2026 16:17:44 -0400 Subject: [PATCH] fix: summary tag handling, TUI autocomplete focus, tool output flooding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Summary tags: Reverse the logic — for models like gpt-4.1 that wrap their actual response in <summary> tags (with thinking/CoT before it), extract the inner content instead of stripping it. Streaming uses a preflight buffer that waits for <summary>; if none appears, flushes everything normally. 2. TUI autocomplete: Change @mention accept key from Tab to right-arrow so autocomplete doesn't steal focus from the input widget. Only triggers when there's an active mention context with matches. 3. Tool output: Truncate tool results >4000 chars in LLM message history to prevent the model from echoing full file contents. Add soul-layer instruction telling the agent to summarize tool results rather than repeating them verbatim. 
Co-Authored-By: Claude Opus 4.6 --- lib/cli/src/crewai_cli/agent_tui.py | 16 ++- lib/crewai/src/crewai/new_agent/executor.py | 115 +++++++++++++++----- 2 files changed, 97 insertions(+), 34 deletions(-) diff --git a/lib/cli/src/crewai_cli/agent_tui.py b/lib/cli/src/crewai_cli/agent_tui.py index c1378fe52..fed067e37 100644 --- a/lib/cli/src/crewai_cli/agent_tui.py +++ b/lib/cli/src/crewai_cli/agent_tui.py @@ -71,7 +71,7 @@ except ImportError: class ChatTextArea(TextArea): - """Multiline chat input: Enter submits, Shift+Enter inserts newline, Tab completes @mentions.""" + """Multiline chat input: Enter submits, Shift+Enter inserts newline, Right-arrow completes @mentions.""" BINDINGS = [ Binding("enter", "submit", "Send", show=False), @@ -161,10 +161,14 @@ class ChatTextArea(TextArea): event.prevent_default() self.action_submit() return - if event.key == "tab": - event.prevent_default() - self.action_complete() - return + if event.key == "right": + ctx = self._get_mention_context() + if ctx is not None: + _, _, prefix = ctx + if self._get_matches(prefix): + event.prevent_default() + self.action_complete() + return if event.key == "escape": self._last_mention_prefix = None self.post_message(self.MentionChanged("", [])) @@ -834,7 +838,7 @@ class AgentTUI(App[None]): hint.display = False return names = " ".join(f"@{n}" for n in event.matches[:6]) - hint.update(f"Tab to complete: {names}") + hint.update(f"→ to complete: {names}") hint.display = True # ── Message routing ── diff --git a/lib/crewai/src/crewai/new_agent/executor.py b/lib/crewai/src/crewai/new_agent/executor.py index b2a80490f..ff93bcb39 100644 --- a/lib/crewai/src/crewai/new_agent/executor.py +++ b/lib/crewai/src/crewai/new_agent/executor.py @@ -127,7 +127,10 @@ class ConversationalAgentExecutor(BaseModel): soul = ( f"You are {agent.role}.\n" f"Your goal: {agent.goal}\n" - f"Background: {agent.backstory}" + f"Background: {agent.backstory}\n\n" + "When you use tools, act on the results directly. 
" + "Never repeat raw tool output (file contents, command output, etc.) " + "in your response — summarize findings or state what you did instead." ) stack.add("soul", soul, source="agent.role/goal/backstory") @@ -629,14 +632,22 @@ class ConversationalAgentExecutor(BaseModel): pass return "" - _INTERNAL_TAG_RE = re.compile( - r".*?", re.DOTALL + _SUMMARY_EXTRACT_RE = re.compile( + r"\s*(.*?)\s*", re.DOTALL ) def _strip_internal_tags(self, text: str) -> str: - """Strip blocks that leak from the summarization prompt.""" - cleaned = self._INTERNAL_TAG_RE.sub("", text).strip() - return cleaned if cleaned else text + """Handle tags leaked from the summarization prompt. + + Some models (e.g. gpt-4.1) wrap their actual response in + tags, treating the preceding text as chain-of-thought. When summary + tags are present we extract the inner content as the real answer. + """ + match = self._SUMMARY_EXTRACT_RE.search(text) + if match: + inner = match.group(1).strip() + return inner if inner else text + return text def _detect_artifacts(self, tool_name: str, result_str: str) -> list[Artifact]: """GAP-67: Detect artifacts from tool results. @@ -2064,11 +2075,19 @@ class ConversationalAgentExecutor(BaseModel): ], } ) + _MAX_TOOL_RESULT_CHARS = 4000 + display_result = result_str + if len(result_str) > _MAX_TOOL_RESULT_CHARS: + display_result = ( + result_str[:_MAX_TOOL_RESULT_CHARS] + + f"\n\n[Truncated — {len(result_str)} chars total. 
" + "Use the result directly; do not repeat it in your response.]" + ) llm_messages.append( { "role": "tool", "tool_call_id": call_id or func_name, - "content": result_str, + "content": display_result, } ) @@ -2400,39 +2419,77 @@ class ConversationalAgentExecutor(BaseModel): invoke_task = asyncio.create_task(self.ainvoke(user_message)) _streamed_chars = 0 _last_status_time = time.monotonic() + + # States: "preflight" (buffering, no seen yet), + # "inside" ( found, yielding inner content), + # "done" ( seen, suppress the rest), + # "passthrough" (no summary tags — yield everything) + _state = "preflight" + _preflight_buf: list[str] = [] _tag_buf = "" - _suppressing = False def _filter_chunk(raw: str) -> str: - """Filter ... blocks from streamed chunks.""" - nonlocal _tag_buf, _suppressing - out = [] + nonlocal _state, _tag_buf + out: list[str] = [] for ch in raw: - if _suppressing: - _tag_buf += ch - if _tag_buf.endswith(""): - _suppressing = False - _tag_buf = "" + if _state == "done": + break + + if _state == "passthrough": + out.append(ch) continue + + # Accumulate into tag buffer when we might be mid-tag if _tag_buf: _tag_buf += ch - if len(_tag_buf) <= len(""): - if ""[: len(_tag_buf)] == _tag_buf: - if _tag_buf == "": - _suppressing = True + + if _state == "preflight": + target = "" + if target[: len(_tag_buf)] == _tag_buf: + if _tag_buf == target: + _state = "inside" + _preflight_buf.clear() + _tag_buf = "" continue - else: - out.append(_tag_buf) - _tag_buf = "" - else: + # Not a match — dump tag_buf to preflight buffer + _preflight_buf.append(_tag_buf) + _tag_buf = "" + + elif _state == "inside": + target = "" + if target[: len(_tag_buf)] == _tag_buf: + if _tag_buf == target: + _state = "done" + _tag_buf = "" + continue + # Not a closing tag — flush buffered chars out.append(_tag_buf) _tag_buf = "" - elif ch == "<": + continue + + if ch == "<": _tag_buf = ch - else: + continue + + if _state == "preflight": + _preflight_buf.append(ch) + elif _state == 
"inside": out.append(ch) + return "".join(out) + def _flush_preflight() -> str: + """If we never saw , flush buffered text.""" + nonlocal _state + if _state == "preflight": + _state = "passthrough" + result = "".join(_preflight_buf) + _preflight_buf.clear() + if _tag_buf: + result += _tag_buf + return result + return "" + try: while not invoke_task.done(): try: @@ -2456,8 +2513,10 @@ class ConversationalAgentExecutor(BaseModel): _streamed_chars += len(filtered) yield filtered - if _tag_buf and not _suppressing: - yield _tag_buf + leftover = _flush_preflight() + if leftover: + _streamed_chars += len(leftover) + yield leftover result = invoke_task.result() self._last_stream_result = result