From 126d0010ba7e4dcc8760924030194ceae129f261 Mon Sep 17 00:00:00 2001
From: Joao Moura <joaomdmoura@gmail.com>
Date: Thu, 14 May 2026 16:17:44 -0400
Subject: [PATCH] fix: summary tag handling, TUI autocomplete focus, tool
 output flooding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. Summary tags: Reverse the logic — for models like gpt-4.1 that wrap
   their actual response in <summary> tags (with thinking/CoT before it),
   extract the inner content instead of stripping it. Streaming uses a
   preflight buffer that holds output until <summary> is seen; if the
   stream ends without one, the buffered text is flushed unchanged.

2. TUI autocomplete: Change @mention accept key from Tab to right-arrow
   so autocomplete doesn't steal focus from the input widget. The key
   only completes when there's an active mention context with matches;
   otherwise the key press is not intercepted.

3. Tool output: Truncate tool results longer than 4000 chars in the LLM
   message history (appending a note with the original length) to
   prevent the model from echoing full file contents. Add a soul-layer
   instruction telling the agent to summarize tool results rather than
   repeating them verbatim.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 lib/cli/src/crewai_cli/agent_tui.py         |  16 ++-
 lib/crewai/src/crewai/new_agent/executor.py | 115 +++++++++++++++-----
 2 files changed, 97 insertions(+), 34 deletions(-)
diff --git a/lib/cli/src/crewai_cli/agent_tui.py b/lib/cli/src/crewai_cli/agent_tui.py
index c1378fe52..fed067e37 100644
--- a/lib/cli/src/crewai_cli/agent_tui.py
+++ b/lib/cli/src/crewai_cli/agent_tui.py
@@ -71,7 +71,7 @@ except ImportError:
 
 
 class ChatTextArea(TextArea):
-    """Multiline chat input: Enter submits, Shift+Enter inserts newline, Tab completes @mentions."""
+    """Multiline chat input: Enter submits, Shift+Enter inserts newline, Right-arrow completes @mentions."""
 
     BINDINGS = [
         Binding("enter", "submit", "Send", show=False),
@@ -161,10 +161,14 @@ class ChatTextArea(TextArea):
             event.prevent_default()
             self.action_submit()
             return
-        if event.key == "tab":
-            event.prevent_default()
-            self.action_complete()
-            return
+        if event.key == "right":
+            ctx = self._get_mention_context()
+            if ctx is not None:
+                _, _, prefix = ctx
+                if self._get_matches(prefix):
+                    event.prevent_default()
+                    self.action_complete()
+                    return
         if event.key == "escape":
             self._last_mention_prefix = None
             self.post_message(self.MentionChanged("", []))
@@ -834,7 +838,7 @@ class AgentTUI(App[None]):
             hint.display = False
             return
         names = "  ".join(f"@{n}" for n in event.matches[:6])
-        hint.update(f"Tab to complete:  {names}")
+        hint.update(f"→ to complete:  {names}")
         hint.display = True
 
     # ── Message routing ──
diff --git a/lib/crewai/src/crewai/new_agent/executor.py b/lib/crewai/src/crewai/new_agent/executor.py
index b2a80490f..ff93bcb39 100644
--- a/lib/crewai/src/crewai/new_agent/executor.py
+++ b/lib/crewai/src/crewai/new_agent/executor.py
@@ -127,7 +127,10 @@ class ConversationalAgentExecutor(BaseModel):
         soul = (
             f"You are {agent.role}.\n"
             f"Your goal: {agent.goal}\n"
-            f"Background: {agent.backstory}"
+            f"Background: {agent.backstory}\n\n"
+            "When you use tools, act on the results directly. "
+            "Never repeat raw tool output (file contents, command output, etc.) "
+            "in your response — summarize findings or state what you did instead."
         )
         stack.add("soul", soul, source="agent.role/goal/backstory")
 
@@ -629,14 +632,22 @@ class ConversationalAgentExecutor(BaseModel):
             pass
         return ""
 
-    _INTERNAL_TAG_RE = re.compile(
-        r"<summary>.*?</summary>", re.DOTALL
+    _SUMMARY_EXTRACT_RE = re.compile(
+        r"<summary>\s*(.*?)\s*</summary>", re.DOTALL
     )
 
     def _strip_internal_tags(self, text: str) -> str:
-        """Strip <summary> blocks that leak from the summarization prompt."""
-        cleaned = self._INTERNAL_TAG_RE.sub("", text).strip()
-        return cleaned if cleaned else text
+        """Handle <summary> tags leaked from the summarization prompt.
+
+        Some models (e.g. gpt-4.1) wrap their actual response in <summary>
+        tags, treating the preceding text as chain-of-thought. When summary
+        tags are present we extract the inner content as the real answer.
+        """
+        match = self._SUMMARY_EXTRACT_RE.search(text)
+        if match:
+            inner = match.group(1).strip()
+            return inner if inner else text
+        return text
 
     def _detect_artifacts(self, tool_name: str, result_str: str) -> list[Artifact]:
         """GAP-67: Detect artifacts from tool results.
@@ -2064,11 +2075,19 @@ class ConversationalAgentExecutor(BaseModel):
                     ],
                 }
             )
+            _MAX_TOOL_RESULT_CHARS = 4000
+            display_result = result_str
+            if len(result_str) > _MAX_TOOL_RESULT_CHARS:
+                display_result = (
+                    result_str[:_MAX_TOOL_RESULT_CHARS]
+                    + f"\n\n[Truncated — {len(result_str)} chars total. "
+                    "Use the result directly; do not repeat it in your response.]"
+                )
             llm_messages.append(
                 {
                     "role": "tool",
                     "tool_call_id": call_id or func_name,
-                    "content": result_str,
+                    "content": display_result,
                 }
             )
 
@@ -2400,39 +2419,77 @@ class ConversationalAgentExecutor(BaseModel):
         invoke_task = asyncio.create_task(self.ainvoke(user_message))
         _streamed_chars = 0
         _last_status_time = time.monotonic()
+
+        # States: "preflight" (buffering, no <summary> seen yet),
+        #         "inside"   (<summary> found, yielding inner content),
+        #         "done"     (</summary> seen, suppress the rest),
+        #         "passthrough" (no summary tags — yield everything)
+        _state = "preflight"
+        _preflight_buf: list[str] = []
         _tag_buf = ""
-        _suppressing = False
 
         def _filter_chunk(raw: str) -> str:
-            """Filter <summary>...</summary> blocks from streamed chunks."""
-            nonlocal _tag_buf, _suppressing
-            out = []
+            nonlocal _state, _tag_buf
+            out: list[str] = []
             for ch in raw:
-                if _suppressing:
-                    _tag_buf += ch
-                    if _tag_buf.endswith("</summary>"):
-                        _suppressing = False
-                        _tag_buf = ""
+                if _state == "done":
+                    break
+
+                if _state == "passthrough":
+                    out.append(ch)
                     continue
+
+                # Accumulate into tag buffer when we might be mid-tag
                 if _tag_buf:
                     _tag_buf += ch
-                    if len(_tag_buf) <= len("<summary>"):
-                        if "<summary>"[: len(_tag_buf)] == _tag_buf:
-                            if _tag_buf == "<summary>":
-                                _suppressing = True
+
+                    if _state == "preflight":
+                        target = "<summary>"
+                        if target[: len(_tag_buf)] == _tag_buf:
+                            if _tag_buf == target:
+                                _state = "inside"
+                                _preflight_buf.clear()
+                                _tag_buf = ""
                             continue
-                        else:
-                            out.append(_tag_buf)
-                            _tag_buf = ""
-                    else:
+                        # Not a match — dump tag_buf to preflight buffer
+                        _preflight_buf.append(_tag_buf)
+                        _tag_buf = ""
+
+                    elif _state == "inside":
+                        target = "</summary>"
+                        if target[: len(_tag_buf)] == _tag_buf:
+                            if _tag_buf == target:
+                                _state = "done"
+                                _tag_buf = ""
+                            continue
+                        # Not a closing tag — flush buffered chars
                         out.append(_tag_buf)
                         _tag_buf = ""
-                elif ch == "<":
+                    continue
+
+                if ch == "<":
                     _tag_buf = ch
-                else:
+                    continue
+
+                if _state == "preflight":
+                    _preflight_buf.append(ch)
+                elif _state == "inside":
                     out.append(ch)
+
             return "".join(out)
 
+        def _flush_preflight() -> str:
+            """If we never saw <summary>, flush buffered text."""
+            nonlocal _state
+            if _state == "preflight":
+                _state = "passthrough"
+                result = "".join(_preflight_buf)
+                _preflight_buf.clear()
+                if _tag_buf:
+                    result += _tag_buf
+                return result
+            return ""
+
         try:
             while not invoke_task.done():
                 try:
@@ -2456,8 +2513,10 @@ class ConversationalAgentExecutor(BaseModel):
                     _streamed_chars += len(filtered)
                     yield filtered
 
-            if _tag_buf and not _suppressing:
-                yield _tag_buf
+            leftover = _flush_preflight()
+            if leftover:
+                _streamed_chars += len(leftover)
+                yield leftover
 
             result = invoke_task.result()
             self._last_stream_result = result