fix: summary tag handling, TUI autocomplete focus, tool output flooding

1. Summary tags: Reverse the logic — for models like gpt-4.1 that wrap their actual response in <summary> tags (with thinking/CoT before it), extract the inner content instead of stripping it. Streaming uses a preflight buffer that waits for <summary>; if none appears, flushes everything normally. 2. TUI autocomplete: Change @mention accept key from Tab to right-arrow so autocomplete doesn't steal focus from the input widget. Only triggers when there's an active mention context with matches. 3. Tool output: Truncate tool results >4000 chars in LLM message history to prevent the model from echoing full file contents. Add soul-layer instruction telling the agent to summarize tool results rather than repeating them verbatim. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-07-01 13:18:10 +00:00 · 2026-05-14 16:17:44 -04:00
parent 3eb1da2d9c
commit 126d0010ba
2 changed files with 97 additions and 34 deletions
--- a/lib/cli/src/crewai_cli/agent_tui.py
+++ b/lib/cli/src/crewai_cli/agent_tui.py
@@ -71,7 +71,7 @@ except ImportError:


 class ChatTextArea(TextArea):
-    """Multiline chat input: Enter submits, Shift+Enter inserts newline, Tab completes @mentions."""
+    """Multiline chat input: Enter submits, Shift+Enter inserts newline, Right-arrow completes @mentions."""

    BINDINGS = [
        Binding("enter", "submit", "Send", show=False),
@@ -161,10 +161,14 @@ class ChatTextArea(TextArea):
            event.prevent_default()
            self.action_submit()
            return
-        if event.key == "tab":
-            event.prevent_default()
-            self.action_complete()
-            return
+        if event.key == "right":
+            ctx = self._get_mention_context()
+            if ctx is not None:
+                _, _, prefix = ctx
+                if self._get_matches(prefix):
+                    event.prevent_default()
+                    self.action_complete()
+                    return
        if event.key == "escape":
            self._last_mention_prefix = None
            self.post_message(self.MentionChanged("", []))
@@ -834,7 +838,7 @@ class AgentTUI(App[None]):
            hint.display = False
            return
        names = "  ".join(f"@{n}" for n in event.matches[:6])
-        hint.update(f"Tab to complete:  {names}")
+        hint.update(f"→ to complete:  {names}")
        hint.display = True

    # ── Message routing ──
--- a/lib/crewai/src/crewai/new_agent/executor.py
+++ b/lib/crewai/src/crewai/new_agent/executor.py
@@ -127,7 +127,10 @@ class ConversationalAgentExecutor(BaseModel):
        soul = (
            f"You are {agent.role}.\n"
            f"Your goal: {agent.goal}\n"
-            f"Background: {agent.backstory}"
+            f"Background: {agent.backstory}\n\n"
+            "When you use tools, act on the results directly. "
+            "Never repeat raw tool output (file contents, command output, etc.) "
+            "in your response — summarize findings or state what you did instead."
        )
        stack.add("soul", soul, source="agent.role/goal/backstory")

@@ -629,14 +632,22 @@ class ConversationalAgentExecutor(BaseModel):
            pass
        return ""

-    _INTERNAL_TAG_RE = re.compile(
-        r"<summary>.*?</summary>", re.DOTALL
+    _SUMMARY_EXTRACT_RE = re.compile(
+        r"<summary>\s*(.*?)\s*</summary>", re.DOTALL
    )

    def _strip_internal_tags(self, text: str) -> str:
-        """Strip <summary> blocks that leak from the summarization prompt."""
-        cleaned = self._INTERNAL_TAG_RE.sub("", text).strip()
-        return cleaned if cleaned else text
+        """Handle <summary> tags leaked from the summarization prompt.
+
+        Some models (e.g. gpt-4.1) wrap their actual response in <summary>
+        tags, treating the preceding text as chain-of-thought. When summary
+        tags are present we extract the inner content as the real answer.
+        """
+        match = self._SUMMARY_EXTRACT_RE.search(text)
+        if match:
+            inner = match.group(1).strip()
+            return inner if inner else text
+        return text

    def _detect_artifacts(self, tool_name: str, result_str: str) -> list[Artifact]:
        """GAP-67: Detect artifacts from tool results.
@@ -2064,11 +2075,19 @@ class ConversationalAgentExecutor(BaseModel):
                    ],
                }
            )
+            _MAX_TOOL_RESULT_CHARS = 4000
+            display_result = result_str
+            if len(result_str) > _MAX_TOOL_RESULT_CHARS:
+                display_result = (
+                    result_str[:_MAX_TOOL_RESULT_CHARS]
+                    + f"\n\n[Truncated — {len(result_str)} chars total. "
+                    "Use the result directly; do not repeat it in your response.]"
+                )
            llm_messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call_id or func_name,
-                    "content": result_str,
+                    "content": display_result,
                }
            )

@@ -2400,39 +2419,77 @@ class ConversationalAgentExecutor(BaseModel):
        invoke_task = asyncio.create_task(self.ainvoke(user_message))
        _streamed_chars = 0
        _last_status_time = time.monotonic()
+
+        # States: "preflight" (buffering, no <summary> seen yet),
+        #         "inside"   (<summary> found, yielding inner content),
+        #         "done"     (</summary> seen, suppress the rest),
+        #         "passthrough" (no summary tags — yield everything)
+        _state = "preflight"
+        _preflight_buf: list[str] = []
        _tag_buf = ""
-        _suppressing = False

        def _filter_chunk(raw: str) -> str:
-            """Filter <summary>...</summary> blocks from streamed chunks."""
-            nonlocal _tag_buf, _suppressing
-            out = []
+            nonlocal _state, _tag_buf
+            out: list[str] = []
            for ch in raw:
-                if _suppressing:
-                    _tag_buf += ch
-                    if _tag_buf.endswith("</summary>"):
-                        _suppressing = False
-                        _tag_buf = ""
+                if _state == "done":
+                    break
+
+                if _state == "passthrough":
+                    out.append(ch)
                    continue
+
+                # Accumulate into tag buffer when we might be mid-tag
                if _tag_buf:
                    _tag_buf += ch
-                    if len(_tag_buf) <= len("<summary>"):
-                        if "<summary>"[: len(_tag_buf)] == _tag_buf:
-                            if _tag_buf == "<summary>":
-                                _suppressing = True
+
+                    if _state == "preflight":
+                        target = "<summary>"
+                        if target[: len(_tag_buf)] == _tag_buf:
+                            if _tag_buf == target:
+                                _state = "inside"
+                                _preflight_buf.clear()
+                                _tag_buf = ""
                            continue
-                        else:
-                            out.append(_tag_buf)
-                            _tag_buf = ""
-                    else:
+                        # Not a match — dump tag_buf to preflight buffer
+                        _preflight_buf.append(_tag_buf)
+                        _tag_buf = ""
+
+                    elif _state == "inside":
+                        target = "</summary>"
+                        if target[: len(_tag_buf)] == _tag_buf:
+                            if _tag_buf == target:
+                                _state = "done"
+                                _tag_buf = ""
+                            continue
+                        # Not a closing tag — flush buffered chars
                        out.append(_tag_buf)
                        _tag_buf = ""
-                elif ch == "<":
+                    continue
+
+                if ch == "<":
                    _tag_buf = ch
-                else:
+                    continue
+
+                if _state == "preflight":
+                    _preflight_buf.append(ch)
+                elif _state == "inside":
                    out.append(ch)
+
            return "".join(out)

+        def _flush_preflight() -> str:
+            """If we never saw <summary>, flush buffered text."""
+            nonlocal _state
+            if _state == "preflight":
+                _state = "passthrough"
+                result = "".join(_preflight_buf)
+                _preflight_buf.clear()
+                if _tag_buf:
+                    result += _tag_buf
+                return result
+            return ""
+
        try:
            while not invoke_task.done():
                try:
@@ -2456,8 +2513,10 @@ class ConversationalAgentExecutor(BaseModel):
                    _streamed_chars += len(filtered)
                    yield filtered

-            if _tag_buf and not _suppressing:
-                yield _tag_buf
+            leftover = _flush_preflight()
+            if leftover:
+                _streamed_chars += len(leftover)
+                yield leftover

            result = invoke_task.result()
            self._last_stream_result = result