refactor: unify rag storage with instance-specific client support (#3455)

- ignore line length errors globally
- migrate knowledge/memory and crew query_knowledge to `SearchResult`
- remove legacy chromadb utils; fix empty metadata handling
- restore openai as default embedding provider; support instance-specific clients
- update and fix tests for `SearchResult` migration and rag changes
2026-05-01 15:22:37 +00:00 · 2025-09-17 14:46:54 -04:00
parent 81bd81e5f5
commit f28e78c5ba
30 changed files with 1956 additions and 976 deletions
--- a/src/crewai/memory/contextual/contextual_memory.py
+++ b/src/crewai/memory/contextual/contextual_memory.py
@@ -1,4 +1,6 @@
-from typing import Optional, TYPE_CHECKING
+from __future__ import annotations
+
+from typing import TYPE_CHECKING

 from crewai.memory import (
    EntityMemory,
@@ -19,9 +21,9 @@ class ContextualMemory:
        ltm: LongTermMemory,
        em: EntityMemory,
        exm: ExternalMemory,
-        agent: Optional["Agent"] = None,
-        task: Optional["Task"] = None,
-    ):
+        agent: Agent | None = None,
+        task: Task | None = None,
+    ) -> None:
        self.stm = stm
        self.ltm = ltm
        self.em = em
@@ -42,7 +44,7 @@ class ContextualMemory:
            self.exm.agent = self.agent
            self.exm.task = self.task

-    def build_context_for_task(self, task, context) -> str:
+    def build_context_for_task(self, task: Task, context: str) -> str:
        """
        Automatically builds a minimal, highly relevant set of contextual information
        for a given task.
@@ -52,14 +54,15 @@ class ContextualMemory:
        if query == "":
            return ""

-        context = []
-        context.append(self._fetch_ltm_context(task.description))
-        context.append(self._fetch_stm_context(query))
-        context.append(self._fetch_entity_context(query))
-        context.append(self._fetch_external_context(query))
-        return "\n".join(filter(None, context))
+        context_parts = [
+            self._fetch_ltm_context(task.description),
+            self._fetch_stm_context(query),
+            self._fetch_entity_context(query),
+            self._fetch_external_context(query),
+        ]
+        return "\n".join(filter(None, context_parts))

-    def _fetch_stm_context(self, query) -> str:
+    def _fetch_stm_context(self, query: str) -> str:
        """
        Fetches recent relevant insights from STM related to the task's description and expected_output,
        formatted as bullet points.
@@ -70,11 +73,11 @@ class ContextualMemory:

        stm_results = self.stm.search(query)
        formatted_results = "\n".join(
-            [f"- {result['context']}" for result in stm_results]
+            [f"- {result['content']}" for result in stm_results]
        )
        return f"Recent Insights:\n{formatted_results}" if stm_results else ""

-    def _fetch_ltm_context(self, task) -> Optional[str]:
+    def _fetch_ltm_context(self, task: str) -> str | None:
        """
        Fetches historical data or insights from LTM that are relevant to the task's description and expected_output,
        formatted as bullet points.
@@ -90,14 +93,14 @@ class ContextualMemory:
        formatted_results = [
            suggestion
            for result in ltm_results
-            for suggestion in result["metadata"]["suggestions"]  # type: ignore # Invalid index type "str" for "str"; expected type "SupportsIndex | slice"
+            for suggestion in result["metadata"]["suggestions"]
        ]
        formatted_results = list(dict.fromkeys(formatted_results))
        formatted_results = "\n".join([f"- {result}" for result in formatted_results])  # type: ignore # Incompatible types in assignment (expression has type "str", variable has type "list[str]")

        return f"Historical Data:\n{formatted_results}" if ltm_results else ""

-    def _fetch_entity_context(self, query) -> str:
+    def _fetch_entity_context(self, query: str) -> str:
        """
        Fetches relevant entity information from Entity Memory related to the task's description and expected_output,
        formatted as bullet points.
@@ -107,7 +110,7 @@ class ContextualMemory:

        em_results = self.em.search(query)
        formatted_results = "\n".join(
-            [f"- {result['context']}" for result in em_results]  # type: ignore #  Invalid index type "str" for "str"; expected type "SupportsIndex | slice"
+            [f"- {result['content']}" for result in em_results]
        )
        return f"Entities:\n{formatted_results}" if em_results else ""

@@ -128,6 +131,6 @@ class ContextualMemory:
            return ""

        formatted_memories = "\n".join(
-            f"- {result['context']}" for result in external_memories
+            f"- {result['content']}" for result in external_memories
        )
        return f"External memories:\n{formatted_memories}"