Add Record/Replay functionality for offline processing (Issue #2759)

Co-Authored-By: Joe Moura <joao@crewai.com>
Author: Devin AI
Date: 2025-05-05 22:23:03 +00:00
parent dabf02a90d
commit d5dfd5a1f5
8 changed files with 463 additions and 10 deletions

View File

@@ -201,9 +201,20 @@ def install(context):
@crewai.command()
def run():
@click.option(
"--record",
is_flag=True,
help="Record LLM responses for later replay",
)
@click.option(
"--replay",
is_flag=True,
help="Replay from recorded LLM responses without making network calls",
)
def run(record: bool = False, replay: bool = False):
"""Run the Crew."""
run_crew()
click.echo("Running the Crew")
run_crew(record=record, replay=replay)
@crewai.command()
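
A quick way to sanity-check the new flags is click's test runner. A minimal sketch, assuming the command object is importable as crewai.cli.cli.run (the exact module path is an assumption) and with run_crew patched out so no subprocess is started:

    from unittest.mock import patch
    from click.testing import CliRunner

    from crewai.cli.cli import run  # assumed import path for the click command

    runner = CliRunner()
    with patch("crewai.cli.cli.run_crew") as mock_run_crew:
        result = runner.invoke(run, ["--record"])

    assert result.exit_code == 0
    # The flag values are forwarded straight into run_crew(record=..., replay=...).
    mock_run_crew.assert_called_once_with(record=True, replay=False)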

View File

@@ -14,13 +14,17 @@ class CrewType(Enum):
FLOW = "flow"
def run_crew() -> None:
def run_crew(record: bool = False, replay: bool = False) -> None:
"""
Run the crew or flow by running a command in the UV environment.
Starting from version 0.103.0, this command can be used to run both
standard crews and flows. For flows, it detects the type from pyproject.toml
and automatically runs the appropriate command.
Args:
record (bool, optional): Whether to record LLM responses. Defaults to False.
replay (bool, optional): Whether to replay from recorded LLM responses. Defaults to False.
"""
crewai_version = get_crewai_version()
min_required_version = "0.71.0"
@@ -44,17 +48,24 @@ def run_crew() -> None:
click.echo(f"Running the {'Flow' if is_flow else 'Crew'}")
# Execute the appropriate command
execute_command(crew_type)
execute_command(crew_type, record, replay)
def execute_command(crew_type: CrewType) -> None:
def execute_command(crew_type: CrewType, record: bool = False, replay: bool = False) -> None:
"""
Execute the appropriate command based on crew type.
Args:
crew_type: The type of crew to run
record: Whether to record LLM responses
replay: Whether to replay from recorded LLM responses
"""
command = ["uv", "run", "kickoff" if crew_type == CrewType.FLOW else "run_crew"]
if record:
command.append("--record")
if replay:
command.append("--replay")
try:
subprocess.run(command, capture_output=False, text=True, check=True)
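
The flags only affect the argv handed to subprocess.run. A standalone sketch of the same list-building logic (build_command is an illustrative name, not the shipped helper):

    from typing import List

    def build_command(is_flow: bool, record: bool = False, replay: bool = False) -> List[str]:
        # Mirror execute_command: choose the UV entry point, then append the flags.
        command = ["uv", "run", "kickoff" if is_flow else "run_crew"]
        if record:
            command.append("--record")
        if replay:
            command.append("--replay")
        return command

    assert build_command(is_flow=False, record=True) == ["uv", "run", "run_crew", "--record"]
    assert build_command(is_flow=True, replay=True) == ["uv", "run", "kickoff", "--replay"]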

View File

@@ -244,6 +244,15 @@ class Crew(FlowTrackable, BaseModel):
default_factory=SecurityConfig,
description="Security configuration for the crew, including fingerprinting.",
)
record_mode: bool = Field(
default=False,
description="Whether to record LLM responses for later replay.",
)
replay_mode: bool = Field(
default=False,
description="Whether to replay from recorded LLM responses without making network calls.",
)
_llm_response_cache_handler: Optional[Any] = PrivateAttr(default=None)
@field_validator("id", mode="before")
@classmethod
@@ -633,6 +642,17 @@ class Crew(FlowTrackable, BaseModel):
self._task_output_handler.reset()
self._logging_color = "bold_purple"
if self.record_mode and self.replay_mode:
raise ValueError("Cannot use both record_mode and replay_mode at the same time")
if self.record_mode or self.replay_mode:
from crewai.utilities.llm_response_cache_handler import LLMResponseCacheHandler
self._llm_response_cache_handler = LLMResponseCacheHandler()
if self.record_mode:
self._llm_response_cache_handler.start_recording()
elif self.replay_mode:
self._llm_response_cache_handler.start_replaying()
if inputs is not None:
self._inputs = inputs
self._interpolate_inputs(inputs)
@@ -651,6 +671,12 @@ class Crew(FlowTrackable, BaseModel):
if not agent.step_callback: # type: ignore # "BaseAgent" has no attribute "step_callback"
agent.step_callback = self.step_callback # type: ignore # "BaseAgent" has no attribute "step_callback"
if self._llm_response_cache_handler:
if hasattr(agent, "llm") and agent.llm:
agent.llm.set_response_cache_handler(self._llm_response_cache_handler)
if hasattr(agent, "function_calling_llm") and agent.function_calling_llm:
agent.function_calling_llm.set_response_cache_handler(self._llm_response_cache_handler)
agent.create_agent_executor()
@@ -1287,6 +1313,9 @@ class Crew(FlowTrackable, BaseModel):
def _finish_execution(self, final_string_output: str) -> None:
if self.max_rpm:
self._rpm_controller.stop_rpm_counter()
if self._llm_response_cache_handler:
self._llm_response_cache_handler.stop()
def calculate_usage_metrics(self) -> UsageMetrics:
"""Calculates and returns the usage metrics."""

View File

@@ -296,6 +296,7 @@ class LLM(BaseLLM):
self.additional_params = kwargs
self.is_anthropic = self._is_anthropic_model(model)
self.stream = stream
self._response_cache_handler = None
litellm.drop_params = True
@@ -869,25 +870,43 @@ class LLM(BaseLLM):
for message in messages:
if message.get("role") == "system":
message["role"] = "assistant"
if self._response_cache_handler and self._response_cache_handler.is_replaying():
cached_response = self._response_cache_handler.get_cached_response(
self.model, messages
)
if cached_response:
# Emit completion event for the cached response
self._handle_emit_call_events(cached_response, LLMCallType.LLM_CALL)
return cached_response
# --- 5) Set up callbacks if provided
# --- 6) Set up callbacks if provided
with suppress_warnings():
if callbacks and len(callbacks) > 0:
self.set_callbacks(callbacks)
try:
# --- 6) Prepare parameters for the completion call
# --- 7) Prepare parameters for the completion call
params = self._prepare_completion_params(messages, tools)
# --- 7) Make the completion call and handle response
# --- 8) Make the completion call and handle response
if self.stream:
return self._handle_streaming_response(
response = self._handle_streaming_response(
params, callbacks, available_functions
)
else:
return self._handle_non_streaming_response(
response = self._handle_non_streaming_response(
params, callbacks, available_functions
)
if (self._response_cache_handler and
self._response_cache_handler.is_recording() and
isinstance(response, str)):
self._response_cache_handler.cache_response(
self.model, messages, response
)
return response
except LLMContextLengthExceededException:
# Re-raise LLMContextLengthExceededException as it should be handled
@@ -1107,3 +1126,18 @@ class LLM(BaseLLM):
litellm.success_callback = success_callbacks
litellm.failure_callback = failure_callbacks
def set_response_cache_handler(self, handler):
"""
Sets the response cache handler for record/replay functionality.
Args:
handler: An instance of LLMResponseCacheHandler.
"""
self._response_cache_handler = handler
def clear_response_cache_handler(self):
"""
Clears the response cache handler.
"""
self._response_cache_handler = None
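
The handler can also be attached to a bare LLM instance. A minimal record-then-replay round trip, assuming a valid API key is configured for the first, live call:

    from crewai.llm import LLM
    from crewai.utilities.llm_response_cache_handler import LLMResponseCacheHandler

    handler = LLMResponseCacheHandler()
    llm = LLM(model="gpt-4o-mini")
    llm.set_response_cache_handler(handler)

    messages = [{"role": "user", "content": "Hello, world!"}]

    handler.start_recording()
    live_answer = llm.call(messages)    # real completion; stored in the SQLite cache

    handler.start_replaying()
    cached_answer = llm.call(messages)  # same prompt; answered from the cache, no network call

    assert cached_answer == live_answer
    handler.stop()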

View File

@@ -0,0 +1,131 @@
import json
import sqlite3
import hashlib
from typing import Any, Dict, List, Optional
from crewai.utilities import Printer
from crewai.utilities.crew_json_encoder import CrewJSONEncoder
from crewai.utilities.paths import db_storage_path
class LLMResponseCacheStorage:
"""
SQLite storage for caching LLM responses.
Used for offline record/replay functionality.
"""
def __init__(
self, db_path: str = f"{db_storage_path()}/llm_response_cache.db"
) -> None:
self.db_path = db_path
self._printer: Printer = Printer()
self._initialize_db()
def _initialize_db(self):
"""
Initializes the SQLite database and creates the llm_response_cache table
"""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS llm_response_cache (
request_hash TEXT PRIMARY KEY,
model TEXT,
messages TEXT,
response TEXT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
)
"""
)
conn.commit()
except sqlite3.Error as e:
self._printer.print(
content=f"LLM RESPONSE CACHE ERROR: An error occurred during database initialization: {e}",
color="red",
)
def _compute_request_hash(self, model: str, messages: List[Dict[str, str]]) -> str:
"""
Computes a hash for the request based on the model and messages.
This hash is used as the key for caching.
Sensitive information like API keys should not be included in the hash.
"""
message_str = json.dumps(messages, sort_keys=True)
request_hash = hashlib.sha256(f"{model}:{message_str}".encode()).hexdigest()
return request_hash
def add(self, model: str, messages: List[Dict[str, str]], response: str) -> None:
"""
Adds a response to the cache.
"""
try:
request_hash = self._compute_request_hash(model, messages)
messages_json = json.dumps(messages, cls=CrewJSONEncoder)
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
INSERT OR REPLACE INTO llm_response_cache
(request_hash, model, messages, response)
VALUES (?, ?, ?, ?)
""",
(
request_hash,
model,
messages_json,
response,
),
)
conn.commit()
except sqlite3.Error as e:
self._printer.print(
content=f"LLM RESPONSE CACHE ERROR: Failed to add response: {e}",
color="red",
)
def get(self, model: str, messages: List[Dict[str, str]]) -> Optional[str]:
"""
Retrieves a response from the cache based on the model and messages.
Returns None if not found.
"""
try:
request_hash = self._compute_request_hash(model, messages)
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
SELECT response
FROM llm_response_cache
WHERE request_hash = ?
""",
(request_hash,),
)
result = cursor.fetchone()
return result[0] if result else None
except sqlite3.Error as e:
self._printer.print(
content=f"LLM RESPONSE CACHE ERROR: Failed to retrieve response: {e}",
color="red",
)
return None
def delete_all(self) -> None:
"""
Deletes all records from the llm_response_cache table.
"""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("DELETE FROM llm_response_cache")
conn.commit()
except sqlite3.Error as e:
self._printer.print(
content=f"LLM RESPONSE CACHE ERROR: Failed to clear cache: {e}",
color="red",
)
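
The storage is keyed purely by a SHA-256 hash of the model name plus the JSON-serialized messages, so the same prompt always maps to the same row. A small sketch exercising it in isolation via the db_path constructor argument (here pointed at a throwaway directory):

    import os
    import tempfile

    from crewai.memory.storage.llm_response_cache_storage import LLMResponseCacheStorage

    db_path = os.path.join(tempfile.mkdtemp(), "llm_response_cache.db")
    storage = LLMResponseCacheStorage(db_path=db_path)

    messages = [{"role": "user", "content": "Hello, world!"}]
    storage.add("gpt-4o-mini", messages, "Hello, human!")

    # Identical model + messages -> identical request_hash -> cache hit.
    assert storage.get("gpt-4o-mini", messages) == "Hello, human!"
    # Any change to the messages changes the hash -> cache miss.
    assert storage.get("gpt-4o-mini", [{"role": "user", "content": "Bye"}]) is None

    storage.delete_all()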

View File

@@ -0,0 +1,69 @@
from typing import Any, Dict, List, Optional
from crewai.memory.storage.llm_response_cache_storage import LLMResponseCacheStorage
class LLMResponseCacheHandler:
"""
Handler for the LLM response cache storage.
Used for record/replay functionality.
"""
def __init__(self) -> None:
self.storage = LLMResponseCacheStorage()
self._recording = False
self._replaying = False
def start_recording(self) -> None:
"""
Starts recording LLM responses.
"""
self._recording = True
self._replaying = False
def start_replaying(self) -> None:
"""
Starts replaying LLM responses from the cache.
"""
self._recording = False
self._replaying = True
def stop(self) -> None:
"""
Stops recording or replaying.
"""
self._recording = False
self._replaying = False
def is_recording(self) -> bool:
"""
Returns whether recording is active.
"""
return self._recording
def is_replaying(self) -> bool:
"""
Returns whether replaying is active.
"""
return self._replaying
def cache_response(self, model: str, messages: List[Dict[str, str]], response: str) -> None:
"""
Caches an LLM response if recording is active.
"""
if self._recording:
self.storage.add(model, messages, response)
def get_cached_response(self, model: str, messages: List[Dict[str, str]]) -> Optional[str]:
"""
Retrieves a cached LLM response if replaying is active.
Returns None if not found or if replaying is not active.
"""
if self._replaying:
return self.storage.get(model, messages)
return None
def clear_cache(self) -> None:
"""
Clears the LLM response cache.
"""
self.storage.delete_all()
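
The handler is a thin state machine over the storage: cache_response only writes while recording, get_cached_response only reads while replaying, and stop() turns both into no-ops. A short sketch of that behaviour (note it touches the real on-disk cache under db_storage_path()):

    from crewai.utilities.llm_response_cache_handler import LLMResponseCacheHandler

    handler = LLMResponseCacheHandler()
    messages = [{"role": "user", "content": "ping"}]

    handler.start_recording()
    handler.cache_response("gpt-4o-mini", messages, "pong")   # written to the cache

    handler.start_replaying()
    assert handler.get_cached_response("gpt-4o-mini", messages) == "pong"

    handler.stop()
    # Neither mode is active, so lookups return None even though the row exists.
    assert handler.get_cached_response("gpt-4o-mini", messages) is None

    handler.clear_cache()   # wipe the table between recording sessions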

View File

@@ -0,0 +1,78 @@
import pytest
from unittest.mock import MagicMock, patch
from crewai.llm import LLM
from crewai.utilities.llm_response_cache_handler import LLMResponseCacheHandler
@pytest.fixture
def handler():
handler = LLMResponseCacheHandler()
handler.storage.add = MagicMock()
handler.storage.get = MagicMock()
return handler
@pytest.mark.vcr(filter_headers=["authorization"])
def test_llm_recording(handler):
handler.start_recording()
llm = LLM(model="gpt-4o-mini")
llm.set_response_cache_handler(handler)
messages = [{"role": "user", "content": "Hello, world!"}]
with patch('litellm.completion') as mock_completion:
mock_completion.return_value = {
"choices": [{"message": {"content": "Hello, human!"}}]
}
response = llm.call(messages)
assert response == "Hello, human!"
handler.storage.add.assert_called_once_with(
"gpt-4o-mini", messages, "Hello, human!"
)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_llm_replaying(handler):
handler.start_replaying()
handler.storage.get.return_value = "Cached response"
llm = LLM(model="gpt-4o-mini")
llm.set_response_cache_handler(handler)
messages = [{"role": "user", "content": "Hello, world!"}]
with patch('litellm.completion') as mock_completion:
response = llm.call(messages)
assert response == "Cached response"
mock_completion.assert_not_called()
handler.storage.get.assert_called_once_with("gpt-4o-mini", messages)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_llm_replay_fallback(handler):
handler.start_replaying()
handler.storage.get.return_value = None
llm = LLM(model="gpt-4o-mini")
llm.set_response_cache_handler(handler)
messages = [{"role": "user", "content": "Hello, world!"}]
with patch('litellm.completion') as mock_completion:
mock_completion.return_value = {
"choices": [{"message": {"content": "Hello, human!"}}]
}
response = llm.call(messages)
assert response == "Hello, human!"
mock_completion.assert_called_once()

View File

@@ -0,0 +1,90 @@
import pytest
from unittest.mock import MagicMock, patch
from crewai.agent import Agent
from crewai.crew import Crew
from crewai.process import Process
from crewai.task import Task
@pytest.mark.vcr(filter_headers=["authorization"])
def test_crew_recording_mode():
agent = Agent(
role="Test Agent",
goal="Test the recording functionality",
backstory="A test agent for recording LLM responses",
)
task = Task(
description="Return a simple response",
expected_output="A simple response",
agent=agent,
)
crew = Crew(
agents=[agent],
tasks=[task],
process=Process.sequential,
record_mode=True,
)
mock_handler = MagicMock()
crew._llm_response_cache_handler = mock_handler
mock_llm = MagicMock()
agent.llm = mock_llm
with patch('crewai.utilities.llm_response_cache_handler.LLMResponseCacheHandler', return_value=mock_handler):
crew.kickoff()
mock_handler.start_recording.assert_called_once()
mock_llm.set_response_cache_handler.assert_called_once_with(mock_handler)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_crew_replay_mode():
agent = Agent(
role="Test Agent",
goal="Test the replay functionality",
backstory="A test agent for replaying LLM responses",
)
task = Task(
description="Return a simple response",
expected_output="A simple response",
agent=agent,
)
crew = Crew(
agents=[agent],
tasks=[task],
process=Process.sequential,
replay_mode=True,
)
mock_handler = MagicMock()
crew._llm_response_cache_handler = mock_handler
mock_llm = MagicMock()
agent.llm = mock_llm
with patch('crewai.utilities.llm_response_cache_handler.LLMResponseCacheHandler', return_value=mock_handler):
crew.kickoff()
mock_handler.start_replaying.assert_called_once()
mock_llm.set_response_cache_handler.assert_called_once_with(mock_handler)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_record_replay_flags_conflict():
with pytest.raises(ValueError):
crew = Crew(
agents=[],
tasks=[],
process=Process.sequential,
record_mode=True,
replay_mode=True,
)
crew.kickoff()