fix: handle empty documents list in KnowledgeStorage.save() and asave()

Fixes #4277. When KnowledgeStorage.save() or asave() is called with an empty documents list, the method now returns early instead of propagating a low-level ValueError from ChromaDB's upsert operation. An empty list is a valid edge case that can occur in real-world workflows (e.g., after filtering, retrieval failures, or conditional logic) and should be handled gracefully as a no-op. Co-Authored-By: João <joao@crewai.com>
2026-01-25 16:18:13 +00:00 · 2026-01-25 13:04:47 +00:00
5 changed files with 44 additions and 372 deletions
--- a/lib/crewai/src/crewai/knowledge/storage/knowledge_storage.py
+++ b/lib/crewai/src/crewai/knowledge/storage/knowledge_storage.py
@@ -99,6 +99,9 @@ class KnowledgeStorage(BaseKnowledgeStorage):
            )

    def save(self, documents: list[str]) -> None:
+        if not documents:
+            return
+
        try:
            client = self._get_client()
            collection_name = (
@@ -177,6 +180,9 @@ class KnowledgeStorage(BaseKnowledgeStorage):
        Args:
            documents: List of document strings to save.
        """
+        if not documents:
+            return
+
        try:
            client = self._get_client()
            collection_name = (
--- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
@@ -555,11 +555,7 @@ class GeminiCompletion(BaseLLM):

                response_data: dict[str, Any]
                try:
-                    parsed = json.loads(text_content) if text_content else {}
-                    if isinstance(parsed, dict):
-                        response_data = parsed
-                    else:
-                        response_data = {"result": parsed}
+                    response_data = json.loads(text_content) if text_content else {}
                except (json.JSONDecodeError, TypeError):
                    response_data = {"result": text_content}

--- a/lib/crewai/tests/cassettes/llms/google/test_gemini_tool_returning_float.yaml
+++ b/lib/crewai/tests/cassettes/llms/google/test_gemini_tool_returning_float.yaml
@@ -1,319 +0,0 @@
-interactions:
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: What is 10000 + 20000?
-      Use the sum_numbers tool to calculate this.\n\nThis is the expected criteria
-      for your final answer: The sum of the two numbers\nyou MUST return the actual
-      complete content as the final answer, not a summary.\n\nThis is VERY important
-      to you, your job depends on it!"}], "role": "user"}], "systemInstruction": {"parts":
-      [{"text": "You are Calculator. You are a calculator that adds numbers.\nYour
-      personal goal is: Calculate numbers accurately"}], "role": "user"}, "tools":
-      [{"functionDeclarations": [{"description": "Add two numbers together and return
-      the result", "name": "sum_numbers", "parameters": {"properties": {"a": {"description":
-      "The first number to add", "title": "A", "type": "NUMBER"}, "b": {"description":
-      "The second number to add", "title": "B", "type": "NUMBER"}}, "required": ["a",
-      "b"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '962'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-001:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"functionCall\": {\n              \"name\": \"sum_numbers\",\n
-        \             \"args\": {\n                \"a\": 10000,\n                \"b\":
-        20000\n              }\n            }\n          }\n        ],\n        \"role\":
-        \"model\"\n      },\n      \"finishReason\": \"STOP\",\n      \"avgLogprobs\":
-        -0.00059548033667462211\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
-        127,\n    \"candidatesTokenCount\": 7,\n    \"totalTokenCount\": 134,\n    \"promptTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 127\n
-        \     }\n    ],\n    \"candidatesTokensDetails\": [\n      {\n        \"modality\":
-        \"TEXT\",\n        \"tokenCount\": 7\n      }\n    ]\n  },\n  \"modelVersion\":
-        \"gemini-2.0-flash-001\",\n  \"responseId\": \"bLBzabiACaP3-8YP7s-P6QI\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Fri, 23 Jan 2026 17:31:24 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=673
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: What is 10000 + 20000?
-      Use the sum_numbers tool to calculate this.\n\nThis is the expected criteria
-      for your final answer: The sum of the two numbers\nyou MUST return the actual
-      complete content as the final answer, not a summary.\n\nThis is VERY important
-      to you, your job depends on it!"}], "role": "user"}, {"parts": [{"functionCall":
-      {"args": {"a": 10000, "b": 20000}, "name": "sum_numbers"}}], "role": "model"},
-      {"parts": [{"functionResponse": {"name": "sum_numbers", "response": {"result":
-      30000}}}], "role": "user"}, {"parts": [{"text": "Analyze the tool result. If
-      requirements are met, provide the Final Answer. Otherwise, call the next tool.
-      Deliver only the answer without meta-commentary."}], "role": "user"}], "systemInstruction":
-      {"parts": [{"text": "You are Calculator. You are a calculator that adds numbers.\nYour
-      personal goal is: Calculate numbers accurately"}], "role": "user"}, "tools":
-      [{"functionDeclarations": [{"description": "Add two numbers together and return
-      the result", "name": "sum_numbers", "parameters": {"properties": {"a": {"description":
-      "The first number to add", "title": "A", "type": "NUMBER"}, "b": {"description":
-      "The second number to add", "title": "B", "type": "NUMBER"}}, "required": ["a",
-      "b"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences": ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1374'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-001:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"text\": \"\"\n          }\n        ],\n        \"role\":
-        \"model\"\n      },\n      \"finishReason\": \"STOP\"\n    }\n  ],\n  \"usageMetadata\":
-        {\n    \"promptTokenCount\": 171,\n    \"totalTokenCount\": 171,\n    \"promptTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 171\n
-        \     }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-001\",\n  \"responseId\":
-        \"bLBzaaKgMc-ajrEPk7bIuQ8\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Fri, 23 Jan 2026 17:31:25 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=382
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: What is 10000 + 20000?
-      Use the sum_numbers tool to calculate this.\n\nThis is the expected criteria
-      for your final answer: The sum of the two numbers\nyou MUST return the actual
-      complete content as the final answer, not a summary.\n\nThis is VERY important
-      to you, your job depends on it!"}], "role": "user"}, {"parts": [{"functionCall":
-      {"args": {"a": 10000, "b": 20000}, "name": "sum_numbers"}}], "role": "model"},
-      {"parts": [{"functionResponse": {"name": "sum_numbers", "response": {"result":
-      30000}}}], "role": "user"}, {"parts": [{"text": "Analyze the tool result. If
-      requirements are met, provide the Final Answer. Otherwise, call the next tool.
-      Deliver only the answer without meta-commentary."}], "role": "user"}, {"parts":
-      [{"text": "\nCurrent Task: What is 10000 + 20000? Use the sum_numbers tool to
-      calculate this.\n\nThis is the expected criteria for your final answer: The
-      sum of the two numbers\nyou MUST return the actual complete content as the final
-      answer, not a summary.\n\nThis is VERY important to you, your job depends on
-      it!"}], "role": "user"}], "systemInstruction": {"parts": [{"text": "You are
-      Calculator. You are a calculator that adds numbers.\nYour personal goal is:
-      Calculate numbers accurately\n\nYou are Calculator. You are a calculator that
-      adds numbers.\nYour personal goal is: Calculate numbers accurately"}], "role":
-      "user"}, "tools": [{"functionDeclarations": [{"description": "Add two numbers
-      together and return the result", "name": "sum_numbers", "parameters": {"properties":
-      {"a": {"description": "The first number to add", "title": "A", "type": "NUMBER"},
-      "b": {"description": "The second number to add", "title": "B", "type": "NUMBER"}},
-      "required": ["a", "b"], "type": "OBJECT"}}]}], "generationConfig": {"stopSequences":
-      ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '1837'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-001:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"text\": \"\"\n          }\n        ],\n        \"role\":
-        \"model\"\n      },\n      \"finishReason\": \"STOP\"\n    }\n  ],\n  \"usageMetadata\":
-        {\n    \"promptTokenCount\": 271,\n    \"totalTokenCount\": 271,\n    \"promptTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 271\n
-        \     }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-001\",\n  \"responseId\":
-        \"bbBzaczHDcW7jrEPgaj1CA\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Fri, 23 Jan 2026 17:31:25 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=410
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: What is 10000 + 20000?
-      Use the sum_numbers tool to calculate this.\n\nThis is the expected criteria
-      for your final answer: The sum of the two numbers\nyou MUST return the actual
-      complete content as the final answer, not a summary.\n\nThis is VERY important
-      to you, your job depends on it!"}], "role": "user"}, {"parts": [{"functionCall":
-      {"args": {"a": 10000, "b": 20000}, "name": "sum_numbers"}}], "role": "model"},
-      {"parts": [{"functionResponse": {"name": "sum_numbers", "response": {"result":
-      30000}}}], "role": "user"}, {"parts": [{"text": "Analyze the tool result. If
-      requirements are met, provide the Final Answer. Otherwise, call the next tool.
-      Deliver only the answer without meta-commentary."}], "role": "user"}, {"parts":
-      [{"text": "\nCurrent Task: What is 10000 + 20000? Use the sum_numbers tool to
-      calculate this.\n\nThis is the expected criteria for your final answer: The
-      sum of the two numbers\nyou MUST return the actual complete content as the final
-      answer, not a summary.\n\nThis is VERY important to you, your job depends on
-      it!"}], "role": "user"}, {"parts": [{"text": "\nCurrent Task: What is 10000
-      + 20000? Use the sum_numbers tool to calculate this.\n\nThis is the expected
-      criteria for your final answer: The sum of the two numbers\nyou MUST return
-      the actual complete content as the final answer, not a summary.\n\nThis is VERY
-      important to you, your job depends on it!"}], "role": "user"}], "systemInstruction":
-      {"parts": [{"text": "You are Calculator. You are a calculator that adds numbers.\nYour
-      personal goal is: Calculate numbers accurately\n\nYou are Calculator. You are
-      a calculator that adds numbers.\nYour personal goal is: Calculate numbers accurately\n\nYou
-      are Calculator. You are a calculator that adds numbers.\nYour personal goal
-      is: Calculate numbers accurately"}], "role": "user"}, "tools": [{"functionDeclarations":
-      [{"description": "Add two numbers together and return the result", "name": "sum_numbers",
-      "parameters": {"properties": {"a": {"description": "The first number to add",
-      "title": "A", "type": "NUMBER"}, "b": {"description": "The second number to
-      add", "title": "B", "type": "NUMBER"}}, "required": ["a", "b"], "type": "OBJECT"}}]}],
-      "generationConfig": {"stopSequences": ["\nObservation:"]}}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - '*/*'
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '2300'
-      content-type:
-      - application/json
-      host:
-      - generativelanguage.googleapis.com
-      x-goog-api-client:
-      - google-genai-sdk/1.49.0 gl-python/3.13.3
-      x-goog-api-key:
-      - X-GOOG-API-KEY-XXX
-    method: POST
-    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-001:generateContent
-  response:
-    body:
-      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"parts\":
-        [\n          {\n            \"text\": \"\\n{\\\"sum_numbers_response\\\":
-        {\\\"result\\\": 30000}}\\n\"\n          }\n        ],\n        \"role\":
-        \"model\"\n      },\n      \"finishReason\": \"STOP\",\n      \"avgLogprobs\":
-        -0.0038021293125654523\n    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\":
-        371,\n    \"candidatesTokenCount\": 19,\n    \"totalTokenCount\": 390,\n    \"promptTokensDetails\":
-        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 371\n
-        \     }\n    ],\n    \"candidatesTokensDetails\": [\n      {\n        \"modality\":
-        \"TEXT\",\n        \"tokenCount\": 19\n      }\n    ]\n  },\n  \"modelVersion\":
-        \"gemini-2.0-flash-001\",\n  \"responseId\": \"bbBzaauxJ_SgjrEP7onK2Ak\"\n}\n"
-    headers:
-      Alt-Svc:
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Content-Type:
-      - application/json; charset=UTF-8
-      Date:
-      - Fri, 23 Jan 2026 17:31:26 GMT
-      Server:
-      - scaffolding on HTTPServer2
-      Server-Timing:
-      - gfet4t7; dur=454
-      Transfer-Encoding:
-      - chunked
-      Vary:
-      - Origin
-      - X-Origin
-      - Referer
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      X-Frame-Options:
-      - X-FRAME-OPTIONS-XXX
-      X-XSS-Protection:
-      - '0'
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/lib/crewai/tests/knowledge/test_knowledge_storage_integration.py
+++ b/lib/crewai/tests/knowledge/test_knowledge_storage_integration.py
@@ -193,3 +193,40 @@ def test_dimension_mismatch_error_handling(mock_get_client: MagicMock) -> None:

    with pytest.raises(ValueError, match="Embedding dimension mismatch"):
        storage.save(["test document"])
+
+
+@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
+def test_save_empty_documents_list(mock_get_client: MagicMock) -> None:
+    """Test that save() handles empty documents list gracefully.
+
+    Calling save() with an empty documents list should be a no-op and not
+    propagate low-level storage exceptions from ChromaDB.
+    """
+    mock_client = MagicMock()
+    mock_get_client.return_value = mock_client
+
+    storage = KnowledgeStorage(collection_name="empty_docs_test")
+
+    storage.save([])
+
+    mock_client.get_or_create_collection.assert_not_called()
+    mock_client.add_documents.assert_not_called()
+
+
+@pytest.mark.asyncio
+@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
+async def test_asave_empty_documents_list(mock_get_client: MagicMock) -> None:
+    """Test that asave() handles empty documents list gracefully.
+
+    Calling asave() with an empty documents list should be a no-op and not
+    propagate low-level storage exceptions from ChromaDB.
+    """
+    mock_client = MagicMock()
+    mock_get_client.return_value = mock_client
+
+    storage = KnowledgeStorage(collection_name="empty_docs_async_test")
+
+    await storage.asave([])
+
+    mock_client.aget_or_create_collection.assert_not_called()
+    mock_client.aadd_documents.assert_not_called()
--- a/lib/crewai/tests/llms/google/test_google.py
+++ b/lib/crewai/tests/llms/google/test_google.py
@@ -635,54 +635,6 @@ def test_gemini_token_usage_tracking():
    assert usage.total_tokens > 0


-@pytest.mark.vcr()
-def test_gemini_tool_returning_float():
-    """
-    Test that Gemini properly handles tools that return non-dict values like floats.
-
-    This is an end-to-end test that verifies the agent can use a tool that returns
-    a float (which gets wrapped in {"result": value} for Gemini's FunctionResponse).
-    """
-    from pydantic import BaseModel, Field
-    from typing import Type
-    from crewai.tools import BaseTool
-
-    class SumNumbersToolInput(BaseModel):
-        a: float = Field(..., description="The first number to add")
-        b: float = Field(..., description="The second number to add")
-
-    class SumNumbersTool(BaseTool):
-        name: str = "sum_numbers"
-        description: str = "Add two numbers together and return the result"
-        args_schema: Type[BaseModel] = SumNumbersToolInput
-
-        def _run(self, a: float, b: float) -> float:
-            return a + b
-
-    sum_tool = SumNumbersTool()
-
-    agent = Agent(
-        role="Calculator",
-        goal="Calculate numbers accurately",
-        backstory="You are a calculator that adds numbers.",
-        llm=LLM(model="google/gemini-2.0-flash-001"),
-        tools=[sum_tool],
-        verbose=True,
-    )
-
-    task = Task(
-        description="What is 10000 + 20000? Use the sum_numbers tool to calculate this.",
-        expected_output="The sum of the two numbers",
-        agent=agent,
-    )
-
-    crew = Crew(agents=[agent], tasks=[task], verbose=True)
-    result = crew.kickoff()
-
-    # The result should contain 30000 (the sum)
-    assert "30000" in result.raw
-
-
 def test_gemini_stop_sequences_sync():
    """Test that stop and stop_sequences attributes stay synchronized."""
    llm = LLM(model="google/gemini-2.0-flash-001")