Merge branch 'main' of github.com:crewAIInc/crewAI into devin/1741108142-custom-llm-support

Lorenze Jay
2025-03-10 09:34:33 -07:00
26 changed files with 5382 additions and 426 deletions

View File

@@ -18,6 +18,7 @@ from crewai.tools.tool_calling import InstructorToolCalling
from crewai.tools.tool_usage import ToolUsage
from crewai.utilities import RPMController
from crewai.utilities.events import crewai_event_bus
from crewai.utilities.events.llm_events import LLMStreamChunkEvent
from crewai.utilities.events.tool_usage_events import ToolUsageFinishedEvent
@@ -259,9 +260,7 @@ def test_cache_hitting():
def handle_tool_end(source, event):
    received_events.append(event)

-with (
-    patch.object(CacheHandler, "read") as read,
-):
+with (patch.object(CacheHandler, "read") as read,):
    read.return_value = "0"
    task = Task(
        description="What is 2 times 6? Ignore correctness and just return the result of the multiplication tool, you must use the tool.",

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -2,6 +2,7 @@
import hashlib
import json
import os
from concurrent.futures import Future
from unittest import mock
from unittest.mock import MagicMock, patch
@@ -35,6 +36,11 @@ from crewai.utilities.events.crew_events import (
from crewai.utilities.rpm_controller import RPMController
from crewai.utilities.task_output_storage_handler import TaskOutputStorageHandler
# Skip streaming tests when running in CI/CD environments
skip_streaming_in_ci = pytest.mark.skipif(
    os.getenv("CI") is not None, reason="Skipping streaming tests in CI/CD environments"
)

ceo = Agent(
    role="CEO",
    goal="Make sure the writers in your company produce amazing content.",
@@ -948,6 +954,7 @@ def test_api_calls_throttling(capsys):
    moveon.assert_called()


@skip_streaming_in_ci
@pytest.mark.vcr(filter_headers=["authorization"])
def test_crew_kickoff_usage_metrics():
    inputs = [
@@ -960,6 +967,7 @@ def test_crew_kickoff_usage_metrics():
role="{topic} Researcher",
goal="Express hot takes on {topic}.",
backstory="You have a lot of experience with {topic}.",
llm=LLM(model="gpt-4o"),
)
task = Task(
@@ -968,12 +976,50 @@ def test_crew_kickoff_usage_metrics():
        agent=agent,
    )

    # Use real LLM calls instead of mocking
    crew = Crew(agents=[agent], tasks=[task])
    results = crew.kickoff_for_each(inputs=inputs)
    assert len(results) == len(inputs)

    for result in results:
-        # Assert that all required keys are in usage_metrics and their values are not None
+        # Assert that all required keys are in usage_metrics and their values are greater than 0
        assert result.token_usage.total_tokens > 0
        assert result.token_usage.prompt_tokens > 0
        assert result.token_usage.completion_tokens > 0
        assert result.token_usage.successful_requests > 0
        assert result.token_usage.cached_prompt_tokens == 0


@skip_streaming_in_ci
@pytest.mark.vcr(filter_headers=["authorization"])
def test_crew_kickoff_streaming_usage_metrics():
    inputs = [
        {"topic": "dog"},
        {"topic": "cat"},
        {"topic": "apple"},
    ]
    agent = Agent(
        role="{topic} Researcher",
        goal="Express hot takes on {topic}.",
        backstory="You have a lot of experience with {topic}.",
        llm=LLM(model="gpt-4o", stream=True),
        max_iter=3,
    )
    task = Task(
        description="Give me an analysis around {topic}.",
        expected_output="1 bullet point about {topic} that's under 15 words.",
        agent=agent,
    )

    # Use real LLM calls instead of mocking
    crew = Crew(agents=[agent], tasks=[task])
    results = crew.kickoff_for_each(inputs=inputs)
    assert len(results) == len(inputs)

    for result in results:
        # Assert that all required keys are in usage_metrics and their values are greater than 0
        assert result.token_usage.total_tokens > 0
        assert result.token_usage.prompt_tokens > 0
        assert result.token_usage.completion_tokens > 0
@@ -3973,3 +4019,5 @@ def test_crew_with_knowledge_sources_works_with_copy():
    assert crew_copy.knowledge_sources == crew.knowledge_sources
    assert len(crew_copy.agents) == len(crew.agents)
    assert len(crew_copy.tasks) == len(crew.tasks)
    assert len(crew_copy.tasks) == len(crew.tasks)
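The two usage-metrics tests above repeat the same set of token_usage assertions. A minimal sketch of a shared helper that would express that pattern; assert_usage_populated is hypothetical and not part of this diff, while the attribute names are taken directly from the assertions above:

# Hypothetical helper, not part of this commit: factors out the
# token-usage checks repeated in the two kickoff tests above.
def assert_usage_populated(token_usage, check_requests=True, check_cached=True):
    # Attribute names match the token_usage object asserted on above.
    assert token_usage.total_tokens > 0
    assert token_usage.prompt_tokens > 0
    assert token_usage.completion_tokens > 0
    if check_requests:
        assert token_usage.successful_requests > 0
    if check_cached:
        assert token_usage.cached_prompt_tokens == 0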

View File

@@ -219,7 +219,7 @@ def test_get_custom_llm_provider_gemini():
def test_get_custom_llm_provider_openai():
    llm = LLM(model="gpt-4")
-    assert llm._get_custom_llm_provider() == "openai"
+    assert llm._get_custom_llm_provider() == None


def test_validate_call_params_supported():
@@ -285,6 +285,7 @@ def test_o3_mini_reasoning_effort_medium():
    assert isinstance(result, str)
    assert "Paris" in result


def test_context_window_validation():
    """Test that context window validation works correctly."""
    # Test valid window size

View File

@@ -0,0 +1,170 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "Tell me a short joke"}], "model":
"gpt-3.5-turbo", "stop": [], "stream": true}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '121'
content-type:
- application/json
cookie:
- _cfuvid=IY8ppO70AMHr2skDSUsGh71zqHHdCQCZ3OvkPi26NBc-1740424913267-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.65.1
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.65.1
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.8
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Why"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
couldn"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"''t"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
the"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
bicycle"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
stand"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
up"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
by"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
itself"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
Because"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
it"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
was"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"
two"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"-t"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"ired"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]}
data: {"id":"chatcmpl-B74aE2TDl9ZbKx2fXoVatoMDnErNm","object":"chat.completion.chunk","created":1741025614,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
data: [DONE]
'
headers:
CF-RAY:
- 91ab1bcbad95bcda-ATL
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Mon, 03 Mar 2025 18:13:34 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=Jydtg8l0yjWRI2vKmejdq.C1W.sasIwEbTrV2rUt6V0-1741025614-1.0.1.1-Af3gmq.j2ecn9QEa3aCVY09QU4VqoW2GTk9AjvzPA.jyAZlwhJd4paniSt3kSusH0tryW03iC8uaX826hb2xzapgcfSm6Jdh_eWh_BMCh_8;
path=/; expires=Mon, 03-Mar-25 18:43:34 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=5wzaJSCvT1p1Eazad55wDvp1JsgxrlghhmmU9tx0fMs-1741025614868-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '127'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '50000000'
x-ratelimit-remaining-requests:
- '9999'
x-ratelimit-remaining-tokens:
- '49999978'
x-ratelimit-reset-requests:
- 6ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_2a2a04977ace88fdd64cf570f80c0202
status:
code: 200
message: OK
version: 1
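The cassette above records a server-sent-event stream for a streamed chat completion: each "data:" line carries a JSON chunk whose choices[0].delta.content holds the next text fragment, and "data: [DONE]" terminates the stream. A minimal sketch of reassembling the final text from such a stream; this is illustrative, not crewAI's actual parsing code:

import json

def join_sse_stream(sse_lines):
    # Concatenate the delta fragments of an OpenAI-style SSE stream.
    parts = []
    for line in sse_lines:
        if not line.startswith("data: "):
            continue
        payload = line[len("data: "):].strip()
        if payload == "[DONE]":
            break
        delta = json.loads(payload)["choices"][0].get("delta", {})
        parts.append(delta.get("content") or "")
    return "".join(parts)

Applied to the chunks recorded above, this yields the complete joke: "Why couldn't the bicycle stand up by itself? Because it was two-tired!"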

View File

@@ -0,0 +1,107 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "Tell me a short joke"}], "model":
"gpt-4o", "stop": [], "stream": false}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '115'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.65.1
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.65.1
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.8
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jFJBbtswELzrFVteerEKSZbrxpcCDuBTUfSUtigCgSZXEhuKJLirNEbg
vxeSHMtBXSAXHmZ2BjPLfU4AhNFiA0K1klUXbLpde/X1tvtW/tnfrW6//Lzb7UraLn8s2+xpJxaD
wu9/o+IX1Qflu2CRjXcTrSJKxsE1X5d5kRWrdT4SnddoB1kTOC19WmRFmWaf0uzjSdh6o5DEBn4l
AADP4ztEdBqfxAayxQvSIZFsUGzOQwAiejsgQhIZYulYLGZSecfoxtTf2wNo794zkDLo2BATcOyJ
QbLv6DNsUcmeELjFA3TyAaEPgI8YD9wa17y7NI5Y9ySHXq639oQfz0mtb0L0ezrxZ7w2zlBbRZTk
3ZCK2AcxsscE4H7cSP+qpAjRd4Er9g/oBsO8mOzE/AVXSPYs7YwX5eKKW6WRpbF0sVGhpGpRz8p5
/bLXxl8QyUXnf8Nc8556G9e8xX4mlMLAqKsQURv1uvA8FnE40P+NnXc8BhaE8dEorNhgHP5BYy17
O92OoAMxdlVtXIMxRDMdUB2qWt3UuV5ny5VIjslfAAAA//8DADx20t9JAwAA
headers:
CF-RAY:
- 91bbfc033e461d6e-ATL
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Wed, 05 Mar 2025 19:22:51 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=LecfSlhN6VGr4kTlMiMCqRPInNb1m8zOikTZxtsE_WM-1741202571-1.0.1.1-T8nh2g1PcqyLIV97_HH9Q_nSUyCtaiFAOzvMxlswn6XjJCcSLJhi_fmkbylwppwoRPTxgs4S6VsVH0mp4ZcDTABBbtemKj7vS8QRDpRrmsU;
path=/; expires=Wed, 05-Mar-25 19:52:51 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=wyMrJP5k5bgWyD8rsK4JPvAJ78JWrsrT0lyV9DP4WZM-1741202571727-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '416'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '30000000'
x-ratelimit-remaining-requests:
- '9999'
x-ratelimit-remaining-tokens:
- '29999978'
x-ratelimit-reset-requests:
- 6ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_f42504d00bda0a492dced0ba3cf302d8
status:
code: 200
message: OK
version: 1
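Unlike the streaming cassette, this non-streaming cassette stores the response body as a YAML !!binary blob: base64-encoded bytes that are gzip-compressed (note the Content-Encoding: gzip response header). A short sketch of decoding such a body for inspection, assuming you have the base64 text in hand; not part of this diff:

import base64
import gzip
import json

def decode_cassette_body(b64_text: str) -> dict:
    # YAML !!binary is base64; the payload here is gzipped JSON.
    raw = base64.b64decode(b64_text)
    return json.loads(gzip.decompress(raw))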

View File

@@ -1,3 +1,4 @@
import os
from datetime import datetime
from unittest.mock import Mock, patch
@@ -38,6 +39,7 @@ from crewai.utilities.events.llm_events import (
    LLMCallFailedEvent,
    LLMCallStartedEvent,
    LLMCallType,
    LLMStreamChunkEvent,
)
from crewai.utilities.events.task_events import (
    TaskCompletedEvent,
@@ -48,6 +50,11 @@ from crewai.utilities.events.tool_usage_events import (
    ToolUsageErrorEvent,
)

# Skip streaming tests when running in CI/CD environments
skip_streaming_in_ci = pytest.mark.skipif(
    os.getenv("CI") is not None, reason="Skipping streaming tests in CI/CD environments"
)

base_agent = Agent(
    role="base_agent",
    llm="gpt-4o-mini",
@@ -615,3 +622,152 @@ def test_llm_emits_call_failed_event():
    assert len(received_events) == 1
    assert received_events[0].type == "llm_call_failed"
    assert received_events[0].error == error_message


@skip_streaming_in_ci
@pytest.mark.vcr(filter_headers=["authorization"])
def test_llm_emits_stream_chunk_events():
    """Test that LLM emits stream chunk events when streaming is enabled."""
    received_chunks = []

    with crewai_event_bus.scoped_handlers():

        @crewai_event_bus.on(LLMStreamChunkEvent)
        def handle_stream_chunk(source, event):
            received_chunks.append(event.chunk)

        # Create an LLM with streaming enabled
        llm = LLM(model="gpt-4o", stream=True)

        # Call the LLM with a simple message
        response = llm.call("Tell me a short joke")

        # Verify that we received chunks
        assert len(received_chunks) > 0

        # Verify that concatenating all chunks equals the final response
        assert "".join(received_chunks) == response


@skip_streaming_in_ci
@pytest.mark.vcr(filter_headers=["authorization"])
def test_llm_no_stream_chunks_when_streaming_disabled():
    """Test that LLM doesn't emit stream chunk events when streaming is disabled."""
    received_chunks = []

    with crewai_event_bus.scoped_handlers():

        @crewai_event_bus.on(LLMStreamChunkEvent)
        def handle_stream_chunk(source, event):
            received_chunks.append(event.chunk)

        # Create an LLM with streaming disabled
        llm = LLM(model="gpt-4o", stream=False)

        # Call the LLM with a simple message
        response = llm.call("Tell me a short joke")

        # Verify that we didn't receive any chunks
        assert len(received_chunks) == 0

        # Verify we got a response
        assert response and isinstance(response, str)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_streaming_fallback_to_non_streaming():
"""Test that streaming falls back to non-streaming when there's an error."""
received_chunks = []
fallback_called = False
with crewai_event_bus.scoped_handlers():
@crewai_event_bus.on(LLMStreamChunkEvent)
def handle_stream_chunk(source, event):
received_chunks.append(event.chunk)
# Create an LLM with streaming enabled
llm = LLM(model="gpt-4o", stream=True)
# Store original methods
original_call = llm.call
# Create a mock call method that handles the streaming error
def mock_call(messages, tools=None, callbacks=None, available_functions=None):
nonlocal fallback_called
# Emit a couple of chunks to simulate partial streaming
crewai_event_bus.emit(llm, event=LLMStreamChunkEvent(chunk="Test chunk 1"))
crewai_event_bus.emit(llm, event=LLMStreamChunkEvent(chunk="Test chunk 2"))
# Mark that fallback would be called
fallback_called = True
# Return a response as if fallback succeeded
return "Fallback response after streaming error"
# Replace the call method with our mock
llm.call = mock_call
try:
# Call the LLM
response = llm.call("Tell me a short joke")
# Verify that we received some chunks
assert len(received_chunks) == 2
assert received_chunks[0] == "Test chunk 1"
assert received_chunks[1] == "Test chunk 2"
# Verify fallback was triggered
assert fallback_called
# Verify we got the fallback response
assert response == "Fallback response after streaming error"
finally:
# Restore the original method
llm.call = original_call
@pytest.mark.vcr(filter_headers=["authorization"])
def test_streaming_empty_response_handling():
"""Test that streaming handles empty responses correctly."""
received_chunks = []
with crewai_event_bus.scoped_handlers():
@crewai_event_bus.on(LLMStreamChunkEvent)
def handle_stream_chunk(source, event):
received_chunks.append(event.chunk)
# Create an LLM with streaming enabled
llm = LLM(model="gpt-3.5-turbo", stream=True)
# Store original methods
original_call = llm.call
# Create a mock call method that simulates empty chunks
def mock_call(messages, tools=None, callbacks=None, available_functions=None):
# Emit a few empty chunks
for _ in range(3):
crewai_event_bus.emit(llm, event=LLMStreamChunkEvent(chunk=""))
# Return the default message for empty responses
return "I apologize, but I couldn't generate a proper response. Please try again or rephrase your request."
# Replace the call method with our mock
llm.call = mock_call
try:
# Call the LLM - this should handle empty response
response = llm.call("Tell me a short joke")
# Verify that we received empty chunks
assert len(received_chunks) == 3
assert all(chunk == "" for chunk in received_chunks)
# Verify the response is the default message for empty responses
assert "I apologize" in response and "couldn't generate" in response
finally:
# Restore the original method
llm.call = original_call
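The tests above exercise the event-bus surface that streaming consumers would use. A minimal sketch of listening for chunks in application code, reusing the crewai_event_bus.on registration and event.chunk attribute seen in the tests; the top-level LLM import is assumed to match crewAI's public API:

from crewai import LLM
from crewai.utilities.events import crewai_event_bus
from crewai.utilities.events.llm_events import LLMStreamChunkEvent

@crewai_event_bus.on(LLMStreamChunkEvent)
def print_chunk(source, event):
    # Print each streamed fragment as it arrives.
    print(event.chunk, end="", flush=True)

llm = LLM(model="gpt-4o", stream=True)
llm.call("Tell me a short joke")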