From 49b8cc95ae6a76af37f39e9b85c1f85d4a3e222b Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 12:11:06 +0000
Subject: [PATCH 1/3] fix: update LLMCallStartedEvent message type to support
 multimodal content (#2475)

fix: sort imports in test file to fix linting

fix: properly sort imports with ruff

Co-Authored-By: Joe Moura <joao@crewai.com>
---
 src/crewai/utilities/events/llm_events.py |  2 +-
 tests/test_multimodal_validation.py       | 46 +++++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_multimodal_validation.py

diff --git a/src/crewai/utilities/events/llm_events.py b/src/crewai/utilities/events/llm_events.py
index 988b6f945..10a648e86 100644
--- a/src/crewai/utilities/events/llm_events.py
+++ b/src/crewai/utilities/events/llm_events.py
@@ -15,7 +15,7 @@ class LLMCallStartedEvent(CrewEvent):
     """Event emitted when a LLM call starts"""
 
     type: str = "llm_call_started"
-    messages: Union[str, List[Dict[str, str]]]
+    messages: Union[str, List[Dict[str, Any]]]
     tools: Optional[List[dict]] = None
     callbacks: Optional[List[Any]] = None
     available_functions: Optional[Dict[str, Any]] = None
diff --git a/tests/test_multimodal_validation.py b/tests/test_multimodal_validation.py
new file mode 100644
index 000000000..3b0817bf2
--- /dev/null
+++ b/tests/test_multimodal_validation.py
@@ -0,0 +1,46 @@
+import os
+
+import pytest
+
+from crewai import LLM, Agent, Crew, Task
+
+
+@pytest.mark.skip(reason="Only run manually with valid API keys")
+def test_multimodal_agent_with_image_url():
+    """
+    Test that a multimodal agent can process images without validation errors.
+    This test reproduces the scenario from issue #2475.
+    """
+    # Guard for manual runs made after removing the skip marker above.
+    api_key = os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        pytest.skip("OPENAI_API_KEY environment variable not set")
+
+    # gpt-4o is chosen because it is a model with vision capabilities.
+    llm = LLM(model="openai/gpt-4o", api_key=api_key, temperature=0.7)
+
+    expert_analyst = Agent(
+        role="Visual Quality Inspector",
+        goal="Perform detailed quality analysis of product images",
+        backstory="Senior quality control expert with expertise in visual inspection",
+        llm=llm,
+        verbose=True,
+        allow_delegation=False,
+        multimodal=True,
+    )
+
+    inspection_task = Task(
+        description="""
+        Analyze the product image at https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244 with focus on:
+        1. Quality of materials
+        2. Manufacturing defects
+        3. Compliance with standards
+        Provide a detailed report highlighting any issues found.
+        """,
+        expected_output="A detailed report highlighting any issues found",
+        agent=expert_analyst,
+    )
+
+    # Execute the crew so the multimodal message path is exercised, then check output.
+    result = Crew(agents=[expert_analyst], tasks=[inspection_task]).kickoff()
+    assert result.raw

From e3dde17af0e082962a2cd8ac589cf92c438d4eb5 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 26 Mar 2025 12:14:13 +0000
Subject: [PATCH 2/3] docs: improve LLMCallStartedEvent docstring to clarify
 multimodal support

Co-Authored-By: Joe Moura <joao@crewai.com>
---
 src/crewai/utilities/events/llm_events.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/crewai/utilities/events/llm_events.py b/src/crewai/utilities/events/llm_events.py
index 10a648e86..b92072340 100644
--- a/src/crewai/utilities/events/llm_events.py
+++ b/src/crewai/utilities/events/llm_events.py
@@ -12,7 +12,12 @@ class LLMCallType(Enum):
 
 
 class LLMCallStartedEvent(CrewEvent):
-    """Event emitted when a LLM call starts"""
+    """Event emitted when an LLM call starts.
+
+    Attributes:
+        messages: Either a plain string or a list of message dictionaries whose
+            values may be of any type, allowing multimodal content (text, images, etc.)
+    """
 
     type: str = "llm_call_started"
     messages: Union[str, List[Dict[str, Any]]]

From 3deeba4cab96727c8ba2e2405a08c466f5b2b3fe Mon Sep 17 00:00:00 2001
From: lucasgomide <lucaslg200@gmail.com>
Date: Wed, 26 Mar 2025 16:28:37 -0300
Subject: [PATCH 3/3] test: add missing test to ensure multimodal content
 structures are accepted

---
 ...l_agent_describing_image_successfully.yaml | 378 ++++++++++++++++++
 tests/crew_test.py                            |  38 ++
 2 files changed, 416 insertions(+)
 create mode 100644 tests/cassettes/test_multimodal_agent_describing_image_successfully.yaml

diff --git a/tests/cassettes/test_multimodal_agent_describing_image_successfully.yaml b/tests/cassettes/test_multimodal_agent_describing_image_successfully.yaml
new file mode 100644
index 000000000..c9371c243
--- /dev/null
+++ b/tests/cassettes/test_multimodal_agent_describing_image_successfully.yaml
@@ -0,0 +1,378 @@
+interactions:
+- request:
+    body: !!binary |
+      CpIKCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkS6QkKEgoQY3Jld2FpLnRl
+      bGVtZXRyeRLBBwoQ08SlQ6w2FsCauTgZCqberRIITfOsgNi1qJkqDENyZXcgQ3JlYXRlZDABOdjG
+      6D/PcDAYQahPEkDPcDAYShsKDmNyZXdhaV92ZXJzaW9uEgkKBzAuMTA4LjBKGgoOcHl0aG9uX3Zl
+      cnNpb24SCAoGMy4xMi45Si4KCGNyZXdfa2V5EiIKIDkwNzMxMTU4MzVlMWNhZjJhNmUxNTIyZDA1
+      YTBiNTFkSjEKB2NyZXdfaWQSJgokMzdjOGM4NzgtN2NmZC00YjEyLWE4YzctYzIyZDZlOTIxODBk
+      ShwKDGNyZXdfcHJvY2VzcxIMCgpzZXF1ZW50aWFsShEKC2NyZXdfbWVtb3J5EgIQAEoaChRjcmV3
+      X251bWJlcl9vZl90YXNrcxICGAFKGwoVY3Jld19udW1iZXJfb2ZfYWdlbnRzEgIYAUrgAgoLY3Jl
+      d19hZ2VudHMS0AIKzQJbeyJrZXkiOiAiNzYyM2ZjNGY3ZDk0Y2YzZmRiZmNjMjlmYjBiMDIyYmIi
+      LCAiaWQiOiAiYmVjMjljMTAtOTljYi00MzQwLWIwYTItMWU1NTVkNGRmZGM0IiwgInJvbGUiOiAi
+      VmlzdWFsIFF1YWxpdHkgSW5zcGVjdG9yIiwgInZlcmJvc2U/IjogdHJ1ZSwgIm1heF9pdGVyIjog
+      MjUsICJtYXhfcnBtIjogbnVsbCwgImZ1bmN0aW9uX2NhbGxpbmdfbGxtIjogIiIsICJsbG0iOiAi
+      b3BlbmFpL2dwdC00byIsICJkZWxlZ2F0aW9uX2VuYWJsZWQ/IjogZmFsc2UsICJhbGxvd19jb2Rl
+      X2V4ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9saW1pdCI6IDIsICJ0b29sc19uYW1lcyI6
+      IFtdfV1KjQIKCmNyZXdfdGFza3MS/gEK+wFbeyJrZXkiOiAiMDExM2E5ZTg0N2M2NjI2ZDY0ZDZk
+      Yzk4M2IwNDA5MTgiLCAiaWQiOiAiZWQzYmY1YWUtZTBjMS00MjIxLWFhYTgtMThlNjVkYTMyZjc1
+      IiwgImFzeW5jX2V4ZWN1dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6IGZhbHNlLCAiYWdl
+      bnRfcm9sZSI6ICJWaXN1YWwgUXVhbGl0eSBJbnNwZWN0b3IiLCAiYWdlbnRfa2V5IjogIjc2MjNm
+      YzRmN2Q5NGNmM2ZkYmZjYzI5ZmIwYjAyMmJiIiwgInRvb2xzX25hbWVzIjogW119XXoCGAGFAQAB
+      AAASjgIKECo77ESam8oLrZMmgLLaoksSCLE6x14/Kb1vKgxUYXNrIENyZWF0ZWQwATlI/chAz3Aw
+      GEEAgMpAz3AwGEouCghjcmV3X2tleRIiCiA5MDczMTE1ODM1ZTFjYWYyYTZlMTUyMmQwNWEwYjUx
+      ZEoxCgdjcmV3X2lkEiYKJDM3YzhjODc4LTdjZmQtNGIxMi1hOGM3LWMyMmQ2ZTkyMTgwZEouCgh0
+      YXNrX2tleRIiCiAwMTEzYTllODQ3YzY2MjZkNjRkNmRjOTgzYjA0MDkxOEoxCgd0YXNrX2lkEiYK
+      JGVkM2JmNWFlLWUwYzEtNDIyMS1hYWE4LTE4ZTY1ZGEzMmY3NXoCGAGFAQABAAA=
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '1301'
+      Content-Type:
+      - application/x-protobuf
+      User-Agent:
+      - OTel-OTLP-Exporter-Python/1.31.1
+    method: POST
+    uri: https://telemetry.crewai.com:4319/v1/traces
+  response:
+    body:
+      string: "\n\0"
+    headers:
+      Content-Length:
+      - '2'
+      Content-Type:
+      - application/x-protobuf
+      Date:
+      - Wed, 26 Mar 2025 19:24:52 GMT
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Visual Quality Inspector.
+      Senior quality control expert with expertise in visual inspection\nYour personal
+      goal is: Perform detailed quality analysis of product images\nYou ONLY have
+      access to the following tools, and should NEVER make up tools that are not listed
+      here:\n\nTool Name: Add image to content\nTool Arguments: {''image_url'': {''description'':
+      ''The URL or path of the image to add'', ''type'': ''str''}, ''action'': {''description'':
+      ''Optional context or question about the image'', ''type'': ''Union[str, NoneType]''}}\nTool
+      Description: See image to understand its content, you can optionally ask a question
+      about the image\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought:
+      you should always think about what to do\nAction: the action to take, only one
+      name of [Add image to content], just the name, exactly as it''s written.\nAction
+      Input: the input to the action, just a simple JSON object, enclosed in curly
+      braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce
+      all necessary information is gathered, return the following format:\n\n```\nThought:
+      I now know the final answer\nFinal Answer: the final answer to the original
+      input question\n```"}, {"role": "user", "content": "\nCurrent Task: \n        Analyze
+      the product image at https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244
+      with focus on:\n        1. Quality of materials\n        2. Manufacturing defects\n        3.
+      Compliance with standards\n        Provide a detailed report highlighting any
+      issues found.\n        \n\nThis is the expected criteria for your final answer:
+      A detailed report highlighting any issues found\nyou MUST return the actual
+      complete content as the final answer, not a summary.\n\nBegin! This is VERY
+      important to you, use the tools available and give your best Final Answer, your
+      job depends on it!\n\nThought:"}], "model": "gpt-4o", "stop": ["\nObservation:"],
+      "temperature": 0.7}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '2033'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - x64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.9
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n  \"id\": \"chatcmpl-BFQepLwSYYzdKLylSFsgcJeg6GTqS\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1743017091,\n  \"model\": \"gpt-4o-2024-08-06\",\n
+      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+      \"assistant\",\n        \"content\": \"Thought: I need to examine the product
+      image to assess the quality of materials, look for any manufacturing defects,
+      and check compliance with standards.\\n\\nAction: Add image to content\\nAction
+      Input: {\\\"image_url\\\": \\\"https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244\\\",
+      \\\"action\\\": \\\"Analyze the quality of materials, manufacturing defects,
+      and compliance with standards.\\\"}\",\n        \"refusal\": null,\n        \"annotations\":
+      []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
+      \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 413,\n    \"completion_tokens\":
+      101,\n    \"total_tokens\": 514,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
+      \     \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+      \"default\",\n  \"system_fingerprint\": \"fp_7e8d90e604\"\n}\n"
+    headers:
+      CF-RAY:
+      - 926907d79dcff1e7-GRU
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 26 Mar 2025 19:24:53 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=WK433.4kW8cr9rwvOlk4EZ2SfRYK9lAPwXCBYEvLcmU-1743017093-1.0.1.1-kVZyUew5rUbMk.2koGJF_rmX.fTseqN241n2M40n8KvBGoKgy6KM6xBmvFbIVWxUs2Y5ZAz8mWy9CrGjaNKSfCzxmv4.pq78z_DGHr37PgI;
+        path=/; expires=Wed, 26-Mar-25 19:54:53 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=T77PMcuNYeyzK0tQyDOe7EScjVBVzW_7DpD3YQBqmUc-1743017093675-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '1729'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '50000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '49999'
+      x-ratelimit-remaining-tokens:
+      - '149999534'
+      x-ratelimit-reset-requests:
+      - 1ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_2399c3355adf16734907c73611a7d330
+    http_version: HTTP/1.1
+    status_code: 200
+- request:
+    body: !!binary |
+      CtgBCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkSrwEKEgoQY3Jld2FpLnRl
+      bGVtZXRyeRKYAQoQp2ACB2xRGve4HGtU2RdWCBIIlQcsbhK22ykqClRvb2wgVXNhZ2UwATlACEXG
+      z3AwGEHAjGPGz3AwGEobCg5jcmV3YWlfdmVyc2lvbhIJCgcwLjEwOC4wSiMKCXRvb2xfbmFtZRIW
+      ChRBZGQgaW1hZ2UgdG8gY29udGVudEoOCghhdHRlbXB0cxICGAF6AhgBhQEAAQAA
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '219'
+      Content-Type:
+      - application/x-protobuf
+      User-Agent:
+      - OTel-OTLP-Exporter-Python/1.31.1
+    method: POST
+    uri: https://telemetry.crewai.com:4319/v1/traces
+  response:
+    body:
+      string: "\n\0"
+    headers:
+      Content-Length:
+      - '2'
+      Content-Type:
+      - application/x-protobuf
+      Date:
+      - Wed, 26 Mar 2025 19:24:57 GMT
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Visual Quality Inspector.
+      Senior quality control expert with expertise in visual inspection\nYour personal
+      goal is: Perform detailed quality analysis of product images\nYou ONLY have
+      access to the following tools, and should NEVER make up tools that are not listed
+      here:\n\nTool Name: Add image to content\nTool Arguments: {''image_url'': {''description'':
+      ''The URL or path of the image to add'', ''type'': ''str''}, ''action'': {''description'':
+      ''Optional context or question about the image'', ''type'': ''Union[str, NoneType]''}}\nTool
+      Description: See image to understand its content, you can optionally ask a question
+      about the image\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought:
+      you should always think about what to do\nAction: the action to take, only one
+      name of [Add image to content], just the name, exactly as it''s written.\nAction
+      Input: the input to the action, just a simple JSON object, enclosed in curly
+      braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce
+      all necessary information is gathered, return the following format:\n\n```\nThought:
+      I now know the final answer\nFinal Answer: the final answer to the original
+      input question\n```"}, {"role": "user", "content": "\nCurrent Task: \n        Analyze
+      the product image at https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244
+      with focus on:\n        1. Quality of materials\n        2. Manufacturing defects\n        3.
+      Compliance with standards\n        Provide a detailed report highlighting any
+      issues found.\n        \n\nThis is the expected criteria for your final answer:
+      A detailed report highlighting any issues found\nyou MUST return the actual
+      complete content as the final answer, not a summary.\n\nBegin! This is VERY
+      important to you, use the tools available and give your best Final Answer, your
+      job depends on it!\n\nThought:"}, {"role": "user", "content": [{"type": "text",
+      "text": "Analyze the quality of materials, manufacturing defects, and compliance
+      with standards."}, {"type": "image_url", "image_url": {"url": "https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244"}}]},
+      {"role": "assistant", "content": "Thought: I need to examine the product image
+      to assess the quality of materials, look for any manufacturing defects, and
+      check compliance with standards.\n\nAction: Add image to content\nAction Input:
+      {\"image_url\": \"https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244\",
+      \"action\": \"Analyze the quality of materials, manufacturing defects, and compliance
+      with standards.\"}"}], "model": "gpt-4o", "stop": ["\nObservation:"], "temperature":
+      0.7}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '2797'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=WK433.4kW8cr9rwvOlk4EZ2SfRYK9lAPwXCBYEvLcmU-1743017093-1.0.1.1-kVZyUew5rUbMk.2koGJF_rmX.fTseqN241n2M40n8KvBGoKgy6KM6xBmvFbIVWxUs2Y5ZAz8mWy9CrGjaNKSfCzxmv4.pq78z_DGHr37PgI;
+        _cfuvid=T77PMcuNYeyzK0tQyDOe7EScjVBVzW_7DpD3YQBqmUc-1743017093675-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - x64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.9
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n  \"id\": \"chatcmpl-BFQetNNvmPgPxhzaKiHYsPqm8aN0i\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1743017095,\n  \"model\": \"gpt-4o-2024-08-06\",\n
+      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+      \"assistant\",\n        \"content\": \"Observation: The image displays a black
+      leather boot with a pointed toe and a low heel. \\n\\nQuality of Materials:\\n1.
+      The leather appears to be of good quality, displaying a consistent texture and
+      finish, which suggests durability.\\n2. The material has a slight sheen, indicating
+      a possible finishing treatment that enhances the appearance and may offer some
+      protection.\\n\\nManufacturing Defects:\\n1. There are no visible stitching
+      errors; the seams appear straight and clean.\\n2. No apparent glue marks or
+      uneven edges, which indicates good craftsmanship.\\n3. There is a slight distressed
+      effect, but it appears intentional as part of the design rather than a defect.\\n\\nCompliance
+      with Standards:\\n1. The shoe design seems to comply with typical fashion standards,
+      showing a balance of aesthetics and functionality.\\n2. The heel height and
+      shape appear to provide stability, aligning with safety standards for footwear.\\n\\nFinal
+      Answer: The analysis of the product image reveals that the black leather boot
+      is made of high-quality materials with no visible manufacturing defects. The
+      craftsmanship is precise, with clean seams and a well-executed design. The distressed
+      effect appears intentional and part of the aesthetic. The boot seems to comply
+      with fashion and safety standards, offering both style and functionality. No
+      significant issues were found.\",\n        \"refusal\": null,\n        \"annotations\":
+      []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
+      \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 1300,\n    \"completion_tokens\":
+      250,\n    \"total_tokens\": 1550,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
+      \     \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+      \"default\",\n  \"system_fingerprint\": \"fp_3a5b33c01a\"\n}\n"
+    headers:
+      CF-RAY:
+      - 926907e45f33f1e7-GRU
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 26 Mar 2025 19:25:01 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '7242'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-input-images:
+      - '250000'
+      x-ratelimit-limit-requests:
+      - '50000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-input-images:
+      - '249999'
+      x-ratelimit-remaining-requests:
+      - '49999'
+      x-ratelimit-remaining-tokens:
+      - '149998641'
+      x-ratelimit-reset-input-images:
+      - 0s
+      x-ratelimit-reset-requests:
+      - 1ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_c5dd144c8ac1bb3bd96ffbba40707b2d
+    http_version: HTTP/1.1
+    status_code: 200
+version: 1
diff --git a/tests/crew_test.py b/tests/crew_test.py
index 39a3e9a08..e5f464131 100644
--- a/tests/crew_test.py
+++ b/tests/crew_test.py
@@ -3731,6 +3731,44 @@ def test_multimodal_agent_image_tool_handling():
         assert result["content"][1]["type"] == "image_url"
 
 
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_multimodal_agent_describing_image_successfully():
+    """
+    Test that a multimodal agent can process images without validation errors
+    (issue #2475) and that the crew's raw result equals its only task's raw output.
+    """
+    llm = LLM(model="openai/gpt-4o", temperature=0.7)  # model with vision capabilities
+
+    expert_analyst = Agent(
+        role="Visual Quality Inspector",
+        goal="Perform detailed quality analysis of product images",
+        backstory="Senior quality control expert with expertise in visual inspection",
+        llm=llm,
+        verbose=True,
+        allow_delegation=False,
+        multimodal=True,
+    )
+
+    inspection_task = Task(
+        description="""
+        Analyze the product image at https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244 with focus on:
+        1. Quality of materials
+        2. Manufacturing defects
+        3. Compliance with standards
+        Provide a detailed report highlighting any issues found.
+        """,
+        expected_output="A detailed report highlighting any issues found",
+        agent=expert_analyst,
+    )
+
+    crew = Crew(agents=[expert_analyst], tasks=[inspection_task])
+    result = crew.kickoff()
+
+    task_output = result.tasks_output[0]  # the crew was built with a single task
+    assert isinstance(task_output, TaskOutput)
+    assert task_output.raw == result.raw
+
+
 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_multimodal_agent_live_image_analysis():
     """