From 49b8cc95ae6a76af37f39e9b85c1f85d4a3e222b Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 12:11:06 +0000 Subject: [PATCH 1/3] fix: update LLMCallStartedEvent message type to support multimodal content (#2475) fix: sort imports in test file to fix linting fix: properly sort imports with ruff Co-Authored-By: Joe Moura --- src/crewai/utilities/events/llm_events.py | 2 +- tests/test_multimodal_validation.py | 46 +++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 tests/test_multimodal_validation.py diff --git a/src/crewai/utilities/events/llm_events.py b/src/crewai/utilities/events/llm_events.py index 988b6f945..10a648e86 100644 --- a/src/crewai/utilities/events/llm_events.py +++ b/src/crewai/utilities/events/llm_events.py @@ -15,7 +15,7 @@ class LLMCallStartedEvent(CrewEvent): """Event emitted when a LLM call starts""" type: str = "llm_call_started" - messages: Union[str, List[Dict[str, str]]] + messages: Union[str, List[Dict[str, Any]]] tools: Optional[List[dict]] = None callbacks: Optional[List[Any]] = None available_functions: Optional[Dict[str, Any]] = None diff --git a/tests/test_multimodal_validation.py b/tests/test_multimodal_validation.py new file mode 100644 index 000000000..3b0817bf2 --- /dev/null +++ b/tests/test_multimodal_validation.py @@ -0,0 +1,46 @@ +import os + +import pytest + +from crewai import LLM, Agent, Crew, Task + + +@pytest.mark.skip(reason="Only run manually with valid API keys") +def test_multimodal_agent_with_image_url(): + """ + Test that a multimodal agent can process images without validation errors. + This test reproduces the scenario from issue #2475. + """ + OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") + if not OPENAI_API_KEY: + pytest.skip("OPENAI_API_KEY environment variable not set") + + llm = LLM( + model="openai/gpt-4o", # model with vision capabilities + api_key=OPENAI_API_KEY, + temperature=0.7 + ) + + expert_analyst = Agent( + role="Visual Quality Inspector", + goal="Perform detailed quality analysis of product images", + backstory="Senior quality control expert with expertise in visual inspection", + llm=llm, + verbose=True, + allow_delegation=False, + multimodal=True + ) + + inspection_task = Task( + description=""" + Analyze the product image at https://www.us.maguireshoes.com/collections/spring-25/products/lucena-black-boot with focus on: + 1. Quality of materials + 2. Manufacturing defects + 3. Compliance with standards + Provide a detailed report highlighting any issues found. + """, + expected_output="A detailed report highlighting any issues found", + agent=expert_analyst + ) + + crew = Crew(agents=[expert_analyst], tasks=[inspection_task]) From e3dde17af0e082962a2cd8ac589cf92c438d4eb5 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 12:14:13 +0000 Subject: [PATCH 2/3] docs: improve LLMCallStartedEvent docstring to clarify multimodal support Co-Authored-By: Joe Moura --- src/crewai/utilities/events/llm_events.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/crewai/utilities/events/llm_events.py b/src/crewai/utilities/events/llm_events.py index 10a648e86..b92072340 100644 --- a/src/crewai/utilities/events/llm_events.py +++ b/src/crewai/utilities/events/llm_events.py @@ -12,7 +12,12 @@ class LLMCallType(Enum): class LLMCallStartedEvent(CrewEvent): - """Event emitted when a LLM call starts""" + """Event emitted when a LLM call starts + + Attributes: + messages: Content can be either a string or a list of dictionaries that support + multimodal content (text, images, etc.) + """ type: str = "llm_call_started" messages: Union[str, List[Dict[str, Any]]] From 3deeba4cab96727c8ba2e2405a08c466f5b2b3fe Mon Sep 17 00:00:00 2001 From: lucasgomide Date: Wed, 26 Mar 2025 16:28:37 -0300 Subject: [PATCH 3/3] test: adding missing test to ensure multimodal content structures --- ...l_agent_describing_image_successfully.yaml | 378 ++++++++++++++++++ tests/crew_test.py | 38 ++ 2 files changed, 416 insertions(+) create mode 100644 tests/cassettes/test_multimodal_agent_describing_image_successfully.yaml diff --git a/tests/cassettes/test_multimodal_agent_describing_image_successfully.yaml b/tests/cassettes/test_multimodal_agent_describing_image_successfully.yaml new file mode 100644 index 000000000..c9371c243 --- /dev/null +++ b/tests/cassettes/test_multimodal_agent_describing_image_successfully.yaml @@ -0,0 +1,378 @@ +interactions: +- request: + body: !!binary | + CpIKCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkS6QkKEgoQY3Jld2FpLnRl + bGVtZXRyeRLBBwoQ08SlQ6w2FsCauTgZCqberRIITfOsgNi1qJkqDENyZXcgQ3JlYXRlZDABOdjG + 6D/PcDAYQahPEkDPcDAYShsKDmNyZXdhaV92ZXJzaW9uEgkKBzAuMTA4LjBKGgoOcHl0aG9uX3Zl + cnNpb24SCAoGMy4xMi45Si4KCGNyZXdfa2V5EiIKIDkwNzMxMTU4MzVlMWNhZjJhNmUxNTIyZDA1 + YTBiNTFkSjEKB2NyZXdfaWQSJgokMzdjOGM4NzgtN2NmZC00YjEyLWE4YzctYzIyZDZlOTIxODBk + ShwKDGNyZXdfcHJvY2VzcxIMCgpzZXF1ZW50aWFsShEKC2NyZXdfbWVtb3J5EgIQAEoaChRjcmV3 + X251bWJlcl9vZl90YXNrcxICGAFKGwoVY3Jld19udW1iZXJfb2ZfYWdlbnRzEgIYAUrgAgoLY3Jl + d19hZ2VudHMS0AIKzQJbeyJrZXkiOiAiNzYyM2ZjNGY3ZDk0Y2YzZmRiZmNjMjlmYjBiMDIyYmIi + LCAiaWQiOiAiYmVjMjljMTAtOTljYi00MzQwLWIwYTItMWU1NTVkNGRmZGM0IiwgInJvbGUiOiAi + VmlzdWFsIFF1YWxpdHkgSW5zcGVjdG9yIiwgInZlcmJvc2U/IjogdHJ1ZSwgIm1heF9pdGVyIjog + MjUsICJtYXhfcnBtIjogbnVsbCwgImZ1bmN0aW9uX2NhbGxpbmdfbGxtIjogIiIsICJsbG0iOiAi + b3BlbmFpL2dwdC00byIsICJkZWxlZ2F0aW9uX2VuYWJsZWQ/IjogZmFsc2UsICJhbGxvd19jb2Rl + X2V4ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9saW1pdCI6IDIsICJ0b29sc19uYW1lcyI6 + IFtdfV1KjQIKCmNyZXdfdGFza3MS/gEK+wFbeyJrZXkiOiAiMDExM2E5ZTg0N2M2NjI2ZDY0ZDZk + Yzk4M2IwNDA5MTgiLCAiaWQiOiAiZWQzYmY1YWUtZTBjMS00MjIxLWFhYTgtMThlNjVkYTMyZjc1 + IiwgImFzeW5jX2V4ZWN1dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6IGZhbHNlLCAiYWdl + bnRfcm9sZSI6ICJWaXN1YWwgUXVhbGl0eSBJbnNwZWN0b3IiLCAiYWdlbnRfa2V5IjogIjc2MjNm + YzRmN2Q5NGNmM2ZkYmZjYzI5ZmIwYjAyMmJiIiwgInRvb2xzX25hbWVzIjogW119XXoCGAGFAQAB + AAASjgIKECo77ESam8oLrZMmgLLaoksSCLE6x14/Kb1vKgxUYXNrIENyZWF0ZWQwATlI/chAz3Aw + GEEAgMpAz3AwGEouCghjcmV3X2tleRIiCiA5MDczMTE1ODM1ZTFjYWYyYTZlMTUyMmQwNWEwYjUx + ZEoxCgdjcmV3X2lkEiYKJDM3YzhjODc4LTdjZmQtNGIxMi1hOGM3LWMyMmQ2ZTkyMTgwZEouCgh0 + YXNrX2tleRIiCiAwMTEzYTllODQ3YzY2MjZkNjRkNmRjOTgzYjA0MDkxOEoxCgd0YXNrX2lkEiYK + JGVkM2JmNWFlLWUwYzEtNDIyMS1hYWE4LTE4ZTY1ZGEzMmY3NXoCGAGFAQABAAA= + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '1301' + Content-Type: + - application/x-protobuf + User-Agent: + - OTel-OTLP-Exporter-Python/1.31.1 + method: POST + uri: https://telemetry.crewai.com:4319/v1/traces + response: + body: + string: "\n\0" + headers: + Content-Length: + - '2' + Content-Type: + - application/x-protobuf + Date: + - Wed, 26 Mar 2025 19:24:52 GMT + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": "You are Visual Quality Inspector. + Senior quality control expert with expertise in visual inspection\nYour personal + goal is: Perform detailed quality analysis of product images\nYou ONLY have + access to the following tools, and should NEVER make up tools that are not listed + here:\n\nTool Name: Add image to content\nTool Arguments: {''image_url'': {''description'': + ''The URL or path of the image to add'', ''type'': ''str''}, ''action'': {''description'': + ''Optional context or question about the image'', ''type'': ''Union[str, NoneType]''}}\nTool + Description: See image to understand its content, you can optionally ask a question + about the image\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: + you should always think about what to do\nAction: the action to take, only one + name of [Add image to content], just the name, exactly as it''s written.\nAction + Input: the input to the action, just a simple JSON object, enclosed in curly + braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce + all necessary information is gathered, return the following format:\n\n```\nThought: + I now know the final answer\nFinal Answer: the final answer to the original + input question\n```"}, {"role": "user", "content": "\nCurrent Task: \n Analyze + the product image at https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244 + with focus on:\n 1. Quality of materials\n 2. Manufacturing defects\n 3. + Compliance with standards\n Provide a detailed report highlighting any + issues found.\n \n\nThis is the expected criteria for your final answer: + A detailed report highlighting any issues found\nyou MUST return the actual + complete content as the final answer, not a summary.\n\nBegin! This is VERY + important to you, use the tools available and give your best Final Answer, your + job depends on it!\n\nThought:"}], "model": "gpt-4o", "stop": ["\nObservation:"], + "temperature": 0.7}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '2033' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.9 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-BFQepLwSYYzdKLylSFsgcJeg6GTqS\",\n \"object\": + \"chat.completion\",\n \"created\": 1743017091,\n \"model\": \"gpt-4o-2024-08-06\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Thought: I need to examine the product + image to assess the quality of materials, look for any manufacturing defects, + and check compliance with standards.\\n\\nAction: Add image to content\\nAction + Input: {\\\"image_url\\\": \\\"https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244\\\", + \\\"action\\\": \\\"Analyze the quality of materials, manufacturing defects, + and compliance with standards.\\\"}\",\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 413,\n \"completion_tokens\": + 101,\n \"total_tokens\": 514,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n + \ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_7e8d90e604\"\n}\n" + headers: + CF-RAY: + - 926907d79dcff1e7-GRU + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 26 Mar 2025 19:24:53 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=WK433.4kW8cr9rwvOlk4EZ2SfRYK9lAPwXCBYEvLcmU-1743017093-1.0.1.1-kVZyUew5rUbMk.2koGJF_rmX.fTseqN241n2M40n8KvBGoKgy6KM6xBmvFbIVWxUs2Y5ZAz8mWy9CrGjaNKSfCzxmv4.pq78z_DGHr37PgI; + path=/; expires=Wed, 26-Mar-25 19:54:53 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=T77PMcuNYeyzK0tQyDOe7EScjVBVzW_7DpD3YQBqmUc-1743017093675-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '1729' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '50000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '49999' + x-ratelimit-remaining-tokens: + - '149999534' + x-ratelimit-reset-requests: + - 1ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2399c3355adf16734907c73611a7d330 + http_version: HTTP/1.1 + status_code: 200 +- request: + body: !!binary | + CtgBCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkSrwEKEgoQY3Jld2FpLnRl + bGVtZXRyeRKYAQoQp2ACB2xRGve4HGtU2RdWCBIIlQcsbhK22ykqClRvb2wgVXNhZ2UwATlACEXG + z3AwGEHAjGPGz3AwGEobCg5jcmV3YWlfdmVyc2lvbhIJCgcwLjEwOC4wSiMKCXRvb2xfbmFtZRIW + ChRBZGQgaW1hZ2UgdG8gY29udGVudEoOCghhdHRlbXB0cxICGAF6AhgBhQEAAQAA + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '219' + Content-Type: + - application/x-protobuf + User-Agent: + - OTel-OTLP-Exporter-Python/1.31.1 + method: POST + uri: https://telemetry.crewai.com:4319/v1/traces + response: + body: + string: "\n\0" + headers: + Content-Length: + - '2' + Content-Type: + - application/x-protobuf + Date: + - Wed, 26 Mar 2025 19:24:57 GMT + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": "You are Visual Quality Inspector. + Senior quality control expert with expertise in visual inspection\nYour personal + goal is: Perform detailed quality analysis of product images\nYou ONLY have + access to the following tools, and should NEVER make up tools that are not listed + here:\n\nTool Name: Add image to content\nTool Arguments: {''image_url'': {''description'': + ''The URL or path of the image to add'', ''type'': ''str''}, ''action'': {''description'': + ''Optional context or question about the image'', ''type'': ''Union[str, NoneType]''}}\nTool + Description: See image to understand its content, you can optionally ask a question + about the image\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: + you should always think about what to do\nAction: the action to take, only one + name of [Add image to content], just the name, exactly as it''s written.\nAction + Input: the input to the action, just a simple JSON object, enclosed in curly + braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce + all necessary information is gathered, return the following format:\n\n```\nThought: + I now know the final answer\nFinal Answer: the final answer to the original + input question\n```"}, {"role": "user", "content": "\nCurrent Task: \n Analyze + the product image at https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244 + with focus on:\n 1. Quality of materials\n 2. Manufacturing defects\n 3. + Compliance with standards\n Provide a detailed report highlighting any + issues found.\n \n\nThis is the expected criteria for your final answer: + A detailed report highlighting any issues found\nyou MUST return the actual + complete content as the final answer, not a summary.\n\nBegin! This is VERY + important to you, use the tools available and give your best Final Answer, your + job depends on it!\n\nThought:"}, {"role": "user", "content": [{"type": "text", + "text": "Analyze the quality of materials, manufacturing defects, and compliance + with standards."}, {"type": "image_url", "image_url": {"url": "https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244"}}]}, + {"role": "assistant", "content": "Thought: I need to examine the product image + to assess the quality of materials, look for any manufacturing defects, and + check compliance with standards.\n\nAction: Add image to content\nAction Input: + {\"image_url\": \"https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244\", + \"action\": \"Analyze the quality of materials, manufacturing defects, and compliance + with standards.\"}"}], "model": "gpt-4o", "stop": ["\nObservation:"], "temperature": + 0.7}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '2797' + content-type: + - application/json + cookie: + - __cf_bm=WK433.4kW8cr9rwvOlk4EZ2SfRYK9lAPwXCBYEvLcmU-1743017093-1.0.1.1-kVZyUew5rUbMk.2koGJF_rmX.fTseqN241n2M40n8KvBGoKgy6KM6xBmvFbIVWxUs2Y5ZAz8mWy9CrGjaNKSfCzxmv4.pq78z_DGHr37PgI; + _cfuvid=T77PMcuNYeyzK0tQyDOe7EScjVBVzW_7DpD3YQBqmUc-1743017093675-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.9 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-BFQetNNvmPgPxhzaKiHYsPqm8aN0i\",\n \"object\": + \"chat.completion\",\n \"created\": 1743017095,\n \"model\": \"gpt-4o-2024-08-06\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Observation: The image displays a black + leather boot with a pointed toe and a low heel. \\n\\nQuality of Materials:\\n1. + The leather appears to be of good quality, displaying a consistent texture and + finish, which suggests durability.\\n2. The material has a slight sheen, indicating + a possible finishing treatment that enhances the appearance and may offer some + protection.\\n\\nManufacturing Defects:\\n1. There are no visible stitching + errors; the seams appear straight and clean.\\n2. No apparent glue marks or + uneven edges, which indicates good craftsmanship.\\n3. There is a slight distressed + effect, but it appears intentional as part of the design rather than a defect.\\n\\nCompliance + with Standards:\\n1. The shoe design seems to comply with typical fashion standards, + showing a balance of aesthetics and functionality.\\n2. The heel height and + shape appear to provide stability, aligning with safety standards for footwear.\\n\\nFinal + Answer: The analysis of the product image reveals that the black leather boot + is made of high-quality materials with no visible manufacturing defects. The + craftsmanship is precise, with clean seams and a well-executed design. The distressed + effect appears intentional and part of the aesthetic. The boot seems to comply + with fashion and safety standards, offering both style and functionality. No + significant issues were found.\",\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 1300,\n \"completion_tokens\": + 250,\n \"total_tokens\": 1550,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n + \ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_3a5b33c01a\"\n}\n" + headers: + CF-RAY: + - 926907e45f33f1e7-GRU + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 26 Mar 2025 19:25:01 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '7242' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-input-images: + - '250000' + x-ratelimit-limit-requests: + - '50000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-input-images: + - '249999' + x-ratelimit-remaining-requests: + - '49999' + x-ratelimit-remaining-tokens: + - '149998641' + x-ratelimit-reset-input-images: + - 0s + x-ratelimit-reset-requests: + - 1ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_c5dd144c8ac1bb3bd96ffbba40707b2d + http_version: HTTP/1.1 + status_code: 200 +version: 1 diff --git a/tests/crew_test.py b/tests/crew_test.py index 39a3e9a08..e5f464131 100644 --- a/tests/crew_test.py +++ b/tests/crew_test.py @@ -3731,6 +3731,44 @@ def test_multimodal_agent_image_tool_handling(): assert result["content"][1]["type"] == "image_url" +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_multimodal_agent_describing_image_successfully(): + """ + Test that a multimodal agent can process images without validation errors. + This test reproduces the scenario from issue #2475. + """ + llm = LLM(model="openai/gpt-4o", temperature=0.7) # model with vision capabilities + + expert_analyst = Agent( + role="Visual Quality Inspector", + goal="Perform detailed quality analysis of product images", + backstory="Senior quality control expert with expertise in visual inspection", + llm=llm, + verbose=True, + allow_delegation=False, + multimodal=True, + ) + + inspection_task = Task( + description=""" + Analyze the product image at https://www.us.maguireshoes.com/cdn/shop/files/FW24-Edito-Lucena-Distressed-01_1920x.jpg?v=1736371244 with focus on: + 1. Quality of materials + 2. Manufacturing defects + 3. Compliance with standards + Provide a detailed report highlighting any issues found. + """, + expected_output="A detailed report highlighting any issues found", + agent=expert_analyst, + ) + + crew = Crew(agents=[expert_analyst], tasks=[inspection_task]) + result = crew.kickoff() + + task_output = result.tasks_output[0] + assert isinstance(task_output, TaskOutput) + assert task_output.raw == result.raw + + @pytest.mark.vcr(filter_headers=["authorization"]) def test_multimodal_agent_live_image_analysis(): """