Merge pull request #82 from crewAIInc/feat/dall-e-tool

feat: Add Dall-E tool to generate images
Authored by João Moura on 2024-07-30 18:37:34 -07:00; committed by GitHub.
2 changed files with 89 additions and 0 deletions


@@ -0,0 +1,41 @@
# DALL-E Tool
## Description
This tool gives the Agent the ability to generate images with OpenAI's DALL-E, a transformer-based model that creates images from textual descriptions. The Agent supplies a text prompt describing the desired image, and the tool returns the URL of the generated image.
## Installation
Install the crewai_tools package
```shell
pip install 'crewai[tools]'
```
## Example
Keep in mind that when using this tool, the prompt text is generated by the Agent itself; it must be a description of the image you want to generate.
```python
from crewai import Agent
from crewai_tools import DallETool

Agent(
    ...
    tools=[DallETool()],
)
```
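For a more complete picture, a hypothetical end-to-end setup might wire the tool into an agent and task as sketched below; the role, goal, backstory, and task description are illustrative placeholders, not part of this tool.
```python
from crewai import Agent, Task, Crew
from crewai_tools import DallETool

# Hypothetical agent and task, shown only to illustrate where the tool fits in.
image_agent = Agent(
    role="Image Designer",
    goal="Turn textual briefs into finished images",
    backstory="An assistant specialized in writing strong prompts for image models.",
    tools=[DallETool()],
)

image_task = Task(
    description="Generate an image of a lighthouse at sunset.",
    expected_output="A URL pointing to the generated image.",
    agent=image_agent,
)

crew = Crew(agents=[image_agent], tasks=[image_task])
result = crew.kickoff()
print(result)
```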
If needed, you can also tweak the parameters of the DALL-E model by passing them as arguments to the `DallETool` class. For example:
```python
from crewai import Agent
from crewai_tools import DallETool

dalle_tool = DallETool(model="dall-e-3",
                       size="1024x1024",
                       quality="standard",
                       n=1)

Agent(
    ...
    tools=[dalle_tool]
)
```
The parameters are based on the `client.images.generate` method from the OpenAI API. For more information on the parameters, please refer to the [OpenAI API documentation](https://platform.openai.com/docs/guides/images/introduction?lang=python).
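For reference, here is a rough sketch of the underlying OpenAI call these parameters map onto; it assumes a valid `OPENAI_API_KEY` in the environment and mirrors, rather than replaces, what the tool does internally:
```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Approximately what DallETool runs internally with the configured parameters.
response = client.images.generate(
    model="dall-e-3",
    prompt="A watercolor painting of a lighthouse at sunset",
    size="1024x1024",
    quality="standard",
    n=1,
)

print(response.data[0].url)
```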


@@ -0,0 +1,48 @@
import json
from typing import Type

from crewai_tools.tools.base_tool import BaseTool
from openai import OpenAI
from pydantic.v1 import BaseModel, Field


class ImagePromptSchema(BaseModel):
    """Input for Dall-E Tool."""

    image_description: str = Field(
        ..., description="Description of the image to be generated by Dall-E."
    )


class DallETool(BaseTool):
    name: str = "Dall-E Tool"
    description: str = "Generates images using OpenAI's Dall-E model."
    args_schema: Type[BaseModel] = ImagePromptSchema

    # Default generation parameters; override them when instantiating the tool.
    model: str = "dall-e-3"
    size: str = "1024x1024"
    quality: str = "standard"
    n: int = 1

    def _run(self, **kwargs) -> str:
        client = OpenAI()

        image_description = kwargs.get("image_description")
        if not image_description:
            return "Image description is required."

        # Call the OpenAI Images API with the configured parameters.
        response = client.images.generate(
            model=self.model,
            prompt=image_description,
            size=self.size,
            quality=self.quality,
            n=self.n,
        )

        # Return the generated image URL and the revised prompt as a JSON string.
        image_data = json.dumps(
            {
                "image_url": response.data[0].url,
                "image_description": response.data[0].revised_prompt,
            }
        )

        return image_data
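As a quick sanity check, the tool can be exercised directly and its JSON result parsed. The snippet below is a hypothetical test harness (agents normally invoke the tool themselves) and assumes `OPENAI_API_KEY` is set:
```python
import json

from crewai_tools import DallETool

# Hypothetical direct invocation for testing purposes only.
tool = DallETool(model="dall-e-3", size="1024x1024", quality="standard", n=1)
raw = tool._run(image_description="A watercolor painting of a lighthouse at sunset")

result = json.loads(raw)
print(result["image_url"])          # URL of the generated image
print(result["image_description"])  # the revised prompt returned by DALL-E
```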