Add Tavily Research and get Research (#5483)

* Add Tavily Research and get Research

- Added tavily research with docs to crew AI

- Added tavily get research with docs to crew AI

* Update `tavily-python` installation instructions and adjust version constraints

- Changed installation command from `pip install` to `uv add` for `tavily-python` in multiple documentation files.
- Updated version constraint for `tavily-python` in `pyproject.toml` from `>=0.7.14` to `~=0.7.14`.
- Modified the `exclude-newer` date in `uv.lock` to `2026-04-23T07:00:00Z`.

* Add Tavily Research Tool documentation in multiple languages

- Introduced `TavilyResearchTool` documentation in English, Arabic, Korean, and Portuguese.
- Updated `docs.json` to include paths for the new documentation files.
- The `TavilyResearchTool` allows CrewAI agents to perform multi-step research tasks and generate cited reports using the Tavily Research API.

* Fix Tavily research CI failures

---------

Co-authored-by: lorenzejay <lorenzejaytech@gmail.com>
Co-authored-by: Evan Rimer <evan.rimer@tavily.com>
Co-authored-by: Lorenze Jay <63378463+lorenzejay@users.noreply.github.com>
This commit is contained in:
Mani
2026-04-27 16:51:56 -04:00
committed by GitHub
parent 1337e6de34
commit 07364cf46f
26 changed files with 1296 additions and 13 deletions

View File

@@ -69,7 +69,7 @@ linkup-sdk = [
"linkup-sdk>=0.2.2",
]
tavily-python = [
"tavily-python>=0.5.4",
"tavily-python~=0.7.14",
]
hyperbrowser = [
"hyperbrowser>=0.18.0",

View File

@@ -197,6 +197,12 @@ from crewai_tools.tools.stagehand_tool.stagehand_tool import StagehandTool
from crewai_tools.tools.tavily_extractor_tool.tavily_extractor_tool import (
TavilyExtractorTool,
)
from crewai_tools.tools.tavily_get_research_tool.tavily_get_research_tool import (
TavilyGetResearchTool,
)
from crewai_tools.tools.tavily_research_tool.tavily_research_tool import (
TavilyResearchTool,
)
from crewai_tools.tools.tavily_search_tool.tavily_search_tool import TavilySearchTool
from crewai_tools.tools.txt_search_tool.txt_search_tool import TXTSearchTool
from crewai_tools.tools.vision_tool.vision_tool import VisionTool
@@ -310,6 +316,8 @@ __all__ = [
"StagehandTool",
"TXTSearchTool",
"TavilyExtractorTool",
"TavilyGetResearchTool",
"TavilyResearchTool",
"TavilySearchTool",
"VisionTool",
"WeaviateVectorSearchTool",

View File

@@ -184,6 +184,12 @@ from crewai_tools.tools.stagehand_tool.stagehand_tool import StagehandTool
from crewai_tools.tools.tavily_extractor_tool.tavily_extractor_tool import (
TavilyExtractorTool,
)
from crewai_tools.tools.tavily_get_research_tool.tavily_get_research_tool import (
TavilyGetResearchTool,
)
from crewai_tools.tools.tavily_research_tool.tavily_research_tool import (
TavilyResearchTool,
)
from crewai_tools.tools.tavily_search_tool.tavily_search_tool import TavilySearchTool
from crewai_tools.tools.txt_search_tool.txt_search_tool import TXTSearchTool
from crewai_tools.tools.vision_tool.vision_tool import VisionTool
@@ -293,6 +299,8 @@ __all__ = [
"StagehandTool",
"TXTSearchTool",
"TavilyExtractorTool",
"TavilyGetResearchTool",
"TavilyResearchTool",
"TavilySearchTool",
"VisionTool",
"WeaviateVectorSearchTool",

View File

@@ -9,7 +9,7 @@ The `TavilyExtractorTool` allows CrewAI agents to extract structured content fro
To use the `TavilyExtractorTool`, you need to install the `tavily-python` library:
```shell
pip install 'crewai[tools]' tavily-python
uv add 'crewai[tools]' tavily-python
```
You also need to set your Tavily API key as an environment variable:

View File

@@ -0,0 +1,44 @@
# Tavily Get Research Tool
## Description
The `TavilyGetResearchTool` provides an interface to Tavily's research status endpoint through the Tavily Python SDK. It retrieves the current status and results of an existing Tavily research task by `request_id`.
## Installation
To use the `TavilyGetResearchTool`, you need to install the `tavily-python` library:
```shell
uv add 'crewai[tools]' tavily-python
```
## Environment Variables
Ensure your Tavily API key is set as an environment variable:
```bash
export TAVILY_API_KEY='your_tavily_api_key'
```
## Example
```python
from crewai_tools import TavilyGetResearchTool
tavily_get_research_tool = TavilyGetResearchTool()
status_result = tavily_get_research_tool.run(
request_id="Your Request ID Here"
)
print(status_result)
```
## Arguments
The `TavilyGetResearchTool` accepts the following argument when calling the `run` method:
- `request_id` (str): Existing Tavily research request ID to retrieve.
## Response Format
The tool returns a JSON string containing the current research task status and any available results from Tavily.

View File

@@ -0,0 +1,120 @@
from __future__ import annotations
import json
import os
from typing import Any
from crewai.tools import BaseTool, EnvVar
from dotenv import load_dotenv
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
load_dotenv()
# Import the Tavily SDK clients if possible and record the outcome in
# TAVILY_AVAILABLE; TavilyGetResearchTool.__init__ reads that flag to decide
# between building clients and offering to install the package.
try:
    from tavily import AsyncTavilyClient, TavilyClient  # type: ignore[import-untyped]

    TAVILY_AVAILABLE = True
except ImportError:
    TAVILY_AVAILABLE = False
class TavilyGetResearchToolSchema(BaseModel):
    """Input schema for TavilyGetResearchTool."""

    # The only per-call argument: the ID of a previously created Tavily
    # research request. Required (no default).
    request_id: str = Field(
        ...,
        description="Existing Tavily research request ID to fetch status and results for.",
    )
class TavilyGetResearchTool(BaseTool):
    """Tool that uses the Tavily Research status endpoint to retrieve results."""

    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Tavily SDK clients; both stay None until __init__ creates them.
    _client: Any | None = PrivateAttr(default=None)
    _async_client: Any | None = PrivateAttr(default=None)
    name: str = "Tavily Get Research"
    description: str = (
        "A tool that retrieves the status and results of an existing Tavily "
        "research task by request ID. It returns Tavily responses as JSON."
    )
    args_schema: type[BaseModel] = TavilyGetResearchToolSchema
    package_dependencies: list[str] = Field(default_factory=lambda: ["tavily-python"])
    env_vars: list[EnvVar] = Field(
        default_factory=lambda: [
            EnvVar(
                name="TAVILY_API_KEY",
                description="API key for Tavily research service",
                required=True,
            ),
        ]
    )

    def __init__(self, **kwargs: Any):
        """Create the Tavily clients, or offer to install the missing SDK.

        When the ``tavily`` import succeeded, a sync and an async client are
        built from the ``TAVILY_API_KEY`` environment variable. Otherwise the
        user is asked via ``click.confirm`` whether to run
        ``uv add tavily-python``.

        Raises:
            ImportError: If ``tavily`` is not importable: when ``click``
                cannot be imported, when the user declines the install, when
                the install command fails or cannot be started, and also
                after a successful install (a restart is required).
        """
        super().__init__(**kwargs)
        if TAVILY_AVAILABLE:
            api_key = os.getenv("TAVILY_API_KEY")
            self._client = TavilyClient(api_key=api_key)
            self._async_client = AsyncTavilyClient(api_key=api_key)
        else:
            try:
                import subprocess

                import click
            except ImportError as e:
                raise ImportError(
                    "The 'tavily-python' package is required. 'click' and "
                    "'subprocess' are also needed to assist with installation "
                    "if the package is missing. Please install 'tavily-python' "
                    "manually (e.g., 'pip install tavily-python') and ensure "
                    "'click' and 'subprocess' are available."
                ) from e

            if click.confirm(
                "You are missing the 'tavily-python' package, which is required "
                "for TavilyGetResearchTool. Would you like to install it?"
            ):
                try:
                    subprocess.run(["uv", "add", "tavily-python"], check=True)  # noqa: S607
                    # The module-level import already failed, so the freshly
                    # installed package is only usable after a restart.
                    raise ImportError(
                        "'tavily-python' has been installed. Please restart your "
                        "Python application to use the TavilyGetResearchTool."
                    )
                # OSError (e.g. FileNotFoundError) is what subprocess.run raises
                # when the `uv` executable itself is missing; report it the
                # same way as a failed install instead of leaking it raw.
                except (subprocess.CalledProcessError, OSError) as e:
                    raise ImportError(
                        f"Attempted to install 'tavily-python' but failed: {e}. "
                        "Please install it manually to use the TavilyGetResearchTool."
                    ) from e
            else:
                raise ImportError(
                    "The 'tavily-python' package is required to use the "
                    "TavilyGetResearchTool. Please install it with: uv add tavily-python"
                )

    @staticmethod
    def _stringify_response(response: Any) -> str:
        """Return ``response`` as text.

        Strings are returned unchanged; anything else is serialized with
        ``json.dumps(..., indent=2)``.
        """
        if isinstance(response, str):
            return response
        return json.dumps(response, indent=2)

    def _run(self, request_id: str) -> str:
        """Synchronously retrieves Tavily research task status and results.

        Args:
            request_id: ID of the existing Tavily research request.

        Returns:
            The ``get_research`` response, stringified by
            ``_stringify_response``.

        Raises:
            ValueError: If the sync client was never created.
        """
        if not self._client:
            raise ValueError(
                "Tavily client is not initialized. Ensure 'tavily-python' is "
                "installed and API key is set."
            )
        return self._stringify_response(self._client.get_research(request_id))

    async def _arun(self, request_id: str) -> str:
        """Asynchronously retrieves Tavily research task status and results.

        Args:
            request_id: ID of the existing Tavily research request.

        Returns:
            The awaited ``get_research`` response, stringified by
            ``_stringify_response``.

        Raises:
            ValueError: If the async client was never created.
        """
        if not self._async_client:
            raise ValueError(
                "Tavily async client is not initialized. Ensure 'tavily-python' is "
                "installed and API key is set."
            )
        return self._stringify_response(
            await self._async_client.get_research(request_id)
        )

View File

@@ -0,0 +1,132 @@
# Tavily Research Tool
## Description
The `TavilyResearchTool` provides an interface to Tavily Research through the Tavily Python SDK. It creates research tasks from an `input` prompt and can optionally stream Server-Sent Events (SSE) when `stream=True`.
## Installation
To use the `TavilyResearchTool`, you need to install the `tavily-python` library:
```shell
uv add 'crewai[tools]' tavily-python
```
## Environment Variables
Ensure your Tavily API key is set as an environment variable:
```bash
export TAVILY_API_KEY='your_tavily_api_key'
```
## Example
Here's how to initialize and use the `TavilyResearchTool` within a CrewAI agent:
```python
from crewai import Agent, Task, Crew
from crewai_tools import TavilyResearchTool
# Initialize the tool
tavily_research_tool = TavilyResearchTool()
# Create an agent that uses the tool
researcher = Agent(
role="Research Analyst",
goal="Produce structured research reports",
backstory="An expert analyst who uses Tavily Research for deep web research.",
tools=[tavily_research_tool],
verbose=True,
)
# Create a task for the agent
research_task = Task(
description="Research the latest developments in AI infrastructure startups.",
expected_output="A detailed report with citations and supporting sources.",
agent=researcher,
)
# Run the crew
crew = Crew(
agents=[researcher],
tasks=[research_task],
verbose=True,
)
result = crew.kickoff()
print(result)
# Direct tool usage: create a structured research task
structured_result = tavily_research_tool.run(
input="Research the latest developments in AI infrastructure startups.",
model="pro",
output_schema={
"properties": {
"summary": {
"type": "string",
"description": "A concise summary of the research findings",
},
"key_trends": {
"type": "array",
"description": "The major trends identified in the research",
"items": {"type": "string"},
},
"companies": {
"type": "array",
"description": "Notable companies mentioned in the research",
"items": {
"type": "object",
"description": "A company entry",
"properties": {
"name": {
"type": "string",
"description": "The company name",
},
"focus": {
"type": "string",
"description": "The company's main area of focus",
},
"notable_update": {
"type": "string",
"description": "A notable recent update about the company",
},
},
"required": ["name", "focus", "notable_update"],
},
},
},
"required": ["summary", "key_trends", "companies"],
},
citation_format="apa",
)
print(structured_result)
# Direct tool usage: stream research updates
stream = tavily_research_tool.run(
input="Research the latest developments in AI infrastructure startups.",
model="mini",
stream=True,
)
for chunk in stream:
print(chunk.decode("utf-8", errors="replace"), end="")
```
## Arguments
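The `TavilyResearchTool` accepts the following arguments. `input` is supplied when calling the `run` method; the remaining options can be set as defaults during initialization or passed when calling the `run` method: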
- `input` (str): The research task or question to investigate.
- `model` (Literal["mini", "pro", "auto"], optional): The Tavily research model to use. Defaults to `"auto"`.
- `output_schema` (dict[str, Any], optional): A JSON Schema used to structure the research output. Tavily expects top-level `properties` and optional `required` keys, and each property should include a `description`.
- `stream` (bool, optional): Whether to return Tavily's streaming SSE chunk generator. Defaults to `False`.
- `citation_format` (Literal["numbered", "mla", "apa", "chicago"], optional): Citation format for the report. Defaults to `"numbered"`.
## Response Format
The tool returns:
- A JSON string when creating a non-streaming research task
- A byte generator of SSE chunks when `stream=True`
Refer to the Tavily Research API documentation for the full response structure and streaming event format.

View File

@@ -0,0 +1,200 @@
from __future__ import annotations
from collections.abc import AsyncGenerator, Generator
import json
import os
from typing import Any, Literal, cast
from crewai.tools import BaseTool, EnvVar
from dotenv import load_dotenv
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
load_dotenv()
# Import the Tavily SDK clients if possible and record the outcome in
# TAVILY_AVAILABLE; TavilyResearchTool.__init__ reads that flag to decide
# between building clients and offering to install the package.
try:
    from tavily import (  # type: ignore[import-untyped, import-not-found, unused-ignore]
        AsyncTavilyClient,
        TavilyClient,
    )

    TAVILY_AVAILABLE = True
except ImportError:
    TAVILY_AVAILABLE = False
class TavilyResearchToolSchema(BaseModel):
    """Input schema for TavilyResearchTool."""

    # Per-call arguments for the tool; only ``input`` is required.
    # NOTE(review): the optional fields below default to concrete values, not
    # None, while TavilyResearchTool._run/_arun fall back to the tool-level
    # defaults only when they receive None. If the caller fills in these
    # schema defaults before invoking _run, the tool-level defaults would
    # never apply — confirm against how validated arguments are passed on.
    input: str = Field(
        ...,
        description="The research task or question to investigate.",
    )
    model: Literal["mini", "pro", "auto"] = Field(
        default="auto",
        description="The model used by the Tavily research agent.",
    )
    # A JSON Schema dict; None means no structured output was requested.
    output_schema: dict[str, Any] | None = Field(
        default=None,
        description="Optional JSON Schema that structures the research output.",
    )
    stream: bool = Field(
        default=False,
        description="Whether to stream research progress and results as SSE chunks.",
    )
    citation_format: Literal["numbered", "mla", "apa", "chicago"] = Field(
        default="numbered",
        description="Citation format for the research report.",
    )
class TavilyResearchTool(BaseTool):
    """Tool that uses the Tavily Research API to create research tasks."""

    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Tavily SDK clients; both stay None until __init__ creates them.
    _client: Any | None = PrivateAttr(default=None)
    _async_client: Any | None = PrivateAttr(default=None)
    name: str = "Tavily Research"
    description: str = (
        "A tool that creates Tavily research tasks and can stream research "
        "progress and results. It returns Tavily responses as JSON or SSE chunks."
    )
    args_schema: type[BaseModel] = TavilyResearchToolSchema
    # Tool-level defaults; _run/_arun use them whenever the matching per-call
    # argument is None.
    model: Literal["mini", "pro", "auto"] = Field(
        default="auto",
        description="Default model used for new Tavily research tasks.",
    )
    output_schema: dict[str, Any] | None = Field(
        default=None,
        description="Default JSON Schema used to structure research output.",
    )
    stream: bool = Field(
        default=False,
        description="Whether new Tavily research tasks should stream responses by default.",
    )
    citation_format: Literal["numbered", "mla", "apa", "chicago"] = Field(
        default="numbered",
        description="Default citation format for Tavily research results.",
    )
    package_dependencies: list[str] = Field(default_factory=lambda: ["tavily-python"])
    env_vars: list[EnvVar] = Field(
        default_factory=lambda: [
            EnvVar(
                name="TAVILY_API_KEY",
                description="API key for Tavily research service",
                required=True,
            ),
        ]
    )

    def __init__(self, **kwargs: Any):
        """Create the Tavily clients, or offer to install the missing SDK.

        When the ``tavily`` import succeeded, a sync and an async client are
        built from the ``TAVILY_API_KEY`` environment variable. Otherwise the
        user is asked via ``click.confirm`` whether to run
        ``uv add tavily-python``.

        Raises:
            ImportError: If ``tavily`` is not importable: when ``click``
                cannot be imported, when the user declines the install, when
                the install command fails or cannot be started, and also
                after a successful install (a restart is required).
        """
        super().__init__(**kwargs)
        if TAVILY_AVAILABLE:
            api_key = os.getenv("TAVILY_API_KEY")
            self._client = TavilyClient(api_key=api_key)
            self._async_client = AsyncTavilyClient(api_key=api_key)
        else:
            try:
                import subprocess

                import click
            except ImportError as e:
                raise ImportError(
                    "The 'tavily-python' package is required. 'click' and "
                    "'subprocess' are also needed to assist with installation "
                    "if the package is missing. Please install 'tavily-python' "
                    "manually (e.g., 'pip install tavily-python') and ensure "
                    "'click' and 'subprocess' are available."
                ) from e

            if click.confirm(
                "You are missing the 'tavily-python' package, which is required "
                "for TavilyResearchTool. Would you like to install it?"
            ):
                try:
                    subprocess.run(["uv", "add", "tavily-python"], check=True)  # noqa: S607
                    # The module-level import already failed, so the freshly
                    # installed package is only usable after a restart.
                    raise ImportError(
                        "'tavily-python' has been installed. Please restart your "
                        "Python application to use the TavilyResearchTool."
                    )
                # OSError (e.g. FileNotFoundError) is what subprocess.run raises
                # when the `uv` executable itself is missing; report it the
                # same way as a failed install instead of leaking it raw.
                except (subprocess.CalledProcessError, OSError) as e:
                    raise ImportError(
                        f"Attempted to install 'tavily-python' but failed: {e}. "
                        "Please install it manually to use the TavilyResearchTool."
                    ) from e
            else:
                raise ImportError(
                    "The 'tavily-python' package is required to use the "
                    "TavilyResearchTool. Please install it with: uv add tavily-python"
                )

    @staticmethod
    def _stringify_response(response: Any) -> str:
        """Return ``response`` as text.

        Strings are returned unchanged; anything else is serialized with
        ``json.dumps(..., indent=2)``.
        """
        if isinstance(response, str):
            return response
        return json.dumps(response, indent=2)

    def _run(
        self,
        input: str,
        model: Literal["mini", "pro", "auto"] | None = None,
        output_schema: dict[str, Any] | None = None,
        stream: bool | None = None,
        citation_format: Literal["numbered", "mla", "apa", "chicago"] | None = None,
    ) -> str | Generator[bytes, None, None]:
        """Synchronously creates Tavily research tasks or streams results.

        Every optional argument left as ``None`` falls back to the attribute
        of the same name on this tool instance.

        Args:
            input: The research task or question.
            model: Research model to use.
            output_schema: JSON Schema for structured output.
            stream: Whether to request a streaming response.
            citation_format: Citation format for the report.

        Returns:
            When streaming is in effect, the client's result is returned
            as-is; otherwise it is stringified by ``_stringify_response``.

        Raises:
            ValueError: If the sync client was never created.
        """
        if not self._client:
            raise ValueError(
                "Tavily client is not initialized. Ensure 'tavily-python' is "
                "installed and API key is set."
            )
        # Resolved once because it both goes to the API and selects the
        # return shape below.
        use_stream = self.stream if stream is None else stream
        result = self._client.research(
            input=input,
            model=self.model if model is None else model,
            output_schema=self.output_schema
            if output_schema is None
            else output_schema,
            stream=use_stream,
            citation_format=(
                self.citation_format if citation_format is None else citation_format
            ),
        )
        if use_stream:
            return cast(Generator[bytes, None, None], result)
        return self._stringify_response(result)

    async def _arun(
        self,
        input: str,
        model: Literal["mini", "pro", "auto"] | None = None,
        output_schema: dict[str, Any] | None = None,
        stream: bool | None = None,
        citation_format: Literal["numbered", "mla", "apa", "chicago"] | None = None,
    ) -> str | AsyncGenerator[bytes, None]:
        """Asynchronously creates Tavily research tasks or streams results.

        Every optional argument left as ``None`` falls back to the attribute
        of the same name on this tool instance.

        Args:
            input: The research task or question.
            model: Research model to use.
            output_schema: JSON Schema for structured output.
            stream: Whether to request a streaming response.
            citation_format: Citation format for the report.

        Returns:
            When streaming is in effect, the awaited client result is
            returned as-is; otherwise it is stringified by
            ``_stringify_response``.

        Raises:
            ValueError: If the async client was never created.
        """
        if not self._async_client:
            raise ValueError(
                "Tavily async client is not initialized. Ensure 'tavily-python' is "
                "installed and API key is set."
            )
        # Resolved once because it both goes to the API and selects the
        # return shape below.
        use_stream = self.stream if stream is None else stream
        result = await self._async_client.research(
            input=input,
            model=self.model if model is None else model,
            output_schema=self.output_schema
            if output_schema is None
            else output_schema,
            stream=use_stream,
            citation_format=(
                self.citation_format if citation_format is None else citation_format
            ),
        )
        if use_stream:
            return cast(AsyncGenerator[bytes, None], result)
        return self._stringify_response(result)

View File

@@ -9,7 +9,7 @@ The `TavilySearchTool` provides an interface to the Tavily Search API, enabling
To use the `TavilySearchTool`, you need to install the `tavily-python` library:
```shell
pip install 'crewai[tools]' tavily-python
uv add 'crewai[tools]' tavily-python
```
## Environment Variables

View File

@@ -25039,6 +25039,243 @@
"type": "object"
}
},
{
"description": "A tool that retrieves the status and results of an existing Tavily research task by request ID. It returns Tavily responses as JSON.",
"env_vars": [
{
"default": null,
"description": "API key for Tavily research service",
"name": "TAVILY_API_KEY",
"required": true
}
],
"humanized_name": "Tavily Get Research",
"init_params_schema": {
"$defs": {
"EnvVar": {
"properties": {
"default": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Default"
},
"description": {
"title": "Description",
"type": "string"
},
"name": {
"title": "Name",
"type": "string"
},
"required": {
"default": true,
"title": "Required",
"type": "boolean"
}
},
"required": [
"name",
"description"
],
"title": "EnvVar",
"type": "object"
}
},
"description": "Tool that uses the Tavily Research status endpoint to retrieve results.",
"properties": {},
"required": [],
"title": "TavilyGetResearchTool",
"type": "object"
},
"name": "TavilyGetResearchTool",
"package_dependencies": [
"tavily-python"
],
"run_params_schema": {
"description": "Input schema for TavilyGetResearchTool.",
"properties": {
"request_id": {
"description": "Existing Tavily research request ID to fetch status and results for.",
"title": "Request Id",
"type": "string"
}
},
"required": [
"request_id"
],
"title": "TavilyGetResearchToolSchema",
"type": "object"
}
},
{
"description": "A tool that creates Tavily research tasks and can stream research progress and results. It returns Tavily responses as JSON or SSE chunks.",
"env_vars": [
{
"default": null,
"description": "API key for Tavily research service",
"name": "TAVILY_API_KEY",
"required": true
}
],
"humanized_name": "Tavily Research",
"init_params_schema": {
"$defs": {
"EnvVar": {
"properties": {
"default": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Default"
},
"description": {
"title": "Description",
"type": "string"
},
"name": {
"title": "Name",
"type": "string"
},
"required": {
"default": true,
"title": "Required",
"type": "boolean"
}
},
"required": [
"name",
"description"
],
"title": "EnvVar",
"type": "object"
}
},
"description": "Tool that uses the Tavily Research API to create research tasks.",
"properties": {
"citation_format": {
"default": "numbered",
"description": "Default citation format for Tavily research results.",
"enum": [
"numbered",
"mla",
"apa",
"chicago"
],
"title": "Citation Format",
"type": "string"
},
"model": {
"default": "auto",
"description": "Default model used for new Tavily research tasks.",
"enum": [
"mini",
"pro",
"auto"
],
"title": "Model",
"type": "string"
},
"output_schema": {
"anyOf": [
{
"additionalProperties": true,
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"description": "Default JSON Schema used to structure research output.",
"title": "Output Schema"
},
"stream": {
"default": false,
"description": "Whether new Tavily research tasks should stream responses by default.",
"title": "Stream",
"type": "boolean"
}
},
"required": [],
"title": "TavilyResearchTool",
"type": "object"
},
"name": "TavilyResearchTool",
"package_dependencies": [
"tavily-python"
],
"run_params_schema": {
"description": "Input schema for TavilyResearchTool.",
"properties": {
"citation_format": {
"default": "numbered",
"description": "Citation format for the research report.",
"enum": [
"numbered",
"mla",
"apa",
"chicago"
],
"title": "Citation Format",
"type": "string"
},
"input": {
"description": "The research task or question to investigate.",
"title": "Input",
"type": "string"
},
"model": {
"default": "auto",
"description": "The model used by the Tavily research agent.",
"enum": [
"mini",
"pro",
"auto"
],
"title": "Model",
"type": "string"
},
"output_schema": {
"anyOf": [
{
"additionalProperties": true,
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"description": "Optional JSON Schema that structures the research output.",
"title": "Output Schema"
},
"stream": {
"default": false,
"description": "Whether to stream research progress and results as SSE chunks.",
"title": "Stream",
"type": "boolean"
}
},
"required": [
"input"
],
"title": "TavilyResearchToolSchema",
"type": "object"
}
},
{
"description": "A tool that performs web searches using the Tavily Search API. It returns a JSON object containing the search results.",
"env_vars": [