mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 16:48:30 +00:00
* fix: remove kwargs from all (except mysql & pg) RagTools
The agent uses the tool description to decide what to propagate when a tool with **kwargs is found, but this often leads to failures during the tool invocation step.
This happens because the final description ends up like this:
```
CrewStructuredTool(name='Knowledge base', description='Tool Name: Knowledge base
Tool Arguments: {'query': {'description': None, 'type': 'str'}, 'kwargs': {'description': None, 'type': 'Any'}}
Tool Description: A knowledge base that can be used to answer questions.')
```
The agent then tries to infer and pass a kwargs parameter, which isn’t supported by the schema at all.
* feat: adding test to search tools
* feat: add db (chromadb folder) to .gitignore
* fix: fix github search integration
A few attributes were missing when calling the .add method: data_type and loader.
Also, update the search query according to the EmbedChain documentation: the query must include the type and repo keys.
* fix: rollback YoutubeChannel parameter
* chore: fix type hinting for CodeDocs search
* fix: ensure proper configuration when calling `add`
According to the documentation, some search methods must be defined as either a loader or a data_type. This commit ensures that.
* build: add optional-dependencies for github and xml search
* test: mocking external requests from search_tool tests
* build: add pytest-recording as a dev dependency
310 lines
11 KiB
Python
310 lines
11 KiB
Python
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
from unittest.mock import ANY, MagicMock
|
|
|
|
import pytest
|
|
from embedchain.models.data_type import DataType
|
|
|
|
from crewai_tools.tools import (
|
|
CodeDocsSearchTool,
|
|
CSVSearchTool,
|
|
DirectorySearchTool,
|
|
DOCXSearchTool,
|
|
GithubSearchTool,
|
|
JSONSearchTool,
|
|
MDXSearchTool,
|
|
PDFSearchTool,
|
|
TXTSearchTool,
|
|
WebsiteSearchTool,
|
|
XMLSearchTool,
|
|
YoutubeChannelSearchTool,
|
|
YoutubeVideoSearchTool,
|
|
)
|
|
from crewai_tools.tools.rag.rag_tool import Adapter
|
|
|
|
pytestmark = [pytest.mark.vcr(filter_headers=["authorization"])]
|
|
|
|
|
|
@pytest.fixture
def mock_adapter():
    """Provide a ``MagicMock`` whose attributes are restricted by ``spec=Adapter``."""
    return MagicMock(spec=Adapter)
|
|
|
|
|
|
def test_directory_search_tool():
    """Run DirectorySearchTool against a temporary directory holding one text file."""
    with tempfile.TemporaryDirectory() as workdir:
        # Seed the directory with a single file for the tool to search.
        Path(workdir, "test.txt").write_text(
            "This is a test file for directory search"
        )

        searcher = DirectorySearchTool(directory=workdir)
        answer = searcher._run(search_query="test file")
        assert "test file" in answer.lower()
|
|
|
|
|
|
def test_pdf_search_tool(mock_adapter):
    """Check that PDFSearchTool passes the PDF path and the query to the adapter.

    Two scenarios are exercised: the PDF given to the constructor, and the
    PDF given as an argument to ``_run``.
    """
    pdf_path = "test.pdf"
    question = "test content"
    mock_adapter.query.return_value = "this is a test"

    # Scenario 1: PDF supplied at construction time.
    searcher = PDFSearchTool(pdf=pdf_path, adapter=mock_adapter)
    answer = searcher._run(query=question)
    assert "this is a test" in answer.lower()
    mock_adapter.add.assert_called_once_with(pdf_path, data_type=DataType.PDF_FILE)
    mock_adapter.query.assert_called_once_with(question)

    # Clear recorded calls so the "called once" assertions below start fresh.
    mock_adapter.add.reset_mock()
    mock_adapter.query.reset_mock()

    # Scenario 2: PDF supplied at run time.
    searcher = PDFSearchTool(adapter=mock_adapter)
    answer = searcher._run(pdf=pdf_path, query=question)
    assert "this is a test" in answer.lower()
    mock_adapter.add.assert_called_once_with(pdf_path, data_type=DataType.PDF_FILE)
    mock_adapter.query.assert_called_once_with(question)
|
|
|
|
|
|
def test_txt_search_tool():
    """Add a temporary ``.txt`` file to TXTSearchTool and query it."""
    # delete=False keeps the file on disk after the handle closes; it is
    # removed explicitly in the ``finally`` clause.
    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as handle:
        txt_path = handle.name
        handle.write(b"This is a test file for txt search")

    try:
        searcher = TXTSearchTool()
        searcher.add(txt_path)
        answer = searcher._run(search_query="test file")
        assert "test file" in answer.lower()
    finally:
        os.unlink(txt_path)
|
|
|
|
|
|
def test_docx_search_tool(mock_adapter):
    """Check that DOCXSearchTool passes the document path and the query to the adapter.

    Two scenarios are exercised: the document given to the constructor, and
    the document given as an argument to ``_run``.
    """
    docx_path = "test.docx"
    question = "test content"
    mock_adapter.query.return_value = "this is a test"

    # Scenario 1: document supplied at construction time.
    searcher = DOCXSearchTool(docx=docx_path, adapter=mock_adapter)
    answer = searcher._run(search_query=question)
    assert "this is a test" in answer.lower()
    mock_adapter.add.assert_called_once_with(docx_path, data_type=DataType.DOCX)
    mock_adapter.query.assert_called_once_with(question)

    # Clear recorded calls so the "called once" assertions below start fresh.
    mock_adapter.add.reset_mock()
    mock_adapter.query.reset_mock()

    # Scenario 2: document supplied at run time.
    searcher = DOCXSearchTool(adapter=mock_adapter)
    answer = searcher._run(docx=docx_path, search_query=question)
    assert "this is a test" in answer.lower()
    mock_adapter.add.assert_called_once_with(docx_path, data_type=DataType.DOCX)
    mock_adapter.query.assert_called_once_with(question)
|
|
|
|
|
|
def test_json_search_tool():
    """Query a temporary ``.json`` file through JSONSearchTool."""
    # delete=False keeps the file on disk after the handle closes; it is
    # removed explicitly in the ``finally`` clause.
    with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as handle:
        json_file = handle.name
        handle.write(b'{"test": "This is a test JSON file"}')

    try:
        searcher = JSONSearchTool()
        # The file path is passed at run time rather than to the constructor.
        answer = searcher._run(search_query="test JSON", json_path=json_file)
        assert "test json" in answer.lower()
    finally:
        os.unlink(json_file)
|
|
|
|
|
|
def test_xml_search_tool(mock_adapter):
    """Check that XMLSearchTool passes a run-time XML path and the query to the adapter."""
    xml_path = "test.xml"
    question = "test XML"
    mock_adapter.query.return_value = "this is a test"

    searcher = XMLSearchTool(adapter=mock_adapter)
    answer = searcher._run(search_query=question, xml=xml_path)

    assert "this is a test" in answer.lower()
    # The expected ``add`` call carries only the path, with no keyword arguments.
    mock_adapter.add.assert_called_once_with(xml_path)
    mock_adapter.query.assert_called_once_with(question)
|
|
|
|
|
|
def test_csv_search_tool():
    """Add a temporary ``.csv`` file to CSVSearchTool and query it."""
    # delete=False keeps the file on disk after the handle closes; it is
    # removed explicitly in the ``finally`` clause.
    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as handle:
        csv_path = handle.name
        handle.write(b"name,description\ntest,This is a test CSV file")

    try:
        searcher = CSVSearchTool()
        searcher.add(csv_path)
        answer = searcher._run(search_query="test CSV")
        assert "test csv" in answer.lower()
    finally:
        os.unlink(csv_path)
|
|
|
|
|
|
def test_mdx_search_tool():
    """Add a temporary ``.mdx`` file to MDXSearchTool and query it."""
    # delete=False keeps the file on disk after the handle closes; it is
    # removed explicitly in the ``finally`` clause.
    with tempfile.NamedTemporaryFile(suffix=".mdx", delete=False) as handle:
        mdx_path = handle.name
        handle.write(b"# Test MDX\nThis is a test MDX file")

    try:
        searcher = MDXSearchTool()
        searcher.add(mdx_path)
        answer = searcher._run(search_query="test MDX")
        assert "test mdx" in answer.lower()
    finally:
        os.unlink(mdx_path)
|
|
|
|
|
|
def test_website_search_tool(mock_adapter):
    """Check that WebsiteSearchTool passes the site URL and the query to the adapter.

    Two scenarios are exercised: the website given to the constructor, and
    the website given as an argument to ``_run``.
    """
    mock_adapter.query.return_value = "this is a test"
    site_url = "https://crewai.com"
    question = "what is crewai?"

    # Scenario 1: website supplied at construction time.
    searcher = WebsiteSearchTool(website=site_url, adapter=mock_adapter)
    answer = searcher._run(search_query=question)

    mock_adapter.query.assert_called_once_with("what is crewai?")
    mock_adapter.add.assert_called_once_with(site_url, data_type=DataType.WEB_PAGE)
    assert "this is a test" in answer.lower()

    # Clear recorded calls so the "called once" assertions below start fresh.
    mock_adapter.add.reset_mock()
    mock_adapter.query.reset_mock()

    # Scenario 2: website supplied at run time.
    searcher = WebsiteSearchTool(adapter=mock_adapter)
    answer = searcher._run(website=site_url, search_query=question)

    mock_adapter.query.assert_called_once_with("what is crewai?")
    mock_adapter.add.assert_called_once_with(site_url, data_type=DataType.WEB_PAGE)
    assert "this is a test" in answer.lower()
|
|
|
|
|
|
def test_youtube_video_search_tool(mock_adapter):
    """Check that YoutubeVideoSearchTool passes the video URL and the query to the adapter.

    Two scenarios are exercised: the URL given to the constructor, and the
    URL given as an argument to ``_run``.
    """
    mock_adapter.query.return_value = "some video description"
    video_url = "https://www.youtube.com/watch?v=sample-video-id"
    question = "what is the video about?"

    # Scenario 1: URL supplied at construction time.
    searcher = YoutubeVideoSearchTool(
        youtube_video_url=video_url,
        adapter=mock_adapter,
    )
    answer = searcher._run(search_query=question)
    assert "some video description" in answer
    mock_adapter.add.assert_called_once_with(
        video_url, data_type=DataType.YOUTUBE_VIDEO
    )
    mock_adapter.query.assert_called_once_with(question)

    # Clear recorded calls so the "called once" assertions below start fresh.
    mock_adapter.add.reset_mock()
    mock_adapter.query.reset_mock()

    # Scenario 2: URL supplied at run time.
    searcher = YoutubeVideoSearchTool(adapter=mock_adapter)
    answer = searcher._run(youtube_video_url=video_url, search_query=question)
    assert "some video description" in answer
    mock_adapter.add.assert_called_once_with(
        video_url, data_type=DataType.YOUTUBE_VIDEO
    )
    mock_adapter.query.assert_called_once_with(question)
|
|
|
|
|
|
def test_youtube_channel_search_tool(mock_adapter):
    """Check that YoutubeChannelSearchTool passes the channel handle and the query to the adapter.

    Two scenarios are exercised: the handle given to the constructor, and the
    handle given as an argument to ``_run``.
    """
    mock_adapter.query.return_value = "channel description"
    handle = "@crewai"
    question = "what is the channel about?"

    # Scenario 1: handle supplied at construction time.
    searcher = YoutubeChannelSearchTool(
        youtube_channel_handle=handle, adapter=mock_adapter
    )
    answer = searcher._run(search_query=question)
    assert "channel description" in answer
    mock_adapter.add.assert_called_once_with(
        handle, data_type=DataType.YOUTUBE_CHANNEL
    )
    mock_adapter.query.assert_called_once_with(question)

    # Clear recorded calls so the "called once" assertions below start fresh.
    mock_adapter.add.reset_mock()
    mock_adapter.query.reset_mock()

    # Scenario 2: handle supplied at run time.
    searcher = YoutubeChannelSearchTool(adapter=mock_adapter)
    answer = searcher._run(youtube_channel_handle=handle, search_query=question)
    assert "channel description" in answer
    mock_adapter.add.assert_called_once_with(
        handle, data_type=DataType.YOUTUBE_CHANNEL
    )
    mock_adapter.query.assert_called_once_with(question)
|
|
|
|
|
|
def test_code_docs_search_tool(mock_adapter):
    """Check that CodeDocsSearchTool passes the docs URL and the query to the adapter.

    Two scenarios are exercised: the URL given to the constructor, and the
    URL given as an argument to ``_run``.
    """
    mock_adapter.query.return_value = "test documentation"
    site = "https://crewai.com/any-docs-url"
    question = "test documentation"

    # Scenario 1: docs URL supplied at construction time.
    searcher = CodeDocsSearchTool(docs_url=site, adapter=mock_adapter)
    answer = searcher._run(search_query=question)
    assert "test documentation" in answer
    mock_adapter.add.assert_called_once_with(site, data_type=DataType.DOCS_SITE)
    mock_adapter.query.assert_called_once_with(question)

    # Clear recorded calls so the "called once" assertions below start fresh.
    mock_adapter.add.reset_mock()
    mock_adapter.query.reset_mock()

    # Scenario 2: docs URL supplied at run time.
    searcher = CodeDocsSearchTool(adapter=mock_adapter)
    answer = searcher._run(docs_url=site, search_query=question)
    assert "test documentation" in answer
    mock_adapter.add.assert_called_once_with(site, data_type=DataType.DOCS_SITE)
    mock_adapter.query.assert_called_once_with(question)
|
|
|
|
|
|
def test_github_search_tool(mock_adapter):
    """Check how GithubSearchTool builds the ``add`` call sent to the adapter.

    Four scenarios are exercised in sequence against the same mock adapter,
    with its recorded calls cleared between scenarios:

    1. repo and content types given to the constructor;
    2. repo and content types given to ``_run``;
    3. repo given to ``_run`` with no content types;
    4. no repo given at all, in which case ``add`` must not be called.
    """
    repo = "crewai/crewai"
    question = "tell me about crewai repo"
    mock_adapter.query.return_value = "repo description"

    # ensure the provided repo and content types are used after initialization
    tool = GithubSearchTool(
        gh_token="test_token",
        github_repo=repo,
        content_types=["code"],
        adapter=mock_adapter,
    )
    result = tool._run(search_query=question)
    assert "repo description" in result
    mock_adapter.add.assert_called_once_with(
        "repo:crewai/crewai type:code", data_type="github", loader=ANY
    )
    mock_adapter.query.assert_called_once_with(question)

    # ensure content types provided by run call is used
    mock_adapter.query.reset_mock()
    mock_adapter.add.reset_mock()

    tool = GithubSearchTool(gh_token="test_token", adapter=mock_adapter)
    result = tool._run(
        github_repo=repo,
        content_types=["code", "issue"],
        search_query=question,
    )
    assert "repo description" in result
    mock_adapter.add.assert_called_once_with(
        "repo:crewai/crewai type:code,issue", data_type="github", loader=ANY
    )
    mock_adapter.query.assert_called_once_with(question)

    # ensure default content types are used if not provided
    mock_adapter.query.reset_mock()
    mock_adapter.add.reset_mock()

    tool = GithubSearchTool(gh_token="test_token", adapter=mock_adapter)
    result = tool._run(
        github_repo=repo,
        search_query=question,
    )
    assert "repo description" in result
    mock_adapter.add.assert_called_once_with(
        "repo:crewai/crewai type:code,repo,pr,issue", data_type="github", loader=ANY
    )
    mock_adapter.query.assert_called_once_with(question)

    # ensure nothing is added if no repo is provided
    mock_adapter.query.reset_mock()
    mock_adapter.add.reset_mock()

    tool = GithubSearchTool(gh_token="test_token", adapter=mock_adapter)
    # The return value is not inspected here; only the adapter calls matter.
    tool._run(search_query=question)
    mock_adapter.add.assert_not_called()
    mock_adapter.query.assert_called_once_with(question)
|