Compare commits

...

7 Commits

Author SHA1 Message Date
Lucas Gomide
29dc2c2429 refactor: use console.print instead of print 2025-06-24 12:38:17 -03:00
Lucas Gomide
4bfafe00ee feat: skip cli/templates folder while looking for Crew 2025-06-24 12:30:29 -03:00
Lucas Gomide
a20ec5ed5b feat: improve Crew search while resetting their memories
Some memories couldn't be reset due to their reliance on relative external sources like `PDFKnowledge`. This was caused by the need to run the reset memories command from the `src` directory, which could break when external files weren't accessible from that path.

This commit allows the reset command to be executed from the root of the project — the same location typically used to run a crew — improving compatibility and reducing friction.
2025-06-24 12:30:29 -03:00
Lucas Gomide
01e208ee77 test: add tests to test get_crews 2025-06-24 12:30:28 -03:00
Rostyslav Borovyk
c96d4a6823 Add Oxylabs Web Scraping tools (#2905)
Some checks failed
Notify Downstream / notify-downstream (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled
* Add Oxylabs tools

* Review updates

* Review updates

---------

Co-authored-by: Tony Kipkemboi <iamtonykipkemboi@gmail.com>
2025-06-23 13:58:16 -04:00
Lucas Gomide
59032817c7 docs: update recommendation filters for MCP and Enterprise tools (#3041)
Some checks failed
Notify Downstream / notify-downstream (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled
2025-06-20 13:35:26 -04:00
Lucas Gomide
e9d8a853ea feat: support to initialize a tool from defined Tool attributes (#3023)
* feat: support to initialize a tool from defined Tool attributes

* fix: ensure Agent is able to load a list of Tools dynamically
2025-06-20 10:53:37 -04:00
9 changed files with 469 additions and 57 deletions

View File

@@ -134,7 +134,7 @@
"tools/web-scraping/stagehandtool",
"tools/web-scraping/firecrawlcrawlwebsitetool",
"tools/web-scraping/firecrawlscrapewebsitetool",
"tools/web-scraping/firecrawlsearchtool"
"tools/web-scraping/oxylabsscraperstool"
]
},
{

View File

@@ -124,7 +124,7 @@ from crewai_tools import CrewaiEnterpriseTools
enterprise_tools = CrewaiEnterpriseTools(
actions_list=["gmail_find_email"] # only gmail_find_email tool will be available
)
gmail_tool = enterprise_tools[0]
gmail_tool = enterprise_tools["gmail_find_email"]
gmail_agent = Agent(
role="Gmail Manager",

View File

@@ -85,6 +85,22 @@ with MCPServerAdapter(server_params) as mcp_tools:
```
This general pattern shows how to integrate tools. For specific examples tailored to each transport, refer to the detailed guides below.
## Filtering Tools
```python
with MCPServerAdapter(server_params) as mcp_tools:
print(f"Available tools: {[tool.name for tool in mcp_tools]}")
my_agent = Agent(
role="MCP Tool User",
goal="Utilize tools from an MCP server.",
backstory="I can connect to MCP servers and use their tools.",
tools=[mcp_tools["tool_name"]], # Pass only the selected tool to your agent
reasoning=True,
verbose=True
)
# ... rest of your crew setup ...
```
## Explore MCP Integrations
<CardGroup cols={2}>

View File

@@ -56,6 +56,10 @@ These tools enable your agents to interact with the web, extract data from websi
<Card title="Stagehand Tool" icon="hand" href="/tools/web-scraping/stagehandtool">
Intelligent browser automation with natural language commands.
</Card>
<Card title="Oxylabs Scraper Tool" icon="globe" href="/tools/web-scraping/oxylabsscraperstool">
Access web data at scale with Oxylabs.
</Card>
</CardGroup>
## **Common Use Cases**
@@ -100,4 +104,4 @@ agent = Agent(
- **JavaScript-Heavy Sites**: Use `SeleniumScrapingTool` for dynamic content
- **Scale & Performance**: Use `FirecrawlScrapeWebsiteTool` for high-volume scraping
- **Cloud Infrastructure**: Use `BrowserBaseLoadTool` for scalable browser automation
- **Complex Workflows**: Use `StagehandTool` for intelligent browser interactions
- **Complex Workflows**: Use `StagehandTool` for intelligent browser interactions

View File

@@ -0,0 +1,236 @@
---
title: Oxylabs Scrapers
description: >
Oxylabs Scrapers make it easy to access information from the respective sources. Please see the list of available sources below:
- `Amazon Product`
- `Amazon Search`
- `Google Search`
- `Universal`
icon: globe
---
## Installation
Get the credentials by creating an Oxylabs Account [here](https://oxylabs.io).
```shell
pip install 'crewai[tools]' oxylabs
```
Check [Oxylabs Documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/targets) to get more information about API parameters.
# `OxylabsAmazonProductScraperTool`
### Example
```python
from crewai_tools import OxylabsAmazonProductScraperTool
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsAmazonProductScraperTool()
result = tool.run(query="AAAAABBBBCC")
print(result)
```
### Parameters
- `query` - 10-symbol ASIN code.
- `domain` - domain localization for Amazon.
- `geo_location` - the _Deliver to_ location.
- `user_agent_type` - device type and browser.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - Additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to true.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.
### Advanced example
```python
from crewai_tools import OxylabsAmazonProductScraperTool
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsAmazonProductScraperTool(
config={
"domain": "com",
"parse": True,
"context": [
{
"key": "autoselect_variant",
"value": True
}
]
}
)
result = tool.run(query="AAAAABBBBCC")
print(result)
```
# `OxylabsAmazonSearchScraperTool`
### Example
```python
from crewai_tools import OxylabsAmazonSearchScraperTool
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsAmazonSearchScraperTool()
result = tool.run(query="headsets")
print(result)
```
### Parameters
- `query` - Amazon search term.
- `domain` - domain localization for Amazon.
- `start_page` - starting page number.
- `pages` - number of pages to retrieve.
- `geo_location` - the _Deliver to_ location.
- `user_agent_type` - device type and browser.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - Additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to true.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.
### Advanced example
```python
from crewai_tools import OxylabsAmazonSearchScraperTool
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsAmazonSearchScraperTool(
config={
"domain": 'nl',
"start_page": 2,
"pages": 2,
"parse": True,
"context": [
{'key': 'category_id', 'value': 16391693031}
],
}
)
result = tool.run(query='nirvana tshirt')
print(result)
```
# `OxylabsGoogleSearchScraperTool`
### Example
```python
from crewai_tools import OxylabsGoogleSearchScraperTool
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsGoogleSearchScraperTool()
result = tool.run(query="iPhone 16")
print(result)
```
### Parameters
- `query` - search keyword.
- `domain` - domain localization for Google.
- `start_page` - starting page number.
- `pages` - number of pages to retrieve.
- `limit` - number of results to retrieve in each page.
- `locale` - `Accept-Language` header value which changes your Google search page web interface language.
- `geo_location` - the geographical location that the result should be adapted for. Using this parameter correctly is extremely important to get the right data.
- `user_agent_type` - device type and browser.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - Additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to true.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.
### Advanced example
```python
from crewai_tools import OxylabsGoogleSearchScraperTool
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsGoogleSearchScraperTool(
config={
"parse": True,
"geo_location": "Paris, France",
"user_agent_type": "tablet",
}
)
result = tool.run(query="iPhone 16")
print(result)
```
# `OxylabsUniversalScraperTool`
### Example
```python
from crewai_tools import OxylabsUniversalScraperTool
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsUniversalScraperTool()
result = tool.run(url="https://ip.oxylabs.io")
print(result)
```
### Parameters
- `url` - website url to scrape.
- `user_agent_type` - device type and browser.
- `geo_location` - sets the proxy's geolocation to retrieve data.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - Additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to `true`, as long as a dedicated parser exists for the submitted URL's page type.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.
### Advanced example
```python
from crewai_tools import OxylabsUniversalScraperTool
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsUniversalScraperTool(
config={
"render": "html",
"user_agent_type": "mobile",
"context": [
{"key": "force_headers", "value": True},
{"key": "force_cookies", "value": True},
{
"key": "headers",
"value": {
"Custom-Header-Name": "custom header content",
},
},
{
"key": "cookies",
"value": [
{"key": "NID", "value": "1234567890"},
{"key": "1P JAR", "value": "0987654321"},
],
},
{"key": "http_method", "value": "get"},
{"key": "follow_redirects", "value": True},
{"key": "successful_status_codes", "value": [808, 909]},
],
}
)
result = tool.run(url="https://ip.oxylabs.io")
print(result)
```

View File

@@ -94,17 +94,18 @@ def _get_project_attribute(
attribute = _get_nested_value(pyproject_content, keys)
except FileNotFoundError:
print(f"Error: {pyproject_path} not found.")
console.print(f"Error: {pyproject_path} not found.", style="bold red")
except KeyError:
print(f"Error: {pyproject_path} is not a valid pyproject.toml file.")
console.print(f"Error: {pyproject_path} is not a valid pyproject.toml file.", style="bold red")
except tomllib.TOMLDecodeError if sys.version_info >= (3, 11) else Exception as e: # type: ignore
print(
console.print(
f"Error: {pyproject_path} is not a valid TOML file."
if sys.version_info >= (3, 11)
else f"Error reading the pyproject.toml file: {e}"
else f"Error reading the pyproject.toml file: {e}",
style="bold red",
)
except Exception as e:
print(f"Error reading the pyproject.toml file: {e}")
console.print(f"Error reading the pyproject.toml file: {e}", style="bold red")
if require and not attribute:
console.print(
@@ -137,9 +138,9 @@ def fetch_and_json_env_file(env_file_path: str = ".env") -> dict:
return env_dict
except FileNotFoundError:
print(f"Error: {env_file_path} not found.")
console.print(f"Error: {env_file_path} not found.", style="bold red")
except Exception as e:
print(f"Error reading the .env file: {e}")
console.print(f"Error reading the .env file: {e}", style="bold red")
return {}
@@ -255,50 +256,69 @@ def write_env_file(folder_path, env_vars):
def get_crews(crew_path: str = "crew.py", require: bool = False) -> list[Crew]:
"""Get the crew instances from the a file."""
"""Get the crew instances from a file."""
crew_instances = []
try:
import importlib.util
for root, _, files in os.walk("."):
if crew_path in files:
crew_os_path = os.path.join(root, crew_path)
try:
spec = importlib.util.spec_from_file_location(
"crew_module", crew_os_path
)
if not spec or not spec.loader:
continue
module = importlib.util.module_from_spec(spec)
# Add the current directory to sys.path to ensure imports resolve correctly
current_dir = os.getcwd()
if current_dir not in sys.path:
sys.path.insert(0, current_dir)
# If we're not in src directory but there's a src directory, add it to path
src_dir = os.path.join(current_dir, "src")
if os.path.isdir(src_dir) and src_dir not in sys.path:
sys.path.insert(0, src_dir)
# Search in both current directory and src directory if it exists
search_paths = [".", "src"] if os.path.isdir("src") else ["."]
for search_path in search_paths:
for root, _, files in os.walk(search_path):
if crew_path in files and "cli/templates" not in root:
crew_os_path = os.path.join(root, crew_path)
try:
sys.modules[spec.name] = module
spec.loader.exec_module(module)
for attr_name in dir(module):
module_attr = getattr(module, attr_name)
try:
crew_instances.extend(fetch_crews(module_attr))
except Exception as e:
print(f"Error processing attribute {attr_name}: {e}")
continue
except Exception as exec_error:
print(f"Error executing module: {exec_error}")
import traceback
print(f"Traceback: {traceback.format_exc()}")
except (ImportError, AttributeError) as e:
if require:
console.print(
f"Error importing crew from {crew_path}: {str(e)}",
style="bold red",
spec = importlib.util.spec_from_file_location(
"crew_module", crew_os_path
)
if not spec or not spec.loader:
continue
module = importlib.util.module_from_spec(spec)
sys.modules[spec.name] = module
try:
spec.loader.exec_module(module)
for attr_name in dir(module):
module_attr = getattr(module, attr_name)
try:
crew_instances.extend(fetch_crews(module_attr))
except Exception as e:
console.print(f"Error processing attribute {attr_name}: {e}", style="bold red")
continue
# If we found crew instances, break out of the loop
if crew_instances:
break
except Exception as exec_error:
console.print(f"Error executing module: {exec_error}", style="bold red")
except (ImportError, AttributeError) as e:
if require:
console.print(
f"Error importing crew from {crew_path}: {str(e)}",
style="bold red",
)
continue
# If we found crew instances in this search path, break out of the search paths loop
if crew_instances:
break
if require:
if require and not crew_instances:
console.print("No valid Crew instance found in crew.py", style="bold red")
raise SystemExit
@@ -318,11 +338,15 @@ def get_crew_instance(module_attr) -> Crew | None:
and module_attr.is_crew_class
):
return module_attr().crew()
if (ismethod(module_attr) or isfunction(module_attr)) and get_type_hints(
module_attr
).get("return") is Crew:
return module_attr()
elif isinstance(module_attr, Crew):
try:
if (ismethod(module_attr) or isfunction(module_attr)) and get_type_hints(
module_attr
).get("return") is Crew:
return module_attr()
except Exception:
return None
if isinstance(module_attr, Crew):
return module_attr
else:
return None
@@ -402,7 +426,8 @@ def _load_tools_from_init(init_file: Path) -> list[dict[str, Any]]:
if not hasattr(module, "__all__"):
console.print(
f"[bold yellow]Warning: No __all__ defined in {init_file}[/bold yellow]"
f"Warning: No __all__ defined in {init_file}",
style="bold yellow",
)
raise SystemExit(1)

View File

@@ -476,7 +476,14 @@ def load_agent_from_repository(from_repository: str) -> Dict[str, Any]:
try:
module = importlib.import_module(tool["module"])
tool_class = getattr(module, tool["name"])
attributes[key].append(tool_class())
tool_value = tool_class(**tool["init_params"])
if isinstance(tool_value, list):
attributes[key].extend(tool_value)
else:
attributes[key].append(tool_value)
except Exception as e:
raise AgentRepositoryError(
f"Tool {tool['name']} could not be loaded: {e}"

View File

@@ -2099,7 +2099,7 @@ def mock_get_auth_token():
@patch("crewai.cli.plus_api.PlusAPI.get_agent")
def test_agent_from_repository(mock_get_agent, mock_get_auth_token):
from crewai_tools import SerperDevTool, XMLSearchTool
from crewai_tools import SerperDevTool, XMLSearchTool, CSVSearchTool, EnterpriseActionTool
mock_get_response = MagicMock()
mock_get_response.status_code = 200
@@ -2108,19 +2108,42 @@ def test_agent_from_repository(mock_get_agent, mock_get_auth_token):
"goal": "test goal",
"backstory": "test backstory",
"tools": [
{"module": "crewai_tools", "name": "SerperDevTool"},
{"module": "crewai_tools", "name": "XMLSearchTool"},
{"module": "crewai_tools", "name": "SerperDevTool", "init_params": {"n_results": 30}},
{"module": "crewai_tools", "name": "XMLSearchTool", "init_params": {"summarize": True}},
{"module": "crewai_tools", "name": "CSVSearchTool", "init_params": {}},
# using a tool that returns a list of BaseTools
{"module": "crewai_tools", "name": "CrewaiEnterpriseTools", "init_params": {"actions_list": [], "enterprise_token": "test_key"}},
],
}
mock_get_agent.return_value = mock_get_response
agent = Agent(from_repository="test_agent")
tool_action = EnterpriseActionTool(
name="test_name",
description="test_description",
enterprise_action_token="test_token",
action_name="test_action_name",
action_schema={"test": "test"},
)
with patch("crewai_tools.CrewaiEnterpriseTools", return_value=[tool_action]):
agent = Agent(from_repository="test_agent")
assert agent.role == "test role"
assert agent.goal == "test goal"
assert agent.backstory == "test backstory"
assert len(agent.tools) == 2
assert len(agent.tools) == 4
assert isinstance(agent.tools[0], SerperDevTool)
assert agent.tools[0].n_results == 30
assert isinstance(agent.tools[1], XMLSearchTool)
assert agent.tools[1].summarize
assert isinstance(agent.tools[2], CSVSearchTool)
assert not agent.tools[2].summarize
assert isinstance(agent.tools[3], EnterpriseActionTool)
assert agent.tools[3].name == "test_name"
@patch("crewai.cli.plus_api.PlusAPI.get_agent")
@@ -2133,7 +2156,7 @@ def test_agent_from_repository_override_attributes(mock_get_agent, mock_get_auth
"role": "test role",
"goal": "test goal",
"backstory": "test backstory",
"tools": [{"name": "SerperDevTool", "module": "crewai_tools"}],
"tools": [{"name": "SerperDevTool", "module": "crewai_tools", "init_params": {}}],
}
mock_get_agent.return_value = mock_get_response
agent = Agent(from_repository="test_agent", role="Custom Role")

View File

@@ -261,3 +261,104 @@ __all__ = ['MyTool']
captured = capsys.readouterr()
assert "was never closed" in captured.out
@pytest.fixture
def mock_crew():
from crewai.crew import Crew
class MockCrew(Crew):
def __init__(self):
pass
return MockCrew()
@pytest.fixture
def temp_crew_project():
with tempfile.TemporaryDirectory() as temp_dir:
old_cwd = os.getcwd()
os.chdir(temp_dir)
crew_content = """
from crewai.crew import Crew
from crewai.agent import Agent
def create_crew() -> Crew:
agent = Agent(role="test", goal="test", backstory="test")
return Crew(agents=[agent], tasks=[])
# Direct crew instance
direct_crew = Crew(agents=[], tasks=[])
"""
with open("crew.py", "w") as f:
f.write(crew_content)
os.makedirs("src", exist_ok=True)
with open(os.path.join("src", "crew.py"), "w") as f:
f.write(crew_content)
# Create a src/templates directory that should be ignored
os.makedirs(os.path.join("src", "templates"), exist_ok=True)
with open(os.path.join("src", "templates", "crew.py"), "w") as f:
f.write("# This should be ignored")
yield temp_dir
os.chdir(old_cwd)
def test_get_crews_finds_valid_crews(temp_crew_project, monkeypatch, mock_crew):
def mock_fetch_crews(module_attr):
return [mock_crew]
monkeypatch.setattr(utils, "fetch_crews", mock_fetch_crews)
crews = utils.get_crews()
assert len(crews) > 0
assert mock_crew in crews
def test_get_crews_with_nonexistent_file(temp_crew_project):
crews = utils.get_crews(crew_path="nonexistent.py", require=False)
assert len(crews) == 0
def test_get_crews_with_required_nonexistent_file(temp_crew_project, capsys):
with pytest.raises(SystemExit):
utils.get_crews(crew_path="nonexistent.py", require=True)
captured = capsys.readouterr()
assert "No valid Crew instance found" in captured.out
def test_get_crews_with_invalid_module(temp_crew_project, capsys):
with open("crew.py", "w") as f:
f.write("import nonexistent_module\n")
crews = utils.get_crews(crew_path="crew.py", require=False)
assert len(crews) == 0
with pytest.raises(SystemExit):
utils.get_crews(crew_path="crew.py", require=True)
captured = capsys.readouterr()
assert "Error" in captured.out
def test_get_crews_ignores_template_directories(temp_crew_project, monkeypatch, mock_crew):
template_crew_detected = False
def mock_fetch_crews(module_attr):
nonlocal template_crew_detected
if hasattr(module_attr, "__file__") and "templates" in module_attr.__file__:
template_crew_detected = True
return [mock_crew]
monkeypatch.setattr(utils, "fetch_crews", mock_fetch_crews)
utils.get_crews()
assert not template_crew_detected