Mirror of https://github.com/crewAIInc/crewAI.git (synced 2025-12-16 12:28:30 +00:00)

Compare commits: lg-agent-i ... devin/1750 (5 commits)

| SHA1 |
|---|
| f28e3e0be8 |
| 10a55bd210 |
| c96d4a6823 |
| 59032817c7 |
| e9d8a853ea |
@@ -285,25 +285,32 @@ Watch this video tutorial for a step-by-step demonstration of deploying your cre
 ### 11. API Keys
 
-When running ```crewai create crew``` command, the CLI will first show you the top 5 most common LLM providers and ask you to select one.
+When running the ```crewai create crew``` command, the CLI will show you a list of available LLM providers to choose from, followed by model selection for your chosen provider.
 
-Once you've selected an LLM provider, you will be prompted for API keys.
+Once you've selected an LLM provider and model, you will be prompted for API keys.
 
-#### Initial API key providers
+#### Available LLM Providers
 
-The CLI will initially prompt for API keys for the following services:
+The CLI will show you the following LLM providers to choose from:
 
 * OpenAI
-* Groq
 * Anthropic
+* Google Gemini
+* NVIDIA NIM
+* Groq
+* Hugging Face
+* Ollama
+* Watson
+* AWS Bedrock
+* Azure
+* Cerebras
+* SambaNova
 
-When you select a provider, the CLI will prompt you to enter your API key.
+When you select a provider, the CLI will then show you available models for that provider and prompt you to enter your API key.
 
 #### Other Options
 
-If you select option 6, you will be able to select from a list of LiteLLM supported providers.
+If you select "other", you will be able to select from a list of LiteLLM supported providers.
 
 When you select a provider, the CLI will prompt you to enter the Key name and the API key.
@@ -134,7 +134,7 @@
 "tools/web-scraping/stagehandtool",
 "tools/web-scraping/firecrawlcrawlwebsitetool",
 "tools/web-scraping/firecrawlscrapewebsitetool",
-"tools/web-scraping/firecrawlsearchtool"
+"tools/web-scraping/oxylabsscraperstool"
 ]
 },
 {
@@ -124,7 +124,7 @@ from crewai_tools import CrewaiEnterpriseTools
 enterprise_tools = CrewaiEnterpriseTools(
     actions_list=["gmail_find_email"]  # only gmail_find_email tool will be available
 )
-gmail_tool = enterprise_tools[0]
+gmail_tool = enterprise_tools["gmail_find_email"]
 
 gmail_agent = Agent(
     role="Gmail Manager",
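A quick note on the change above: a minimal sketch contrasting the two access styles, assuming the same `CrewaiEnterpriseTools` setup as in the diff and a configured enterprise token (the variable names here are illustrative):

```python
from crewai_tools import CrewaiEnterpriseTools

enterprise_tools = CrewaiEnterpriseTools(
    actions_list=["gmail_find_email"]  # only gmail_find_email tool will be available
)

# Positional access depends on the order in which actions are loaded:
gmail_tool_by_index = enterprise_tools[0]

# Name-based access keeps working even if more actions are added later:
gmail_tool_by_name = enterprise_tools["gmail_find_email"]
```

Name-based lookup is the more robust choice once `actions_list` grows beyond a single entry.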
@@ -85,6 +85,22 @@ with MCPServerAdapter(server_params) as mcp_tools:
 ```
 This general pattern shows how to integrate tools. For specific examples tailored to each transport, refer to the detailed guides below.
 
+## Filtering Tools
+
+```python
+with MCPServerAdapter(server_params) as mcp_tools:
+    print(f"Available tools: {[tool.name for tool in mcp_tools]}")
+
+    my_agent = Agent(
+        role="MCP Tool User",
+        goal="Utilize tools from an MCP server.",
+        backstory="I can connect to MCP servers and use their tools.",
+        tools=mcp_tools["tool_name"],  # Pass the loaded tools to your agent
+        reasoning=True,
+        verbose=True
+    )
+    # ... rest of your crew setup ...
+```
 ## Explore MCP Integrations
 
 <CardGroup cols={2}>
@@ -56,6 +56,10 @@ These tools enable your agents to interact with the web, extract data from websi
 <Card title="Stagehand Tool" icon="hand" href="/tools/web-scraping/stagehandtool">
   Intelligent browser automation with natural language commands.
 </Card>
+
+<Card title="Oxylabs Scraper Tool" icon="globe" href="/tools/web-scraping/oxylabsscraperstool">
+  Access web data at scale with Oxylabs.
+</Card>
 </CardGroup>
 
 ## **Common Use Cases**
@@ -100,4 +104,4 @@ agent = Agent(
 - **JavaScript-Heavy Sites**: Use `SeleniumScrapingTool` for dynamic content
 - **Scale & Performance**: Use `FirecrawlScrapeWebsiteTool` for high-volume scraping
 - **Cloud Infrastructure**: Use `BrowserBaseLoadTool` for scalable browser automation
-- **Complex Workflows**: Use `StagehandTool` for intelligent browser interactions
+- **Complex Workflows**: Use `StagehandTool` for intelligent browser interactions
236  docs/tools/web-scraping/oxylabsscraperstool.mdx  (new file)
@@ -0,0 +1,236 @@
---
title: Oxylabs Scrapers
description: >
  Oxylabs Scrapers allow you to easily access information from the respective sources. Please see the list of available sources below:
  - `Amazon Product`
  - `Amazon Search`
  - `Google Search`
  - `Universal`
icon: globe
---

## Installation

Get your credentials by creating an Oxylabs account [here](https://oxylabs.io).

```shell
pip install 'crewai[tools]' oxylabs
```

Check the [Oxylabs documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/targets) for more information about API parameters.
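The examples in this file read Oxylabs credentials from the `OXYLABS_USERNAME` and `OXYLABS_PASSWORD` environment variables. A minimal sketch of providing them from Python, with placeholder values, if you prefer not to export them in your shell:

```python
import os

# Placeholder values; substitute your own Oxylabs credentials.
os.environ.setdefault("OXYLABS_USERNAME", "your_username")
os.environ.setdefault("OXYLABS_PASSWORD", "your_password")
```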
# `OxylabsAmazonProductScraperTool`

### Example

```python
from crewai_tools import OxylabsAmazonProductScraperTool

# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsAmazonProductScraperTool()

result = tool.run(query="AAAAABBBBCC")

print(result)
```

### Parameters

- `query` - 10-character ASIN code.
- `domain` - domain localization for Amazon.
- `geo_location` - the _Deliver to_ location.
- `user_agent_type` - device type and browser.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to `true`.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.

### Advanced example

```python
from crewai_tools import OxylabsAmazonProductScraperTool

# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsAmazonProductScraperTool(
    config={
        "domain": "com",
        "parse": True,
        "context": [
            {
                "key": "autoselect_variant",
                "value": True
            }
        ]
    }
)

result = tool.run(query="AAAAABBBBCC")

print(result)
```
# `OxylabsAmazonSearchScraperTool`

### Example

```python
from crewai_tools import OxylabsAmazonSearchScraperTool

# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsAmazonSearchScraperTool()

result = tool.run(query="headsets")

print(result)
```

### Parameters

- `query` - Amazon search term.
- `domain` - domain localization for Amazon.
- `start_page` - starting page number.
- `pages` - number of pages to retrieve.
- `geo_location` - the _Deliver to_ location.
- `user_agent_type` - device type and browser.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to `true`.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.

### Advanced example

```python
from crewai_tools import OxylabsAmazonSearchScraperTool

# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsAmazonSearchScraperTool(
    config={
        "domain": "nl",
        "start_page": 2,
        "pages": 2,
        "parse": True,
        "context": [
            {"key": "category_id", "value": 16391693031}
        ],
    }
)

result = tool.run(query="nirvana tshirt")

print(result)
```
# `OxylabsGoogleSearchScraperTool`

### Example

```python
from crewai_tools import OxylabsGoogleSearchScraperTool

# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsGoogleSearchScraperTool()

result = tool.run(query="iPhone 16")

print(result)
```

### Parameters

- `query` - search keyword.
- `domain` - domain localization for Google.
- `start_page` - starting page number.
- `pages` - number of pages to retrieve.
- `limit` - number of results to retrieve on each page.
- `locale` - `Accept-Language` header value, which changes the web interface language of your Google search page.
- `geo_location` - the geographical location that the result should be adapted for. Using this parameter correctly is extremely important to get the right data.
- `user_agent_type` - device type and browser.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to `true`.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.

### Advanced example

```python
from crewai_tools import OxylabsGoogleSearchScraperTool

# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsGoogleSearchScraperTool(
    config={
        "parse": True,
        "geo_location": "Paris, France",
        "user_agent_type": "tablet",
    }
)

result = tool.run(query="iPhone 16")

print(result)
```
# `OxylabsUniversalScraperTool`

### Example

```python
from crewai_tools import OxylabsUniversalScraperTool

# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsUniversalScraperTool()

result = tool.run(url="https://ip.oxylabs.io")

print(result)
```

### Parameters

- `url` - website URL to scrape.
- `user_agent_type` - device type and browser.
- `geo_location` - sets the proxy's geolocation to retrieve data.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to `true`, as long as a dedicated parser exists for the submitted URL's page type.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.

### Advanced example

```python
from crewai_tools import OxylabsUniversalScraperTool

# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsUniversalScraperTool(
    config={
        "render": "html",
        "user_agent_type": "mobile",
        "context": [
            {"key": "force_headers", "value": True},
            {"key": "force_cookies", "value": True},
            {
                "key": "headers",
                "value": {
                    "Custom-Header-Name": "custom header content",
                },
            },
            {
                "key": "cookies",
                "value": [
                    {"key": "NID", "value": "1234567890"},
                    {"key": "1P JAR", "value": "0987654321"},
                ],
            },
            {"key": "http_method", "value": "get"},
            {"key": "follow_redirects", "value": True},
            {"key": "successful_status_codes", "value": [808, 909]},
        ],
    }
)

result = tool.run(url="https://ip.oxylabs.io")

print(result)
```
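These tools plug into agents like any other CrewAI tool. A minimal sketch, assuming the same `Agent` API used elsewhere in these docs; the role, goal, backstory, and the choice of the Universal scraper are illustrative:

```python
from crewai import Agent
from crewai_tools import OxylabsUniversalScraperTool

# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
scraper = OxylabsUniversalScraperTool(config={"render": "html"})

web_researcher = Agent(
    role="Web Researcher",
    goal="Collect page content for downstream analysis.",
    backstory="I fetch web pages through Oxylabs and summarize them.",
    tools=[scraper],
    verbose=True,
)
```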
@@ -476,7 +476,14 @@ def load_agent_from_repository(from_repository: str) -> Dict[str, Any]:
 try:
     module = importlib.import_module(tool["module"])
     tool_class = getattr(module, tool["name"])
-    attributes[key].append(tool_class())
+
+    tool_value = tool_class(**tool["init_params"])
+
+    if isinstance(tool_value, list):
+        attributes[key].extend(tool_value)
+    else:
+        attributes[key].append(tool_value)
+
 except Exception as e:
     raise AgentRepositoryError(
         f"Tool {tool['name']} could not be loaded: {e}"
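In plain terms, the new code forwards each spec's `init_params` to the tool constructor and flattens factories that return a list of tools. A minimal, self-contained sketch of that resolution logic, using the spec format from the tests below (the `specs` list here is illustrative):

```python
import importlib

# Illustrative spec, mirroring the test fixtures below.
specs = [
    {"module": "crewai_tools", "name": "SerperDevTool", "init_params": {"n_results": 30}},
]

tools = []
for spec in specs:
    module = importlib.import_module(spec["module"])
    tool_class = getattr(module, spec["name"])
    tool_value = tool_class(**spec["init_params"])
    if isinstance(tool_value, list):
        tools.extend(tool_value)  # a factory returned several BaseTools
    else:
        tools.append(tool_value)  # an ordinary single tool
```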
@@ -2099,7 +2099,7 @@ def mock_get_auth_token():
 
 @patch("crewai.cli.plus_api.PlusAPI.get_agent")
 def test_agent_from_repository(mock_get_agent, mock_get_auth_token):
-    from crewai_tools import SerperDevTool, XMLSearchTool
+    from crewai_tools import SerperDevTool, XMLSearchTool, CSVSearchTool, EnterpriseActionTool
 
     mock_get_response = MagicMock()
     mock_get_response.status_code = 200
@@ -2108,19 +2108,42 @@ def test_agent_from_repository(mock_get_agent, mock_get_auth_token):
         "goal": "test goal",
         "backstory": "test backstory",
         "tools": [
-            {"module": "crewai_tools", "name": "SerperDevTool"},
-            {"module": "crewai_tools", "name": "XMLSearchTool"},
+            {"module": "crewai_tools", "name": "SerperDevTool", "init_params": {"n_results": 30}},
+            {"module": "crewai_tools", "name": "XMLSearchTool", "init_params": {"summarize": True}},
+            {"module": "crewai_tools", "name": "CSVSearchTool", "init_params": {}},
+
+            # using a tool that returns a list of BaseTools
+            {"module": "crewai_tools", "name": "CrewaiEnterpriseTools", "init_params": {"actions_list": [], "enterprise_token": "test_key"}},
         ],
     }
     mock_get_agent.return_value = mock_get_response
-    agent = Agent(from_repository="test_agent")
+
+    tool_action = EnterpriseActionTool(
+        name="test_name",
+        description="test_description",
+        enterprise_action_token="test_token",
+        action_name="test_action_name",
+        action_schema={"test": "test"},
+    )
+
+    with patch("crewai_tools.CrewaiEnterpriseTools", return_value=[tool_action]):
+        agent = Agent(from_repository="test_agent")
 
     assert agent.role == "test role"
     assert agent.goal == "test goal"
     assert agent.backstory == "test backstory"
-    assert len(agent.tools) == 2
+    assert len(agent.tools) == 4
+
+    assert isinstance(agent.tools[0], SerperDevTool)
+    assert agent.tools[0].n_results == 30
+    assert isinstance(agent.tools[1], XMLSearchTool)
+    assert agent.tools[1].summarize
+
+    assert isinstance(agent.tools[2], CSVSearchTool)
+    assert not agent.tools[2].summarize
+
+    assert isinstance(agent.tools[3], EnterpriseActionTool)
+    assert agent.tools[3].name == "test_name"
 
 
 @patch("crewai.cli.plus_api.PlusAPI.get_agent")
@@ -2133,7 +2156,7 @@ def test_agent_from_repository_override_attributes(mock_get_agent, mock_get_auth
         "role": "test role",
         "goal": "test goal",
         "backstory": "test backstory",
-        "tools": [{"name": "SerperDevTool", "module": "crewai_tools"}],
+        "tools": [{"name": "SerperDevTool", "module": "crewai_tools", "init_params": {}}],
     }
     mock_get_agent.return_value = mock_get_response
     agent = Agent(from_repository="test_agent", role="Custom Role")
43  tests/test_cli_documentation_sync.py  (new file)
@@ -0,0 +1,43 @@
from pathlib import Path

from crewai.cli.constants import PROVIDERS


def test_cli_documentation_matches_providers():
    """Test that CLI documentation accurately reflects the available providers."""
    docs_path = Path(__file__).parent.parent / "docs" / "concepts" / "cli.mdx"
    with open(docs_path, 'r') as f:
        docs_content = f.read()

    assert "top 5" not in docs_content.lower(), "Documentation should not mention 'top 5' providers"
    assert "5 most common" not in docs_content.lower(), "Documentation should not mention '5 most common' providers"

    assert "list of available LLM providers" in docs_content or "following LLM providers" in docs_content, \
        "Documentation should mention the availability of multiple LLM providers"

    assert len(PROVIDERS) > 5, f"Expected more than 5 providers, but found {len(PROVIDERS)}"

    key_providers = ["OpenAI", "Anthropic", "Gemini"]
    for provider in key_providers:
        assert provider in docs_content, f"Key provider {provider} should be mentioned in documentation"


def test_providers_list_matches_constants():
    """Test that the actual PROVIDERS list has the expected providers."""
    expected_providers = [
        "openai",
        "anthropic",
        "gemini",
        "nvidia_nim",
        "groq",
        "huggingface",
        "ollama",
        "watson",
        "bedrock",
        "azure",
        "cerebras",
        "sambanova",
    ]

    assert PROVIDERS == expected_providers, f"PROVIDERS list has changed. Expected {expected_providers}, got {PROVIDERS}"
    assert len(PROVIDERS) == 12, f"Expected 12 providers, but found {len(PROVIDERS)}"