mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-01 23:32:39 +00:00
Squashed 'packages/tools/' content from commit 78317b9c
git-subtree-dir: packages/tools git-subtree-split: 78317b9c127f18bd040c1d77e3c0840cdc9a5b38
This commit is contained in:
@@ -0,0 +1,50 @@
|
||||
# OxylabsGoogleSearchScraperTool
|
||||
|
||||
Scrape Google Search results with `OxylabsGoogleSearchScraperTool`
|
||||
|
||||
## Installation
|
||||
|
||||
```
|
||||
pip install 'crewai[tools]' oxylabs
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
```python
|
||||
from crewai_tools import OxylabsGoogleSearchScraperTool
|
||||
|
||||
# make sure the OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables are set
|
||||
tool = OxylabsGoogleSearchScraperTool()
|
||||
|
||||
result = tool.run(query="iPhone 16")
|
||||
|
||||
print(result)
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
- `username`: Oxylabs username.
|
||||
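- `password`: Oxylabs password.
- `config`: Optional scraper configuration, given as an `OxylabsGoogleSearchScraperConfig` or a plain dict (see the advanced example below).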
|
||||
|
||||
Get the credentials by creating an Oxylabs Account [here](https://oxylabs.io).
|
||||
|
||||
## Advanced example
|
||||
|
||||
Check out the Oxylabs [documentation](https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/google/search/search) to get the full list of parameters.
|
||||
|
||||
```python
|
||||
from crewai_tools import OxylabsGoogleSearchScraperTool
|
||||
|
||||
# make sure the OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables are set
|
||||
tool = OxylabsGoogleSearchScraperTool(
|
||||
config={
|
||||
"parse": True,
|
||||
"geo_location": "Paris, France",
|
||||
"user_agent_type": "tablet",
|
||||
}
|
||||
)
|
||||
|
||||
result = tool.run(query="iPhone 16")
|
||||
|
||||
print(result)
|
||||
```
|
||||
@@ -0,0 +1,160 @@
|
||||
import json
|
||||
import os
|
||||
from importlib.metadata import version
|
||||
from platform import architecture, python_version
|
||||
from typing import Any, List, Type
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
try:
|
||||
from oxylabs import RealtimeClient
|
||||
from oxylabs.sources.response import Response as OxylabsResponse
|
||||
|
||||
OXYLABS_AVAILABLE = True
|
||||
except ImportError:
|
||||
RealtimeClient = Any
|
||||
OxylabsResponse = Any
|
||||
|
||||
OXYLABS_AVAILABLE = False
|
||||
|
||||
|
||||
__all__ = ["OxylabsGoogleSearchScraperTool", "OxylabsGoogleSearchScraperConfig"]
|
||||
|
||||
|
||||
class OxylabsGoogleSearchScraperArgs(BaseModel):
    # Input schema for the tool call: one required field, the search phrase.
    # (A `#` comment is used instead of a docstring so the generated JSON
    # schema stays exactly as it was.)
    query: str = Field(..., description="Search query")
|
||||
|
||||
|
||||
class OxylabsGoogleSearchScraperConfig(BaseModel):
    """
    Google Search Scraper configuration options:
    https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/google/search/search
    """

    # Every option is optional and defaults to None, meaning "not set".

    # --- Localization -----------------------------------------------------
    domain: str | None = Field(
        default=None,
        description="Domain localization for Google, e.g. 'com'.",
    )
    geo_location: str | None = Field(
        default=None,
        description="The geographical location that the result should be adapted for.",
    )

    # --- Pagination -------------------------------------------------------
    start_page: int | None = Field(
        default=None, description="The starting page number."
    )
    pages: int | None = Field(
        default=None, description="The number of pages to scrape."
    )
    limit: int | None = Field(
        default=None, description="Number of results to retrieve in each page."
    )

    # --- Request behaviour ------------------------------------------------
    user_agent_type: str | None = Field(
        default=None, description="Device type and browser."
    )
    render: str | None = Field(
        default=None, description="Enables JavaScript rendering."
    )
    callback_url: str | None = Field(
        default=None, description="URL to your callback endpoint."
    )
    context: list | None = Field(
        default=None,
        description="Additional advanced settings and controls for specialized requirements.",
    )

    # --- Parsing ----------------------------------------------------------
    parse: bool | None = Field(
        default=None, description="True will return structured data."
    )
    parsing_instructions: dict | None = Field(
        default=None, description="Instructions for parsing the results."
    )
|
||||
|
||||
|
||||
class OxylabsGoogleSearchScraperTool(BaseTool):
    """
    Scrape Google Search results with OxylabsGoogleSearchScraperTool.

    Get Oxylabs account:
    https://dashboard.oxylabs.io/en

    Args:
        username (str): Oxylabs username. When omitted, it is read from the
            ``OXYLABS_USERNAME`` environment variable.
        password (str): Oxylabs password. When omitted, it is read from the
            ``OXYLABS_PASSWORD`` environment variable.
        config: Configuration options. See ``OxylabsGoogleSearchScraperConfig``.
            May also be given as a plain dict. When omitted, each tool gets
            its own default configuration instance.

    Raises:
        ValueError: If a credential is neither passed nor available in the
            environment.
        ImportError: If the ``oxylabs`` package is missing and is not (or
            cannot be) installed.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        validate_assignment=True,
    )
    name: str = "Oxylabs Google Search Scraper tool"
    description: str = "Scrape Google Search results with Oxylabs Google Search Scraper"
    args_schema: Type[BaseModel] = OxylabsGoogleSearchScraperArgs

    oxylabs_api: RealtimeClient
    config: OxylabsGoogleSearchScraperConfig
    package_dependencies: List[str] = ["oxylabs"]
    env_vars: List[EnvVar] = [
        EnvVar(name="OXYLABS_USERNAME", description="Username for Oxylabs", required=True),
        EnvVar(name="OXYLABS_PASSWORD", description="Password for Oxylabs", required=True),
    ]

    def __init__(
        self,
        username: str | None = None,
        password: str | None = None,
        config: OxylabsGoogleSearchScraperConfig | dict | None = None,
        **kwargs,
    ):
        # Identifier string handed to the Oxylabs client as ``sdk_type``.
        bits, _ = architecture()
        sdk_type = (
            f"oxylabs-crewai-sdk-python/"
            f"{version('crewai')} "
            f"({python_version()}; {bits})"
        )

        if username is None or password is None:
            # Only the missing half is taken from the environment, so an
            # explicitly passed value is never overridden.
            username, password = self._get_credentials_from_env(username, password)

        if config is None:
            # Build the default here rather than in the signature: a default
            # evaluated at definition time would be one instance shared by
            # every tool created without an explicit config.
            config = OxylabsGoogleSearchScraperConfig()

        if not OXYLABS_AVAILABLE:
            # Raises ImportError unless the package ends up installed.
            self._install_oxylabs()

        # Local import: the module-level name is only a placeholder when the
        # package was missing at import time.
        from oxylabs import RealtimeClient

        kwargs["oxylabs_api"] = RealtimeClient(
            username=username,
            password=password,
            sdk_type=sdk_type,
        )

        super().__init__(config=config, **kwargs)

    def _install_oxylabs(self) -> None:
        """Offer to install the ``oxylabs`` package with ``uv``.

        Raises:
            ImportError: If the user declines the prompt or the ``uv add``
                command exits with a non-zero status.
        """
        import click

        if not click.confirm(
            "You are missing the 'oxylabs' package. Would you like to install it?"
        ):
            raise ImportError(
                "`oxylabs` package not found, please run `uv add oxylabs`"
            )

        import subprocess

        try:
            subprocess.run(["uv", "add", "oxylabs"], check=True)
        except subprocess.CalledProcessError as err:
            # Keep the failed command attached as the cause for debugging.
            raise ImportError("Failed to install oxylabs package") from err

    def _get_credentials_from_env(
        self,
        username: str | None = None,
        password: str | None = None,
    ) -> tuple[str, str]:
        """Return ``(username, password)``, filling gaps from the environment.

        Args:
            username: Value to keep as-is; when ``None`` the
                ``OXYLABS_USERNAME`` environment variable is used.
            password: Value to keep as-is; when ``None`` the
                ``OXYLABS_PASSWORD`` environment variable is used.

        Raises:
            ValueError: If either credential is still empty afterwards.
        """
        if username is None:
            username = os.environ.get("OXYLABS_USERNAME")
        if password is None:
            password = os.environ.get("OXYLABS_PASSWORD")
        if not username or not password:
            raise ValueError(
                "You must pass oxylabs username and password when instantiating the tool "
                "or specify OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables"
            )
        return username, password

    def _run(self, query: str, **kwargs) -> str:
        """Run a Google search through Oxylabs and return the first result.

        Args:
            query: Search query forwarded to ``google.scrape_search``.
            **kwargs: Accepted but not used.

        Returns:
            The content of the first result. Dict or list content is encoded
            as a JSON string; any other content is returned unchanged.
        """
        response = self.oxylabs_api.google.scrape_search(
            query,
            # Options left at None are omitted from the call.
            **self.config.model_dump(exclude_none=True),
        )

        content = response.results[0].content

        # JSON-encode structured content so the declared ``str`` return type
        # holds for it.
        if isinstance(content, (dict, list)):
            return json.dumps(content)

        return content
|
||||
Reference in New Issue
Block a user