Squashed 'packages/tools/' content from commit 78317b9c

git-subtree-dir: packages/tools
git-subtree-split: 78317b9c127f18bd040c1d77e3c0840cdc9a5b38
This commit is contained in:
Greyson Lalonde
2025-09-12 21:58:02 -04:00
commit e16606672a
303 changed files with 49010 additions and 0 deletions

View File

@@ -0,0 +1,50 @@
# OxylabsGoogleSearchScraperTool
Scrape Google Search results with `OxylabsGoogleSearchScraperTool`.
## Installation
```
pip install 'crewai[tools]' oxylabs
```
## Example
```python
from crewai_tools import OxylabsGoogleSearchScraperTool
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsGoogleSearchScraperTool()
result = tool.run(query="iPhone 16")
print(result)
```
## Arguments
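- `username`: Oxylabs username. Read from the `OXYLABS_USERNAME` environment variable when not passed.
- `password`: Oxylabs password. Read from the `OXYLABS_PASSWORD` environment variable when not passed.
- `config`: Optional scraper settings, given as a dict or an `OxylabsGoogleSearchScraperConfig` (see the advanced example below).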
Get the credentials by creating an Oxylabs Account [here](https://oxylabs.io).
## Advanced example
Check out the Oxylabs [documentation](https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/google/search/search) to get the full list of parameters.
```python
from crewai_tools import OxylabsGoogleSearchScraperTool
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
tool = OxylabsGoogleSearchScraperTool(
config={
"parse": True,
"geo_location": "Paris, France",
"user_agent_type": "tablet",
}
)
result = tool.run(query="iPhone 16")
print(result)
```

View File

@@ -0,0 +1,160 @@
import json
import os
from importlib.metadata import version
from platform import architecture, python_version
from typing import Any, List, Type
from crewai.tools import BaseTool, EnvVar
from pydantic import BaseModel, ConfigDict, Field
try:
from oxylabs import RealtimeClient
from oxylabs.sources.response import Response as OxylabsResponse
OXYLABS_AVAILABLE = True
except ImportError:
RealtimeClient = Any
OxylabsResponse = Any
OXYLABS_AVAILABLE = False
__all__ = ["OxylabsGoogleSearchScraperTool", "OxylabsGoogleSearchScraperConfig"]
# Input schema handed to the tool framework: the only run-time argument is
# the search query. (Kept as a comment, not a docstring, so the generated
# JSON schema of the model is unchanged.)
class OxylabsGoogleSearchScraperArgs(BaseModel):
    # Required field: ``...`` marks it as having no default.
    query: str = Field(..., description="Search query")
class OxylabsGoogleSearchScraperConfig(BaseModel):
    """
    Google Search Scraper configuration options:
    https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/google/search/search
    """

    # Every option is optional and defaults to None; the tool drops None
    # values (``model_dump(exclude_none=True)``) before calling the API, so
    # only explicitly set options are sent.

    # Localization.
    domain: str | None = Field(
        None, description="Domain localization for Google (e.g. 'com', 'co.uk')."
    )
    geo_location: str | None = Field(
        None,
        description="The geographical location that the result should be adapted for.",
    )

    # Pagination.
    start_page: int | None = Field(None, description="The starting page number.")
    pages: int | None = Field(None, description="The number of pages to scrape.")
    limit: int | None = Field(
        None, description="Number of results to retrieve in each page."
    )

    # Request shaping.
    user_agent_type: str | None = Field(None, description="Device type and browser.")
    render: str | None = Field(None, description="Enables JavaScript rendering.")
    callback_url: str | None = Field(None, description="URL to your callback endpoint.")
    context: list | None = Field(
        None,
        description="Additional advanced settings and controls for specialized requirements.",
    )

    # Result parsing.
    parse: bool | None = Field(None, description="True will return structured data.")
    parsing_instructions: dict | None = Field(
        None, description="Instructions for parsing the results."
    )
class OxylabsGoogleSearchScraperTool(BaseTool):
    """
    Scrape Google Search results with OxylabsGoogleSearchScraperTool.

    Get Oxylabs account:
    https://dashboard.oxylabs.io/en

    Args:
        username (str): Oxylabs username.
        password (str): Oxylabs password.
        config: Configuration options. See ``OxylabsGoogleSearchScraperConfig``

    If either ``username`` or ``password`` is omitted, both are read from the
    ``OXYLABS_USERNAME`` / ``OXYLABS_PASSWORD`` environment variables.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        validate_assignment=True,
    )

    name: str = "Oxylabs Google Search Scraper tool"
    description: str = "Scrape Google Search results with Oxylabs Google Search Scraper"
    args_schema: Type[BaseModel] = OxylabsGoogleSearchScraperArgs

    # Client used by ``_run``; built in ``__init__`` and passed to the base
    # class through ``kwargs``.
    oxylabs_api: RealtimeClient
    config: OxylabsGoogleSearchScraperConfig
    package_dependencies: List[str] = ["oxylabs"]
    env_vars: List[EnvVar] = [
        EnvVar(name="OXYLABS_USERNAME", description="Username for Oxylabs", required=True),
        EnvVar(name="OXYLABS_PASSWORD", description="Password for Oxylabs", required=True),
    ]

    def __init__(
        self,
        username: str | None = None,
        password: str | None = None,
        config: OxylabsGoogleSearchScraperConfig | dict | None = None,
        **kwargs,
    ):
        """Create the tool and its Oxylabs client.

        Args:
            username: Oxylabs username, or None to use the environment.
            password: Oxylabs password, or None to use the environment.
            config: Scraper options as a config object or a dict. None
                (the default) creates a fresh, empty configuration.
            **kwargs: Forwarded to the base tool constructor.

        Raises:
            ValueError: Credentials were not passed and are not fully set
                in the environment.
            ImportError: The ``oxylabs`` package is missing and was not
                (or could not be) installed.
        """
        # Build the default per instance. A default object in the signature
        # is created once at definition time and would be shared (and
        # mutated) across every tool constructed without ``config``.
        if config is None:
            config = OxylabsGoogleSearchScraperConfig()

        bits, _ = architecture()
        sdk_type = (
            f"oxylabs-crewai-sdk-python/"
            f"{version('crewai')} "
            f"({python_version()}; {bits})"
        )

        if username is None or password is None:
            username, password = self._get_credentials_from_env()

        if not OXYLABS_AVAILABLE:
            # The optional dependency is missing: offer to install it.
            import click

            if not click.confirm(
                "You are missing the 'oxylabs' package. Would you like to install it?"
            ):
                raise ImportError(
                    "`oxylabs` package not found, please run `uv add oxylabs`"
                )

            import subprocess

            try:
                subprocess.run(["uv", "add", "oxylabs"], check=True)
            except subprocess.CalledProcessError as exc:
                # Keep the failing command's details attached as the cause.
                raise ImportError("Failed to install oxylabs package") from exc

        # Import locally: the module-level ``RealtimeClient`` is only a
        # placeholder (``Any``) when the import failed at module load time.
        from oxylabs import RealtimeClient

        kwargs["oxylabs_api"] = RealtimeClient(
            username=username,
            password=password,
            sdk_type=sdk_type,
        )

        super().__init__(config=config, **kwargs)

    def _get_credentials_from_env(self) -> tuple[str, str]:
        """Return ``(username, password)`` read from the environment.

        Raises:
            ValueError: ``OXYLABS_USERNAME`` or ``OXYLABS_PASSWORD`` is
                unset or empty.
        """
        username = os.environ.get("OXYLABS_USERNAME")
        password = os.environ.get("OXYLABS_PASSWORD")
        if not username or not password:
            raise ValueError(
                "You must pass oxylabs username and password when instantiating the tool "
                "or specify OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables"
            )
        return username, password

    def _run(self, query: str, **kwargs) -> str:
        """Run a Google search through Oxylabs and return the first result's content.

        Args:
            query: Search query passed to ``google.scrape_search``.
            **kwargs: Accepted but not used by this method.

        Returns:
            The content of the first result: serialized with ``json.dumps``
            when it is a dict, otherwise returned unchanged.
        """
        response = self.oxylabs_api.google.scrape_search(
            query,
            # Send only the options that were explicitly set.
            **self.config.model_dump(exclude_none=True),
        )

        content = response.results[0].content

        if isinstance(content, dict):
            return json.dumps(content)

        return content