Amazing product
+Price $14.99
+Good product
+Price $9.99
+diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index f49e4149b..36624f355 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -37,6 +37,10 @@ from .tools import ( MultiOnTool, MySQLSearchTool, NL2SQLTool, + OxylabsUniversalScraperTool, + OxylabsGoogleSearchScraperTool, + OxylabsAmazonProductScraperTool, + OxylabsAmazonSearchScraperTool, PatronusEvalTool, PatronusLocalEvaluatorTool, PatronusPredefinedCriteriaEvalTool, diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index c10f152ef..957d2f1e2 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -32,6 +32,18 @@ from .mdx_search_tool.mdx_search_tool import MDXSearchTool from .multion_tool.multion_tool import MultiOnTool from .mysql_search_tool.mysql_search_tool import MySQLSearchTool from .nl2sql.nl2sql_tool import NL2SQLTool +from .oxylabs_universal_scraper_tool.oxylabs_universal_scraper_tool import ( + OxylabsUniversalScraperTool, +) +from .oxylabs_google_search_scraper_tool.oxylabs_google_search_scraper_tool import ( + OxylabsGoogleSearchScraperTool, +) +from .oxylabs_amazon_product_scraper_tool.oxylabs_amazon_product_scraper_tool import ( + OxylabsAmazonProductScraperTool, +) +from .oxylabs_amazon_search_scraper_tool.oxylabs_amazon_search_scraper_tool import ( + OxylabsAmazonSearchScraperTool, +) from .patronus_eval_tool import ( PatronusEvalTool, PatronusLocalEvaluatorTool, diff --git a/src/crewai_tools/tools/oxylabs_amazon_product_scraper_tool/README.md b/src/crewai_tools/tools/oxylabs_amazon_product_scraper_tool/README.md new file mode 100644 index 000000000..f87c70c19 --- /dev/null +++ b/src/crewai_tools/tools/oxylabs_amazon_product_scraper_tool/README.md @@ -0,0 +1,55 @@ +# OxylabsAmazonProductScraperTool + +Scrape Amazon product pages with `OxylabsAmazonProductScraperTool` + +## Installation + +``` +pip install 'crewai[tools]' oxylabs +``` + +## Example + +```python +from crewai_tools 
import OxylabsAmazonProductScraperTool + +# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set +tool = OxylabsAmazonProductScraperTool() + +result = tool.run(query="AAAAABBBBCC") + +print(result) +``` + +## Arguments + +- `username`: Oxylabs username. +- `password`: Oxylabs password. + +Get the credentials by creating an Oxylabs Account [here](https://oxylabs.io). + +## Advanced example + +Check out the Oxylabs [documentation](https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/product) to get the full list of parameters. + +```python +from crewai_tools import OxylabsAmazonProductScraperTool + +# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set +tool = OxylabsAmazonProductScraperTool( + config={ + "domain": "com", + "parse": True, + "context": [ + { + "key": "autoselect_variant", + "value": True + } + ] + } +) + +result = tool.run(query="AAAAABBBBCC") + +print(result) +``` diff --git a/src/crewai_tools/tools/oxylabs_amazon_product_scraper_tool/oxylabs_amazon_product_scraper_tool.py b/src/crewai_tools/tools/oxylabs_amazon_product_scraper_tool/oxylabs_amazon_product_scraper_tool.py new file mode 100644 index 000000000..d763fa86f --- /dev/null +++ b/src/crewai_tools/tools/oxylabs_amazon_product_scraper_tool/oxylabs_amazon_product_scraper_tool.py @@ -0,0 +1,151 @@ +import json +import os +from importlib.metadata import version +from platform import architecture, python_version +from typing import Any, List, Type + +from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field + +try: + from oxylabs import RealtimeClient + from oxylabs.sources.response import Response as OxylabsResponse + + OXYLABS_AVAILABLE = True +except ImportError: + RealtimeClient = Any + OxylabsResponse = Any + + OXYLABS_AVAILABLE = False + + +__all__ = ["OxylabsAmazonProductScraperTool", "OxylabsAmazonProductScraperConfig"] + + +class OxylabsAmazonProductScraperArgs(BaseModel): + query: str = Field(description="Amazon 
product ASIN") + + +class OxylabsAmazonProductScraperConfig(BaseModel): + """ + Amazon Product Scraper configuration options: + https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/product + """ + + domain: str | None = Field( + None, description="The domain to limit the search results to." + ) + geo_location: str | None = Field(None, description="The Deliver to location.") + user_agent_type: str | None = Field(None, description="Device type and browser.") + render: str | None = Field(None, description="Enables JavaScript rendering.") + callback_url: str | None = Field(None, description="URL to your callback endpoint.") + context: list | None = Field( + None, + description="Additional advanced settings and controls for specialized requirements.", + ) + parse: bool | None = Field(None, description="True will return structured data.") + parsing_instructions: dict | None = Field( + None, description="Instructions for parsing the results." + ) + + +class OxylabsAmazonProductScraperTool(BaseTool): + """ + Scrape Amazon product pages with OxylabsAmazonProductScraperTool. + + Get Oxylabs account: + https://dashboard.oxylabs.io/en + + Args: + username (str): Oxylabs username. + password (str): Oxylabs password. + config: Configuration options. 
See ``OxylabsAmazonProductScraperConfig`` + """ + + model_config = ConfigDict( + arbitrary_types_allowed=True, + validate_assignment=True, + ) + name: str = "Oxylabs Amazon Product Scraper tool" + description: str = "Scrape Amazon product pages with Oxylabs Amazon Product Scraper" + args_schema: Type[BaseModel] = OxylabsAmazonProductScraperArgs + + oxylabs_api: RealtimeClient + config: OxylabsAmazonProductScraperConfig + package_dependencies: List[str] = ["oxylabs"] + + def __init__( + self, + username: str | None = None, + password: str | None = None, + config: OxylabsAmazonProductScraperConfig + | dict = OxylabsAmazonProductScraperConfig(), + **kwargs, + ) -> None: + bits, _ = architecture() + sdk_type = ( + f"oxylabs-crewai-sdk-python/" + f"{version('crewai')} " + f"({python_version()}; {bits})" + ) + + if username is None or password is None: + username, password = self._get_credentials_from_env() + + if OXYLABS_AVAILABLE: + # import RealtimeClient to make it accessible for the current scope + from oxylabs import RealtimeClient + + kwargs["oxylabs_api"] = RealtimeClient( + username=username, + password=password, + sdk_type=sdk_type, + ) + else: + import click + + if click.confirm( + "You are missing the 'oxylabs' package. Would you like to install it?" 
+ ): + import subprocess + + try: + subprocess.run(["uv", "add", "oxylabs"], check=True) + from oxylabs import RealtimeClient + + kwargs["oxylabs_api"] = RealtimeClient( + username=username, + password=password, + sdk_type=sdk_type, + ) + except subprocess.CalledProcessError: + raise ImportError("Failed to install oxylabs package") + else: + raise ImportError( + "`oxylabs` package not found, please run `uv add oxylabs`" + ) + + super().__init__(config=config, **kwargs) + + def _get_credentials_from_env(self) -> tuple[str, str]: + username = os.environ.get("OXYLABS_USERNAME") + password = os.environ.get("OXYLABS_PASSWORD") + if not username or not password: + raise ValueError( + "You must pass oxylabs username and password when instantiating the tool " + "or specify OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables" + ) + return username, password + + def _run(self, query: str) -> str: + response = self.oxylabs_api.amazon.scrape_product( + query, + **self.config.model_dump(exclude_none=True), + ) + + content = response.results[0].content + + if isinstance(content, dict): + return json.dumps(content) + + return content diff --git a/src/crewai_tools/tools/oxylabs_amazon_search_scraper_tool/README.md b/src/crewai_tools/tools/oxylabs_amazon_search_scraper_tool/README.md new file mode 100644 index 000000000..b0e2ef7b0 --- /dev/null +++ b/src/crewai_tools/tools/oxylabs_amazon_search_scraper_tool/README.md @@ -0,0 +1,54 @@ +# OxylabsAmazonSearchScraperTool + +Scrape Amazon search results with `OxylabsAmazonSearchScraperTool` + +## Installation + +``` +pip install 'crewai[tools]' oxylabs +``` + +## Example + +```python +from crewai_tools import OxylabsAmazonSearchScraperTool + +# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set +tool = OxylabsAmazonSearchScraperTool() + +result = tool.run(query="headsets") + +print(result) +``` + +## Arguments + +- `username`: Oxylabs username. +- `password`: Oxylabs password. 
+ +Get the credentials by creating an Oxylabs Account [here](https://oxylabs.io). + +## Advanced example + +Check out the Oxylabs [documentation](https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/search) to get the full list of parameters. + +```python +from crewai_tools import OxylabsAmazonSearchScraperTool + +# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set +tool = OxylabsAmazonSearchScraperTool( + config={ + "domain": 'nl', + "start_page": 2, + "pages": 2, + "parse": True, + "context": [ + {'key': 'category_id', 'value': 16391693031} + ], + } +) + +result = tool.run(query='nirvana tshirt') + +print(result) +``` diff --git a/src/crewai_tools/tools/oxylabs_amazon_search_scraper_tool/oxylabs_amazon_search_scraper_tool.py b/src/crewai_tools/tools/oxylabs_amazon_search_scraper_tool/oxylabs_amazon_search_scraper_tool.py new file mode 100644 index 000000000..9a113e93a --- /dev/null +++ b/src/crewai_tools/tools/oxylabs_amazon_search_scraper_tool/oxylabs_amazon_search_scraper_tool.py @@ -0,0 +1,153 @@ +import json +import os +from importlib.metadata import version +from platform import architecture, python_version +from typing import Any, List, Type + +from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field + +try: + from oxylabs import RealtimeClient + from oxylabs.sources.response import Response as OxylabsResponse + + OXYLABS_AVAILABLE = True +except ImportError: + RealtimeClient = Any + OxylabsResponse = Any + + OXYLABS_AVAILABLE = False + + +__all__ = ["OxylabsAmazonSearchScraperTool", "OxylabsAmazonSearchScraperConfig"] + + +class OxylabsAmazonSearchScraperArgs(BaseModel): + query: str = Field(description="Amazon search term") + + +class OxylabsAmazonSearchScraperConfig(BaseModel): + """ + Amazon Search Scraper configuration options: + https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/search + """ + + domain: str | None = Field( + None, description="The domain to limit 
the search results to." + ) + start_page: int | None = Field(None, description="The starting page number.") + pages: int | None = Field(None, description="The number of pages to scrape.") + geo_location: str | None = Field(None, description="The Deliver to location.") + user_agent_type: str | None = Field(None, description="Device type and browser.") + render: str | None = Field(None, description="Enables JavaScript rendering.") + callback_url: str | None = Field(None, description="URL to your callback endpoint.") + context: list | None = Field( + None, + description="Additional advanced settings and controls for specialized requirements.", + ) + parse: bool | None = Field(None, description="True will return structured data.") + parsing_instructions: dict | None = Field( + None, description="Instructions for parsing the results." + ) + + +class OxylabsAmazonSearchScraperTool(BaseTool): + """ + Scrape Amazon search results with OxylabsAmazonSearchScraperTool. + + Get Oxylabs account: + https://dashboard.oxylabs.io/en + + Args: + username (str): Oxylabs username. + password (str): Oxylabs password. + config: Configuration options. 
See ``OxylabsAmazonSearchScraperConfig`` + """ + + model_config = ConfigDict( + arbitrary_types_allowed=True, + validate_assignment=True, + ) + name: str = "Oxylabs Amazon Search Scraper tool" + description: str = "Scrape Amazon search results with Oxylabs Amazon Search Scraper" + args_schema: Type[BaseModel] = OxylabsAmazonSearchScraperArgs + + oxylabs_api: RealtimeClient + config: OxylabsAmazonSearchScraperConfig + package_dependencies: List[str] = ["oxylabs"] + + def __init__( + self, + username: str | None = None, + password: str | None = None, + config: OxylabsAmazonSearchScraperConfig + | dict = OxylabsAmazonSearchScraperConfig(), + **kwargs, + ): + bits, _ = architecture() + sdk_type = ( + f"oxylabs-crewai-sdk-python/" + f"{version('crewai')} " + f"({python_version()}; {bits})" + ) + + if username is None or password is None: + username, password = self._get_credentials_from_env() + + if OXYLABS_AVAILABLE: + # import RealtimeClient to make it accessible for the current scope + from oxylabs import RealtimeClient + + kwargs["oxylabs_api"] = RealtimeClient( + username=username, + password=password, + sdk_type=sdk_type, + ) + else: + import click + + if click.confirm( + "You are missing the 'oxylabs' package. Would you like to install it?" 
+ ): + import subprocess + + try: + subprocess.run(["uv", "add", "oxylabs"], check=True) + from oxylabs import RealtimeClient + + kwargs["oxylabs_api"] = RealtimeClient( + username=username, + password=password, + sdk_type=sdk_type, + ) + except subprocess.CalledProcessError: + raise ImportError("Failed to install oxylabs package") + else: + raise ImportError( + "`oxylabs` package not found, please run `uv add oxylabs`" + ) + + super().__init__(config=config, **kwargs) + + def _get_credentials_from_env(self) -> tuple[str, str]: + username = os.environ.get("OXYLABS_USERNAME") + password = os.environ.get("OXYLABS_PASSWORD") + if not username or not password: + raise ValueError( + "You must pass oxylabs username and password when instantiating the tool " + "or specify OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables" + ) + return username, password + + def _run(self, query: str) -> str: + response = self.oxylabs_api.amazon.scrape_search( + query, + **self.config.model_dump(exclude_none=True), + ) + + content = response.results[0].content + + if isinstance(content, dict): + return json.dumps(content) + + return content diff --git a/src/crewai_tools/tools/oxylabs_google_search_scraper_tool/README.md b/src/crewai_tools/tools/oxylabs_google_search_scraper_tool/README.md new file mode 100644 index 000000000..e9448d2db --- /dev/null +++ b/src/crewai_tools/tools/oxylabs_google_search_scraper_tool/README.md @@ -0,0 +1,50 @@ +# OxylabsGoogleSearchScraperTool + +Scrape Google Search results with `OxylabsGoogleSearchScraperTool` + +## Installation + +``` +pip install 'crewai[tools]' oxylabs +``` + +## Example + +```python +from crewai_tools import OxylabsGoogleSearchScraperTool + +# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set +tool = OxylabsGoogleSearchScraperTool() + +result = tool.run(query="iPhone 16") + +print(result) +``` + +## Arguments + +- `username`: Oxylabs username. +- `password`: Oxylabs password. 
+ +Get the credentials by creating an Oxylabs Account [here](https://oxylabs.io). + +## Advanced example + +Check out the Oxylabs [documentation](https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/google/search/search) to get the full list of parameters. + +```python +from crewai_tools import OxylabsGoogleSearchScraperTool + +# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set +tool = OxylabsGoogleSearchScraperTool( + config={ + "parse": True, + "geo_location": "Paris, France", + "user_agent_type": "tablet", + } +) + +result = tool.run(query="iPhone 16") + +print(result) +``` diff --git a/src/crewai_tools/tools/oxylabs_google_search_scraper_tool/oxylabs_google_search_scraper_tool.py b/src/crewai_tools/tools/oxylabs_google_search_scraper_tool/oxylabs_google_search_scraper_tool.py new file mode 100644 index 000000000..7de1aaa2d --- /dev/null +++ b/src/crewai_tools/tools/oxylabs_google_search_scraper_tool/oxylabs_google_search_scraper_tool.py @@ -0,0 +1,156 @@ +import json +import os +from importlib.metadata import version +from platform import architecture, python_version +from typing import Any, List, Type + +from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field + +try: + from oxylabs import RealtimeClient + from oxylabs.sources.response import Response as OxylabsResponse + + OXYLABS_AVAILABLE = True +except ImportError: + RealtimeClient = Any + OxylabsResponse = Any + + OXYLABS_AVAILABLE = False + + +__all__ = ["OxylabsGoogleSearchScraperTool", "OxylabsGoogleSearchScraperConfig"] + + +class OxylabsGoogleSearchScraperArgs(BaseModel): + query: str = Field(description="Search query") + + +class OxylabsGoogleSearchScraperConfig(BaseModel): + """ + Google Search Scraper configuration options: + https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/google/search/search + """ + + domain: str | None = Field( + None, description="The domain to limit the search results to." 
+ ) + start_page: int | None = Field(None, description="The starting page number.") + pages: int | None = Field(None, description="The number of pages to scrape.") + limit: int | None = Field( + None, description="Number of results to retrieve in each page." + ) + geo_location: str | None = Field(None, description="The Deliver to location.") + user_agent_type: str | None = Field(None, description="Device type and browser.") + render: str | None = Field(None, description="Enables JavaScript rendering.") + callback_url: str | None = Field(None, description="URL to your callback endpoint.") + context: list | None = Field( + None, + description="Additional advanced settings and controls for specialized requirements.", + ) + parse: bool | None = Field(None, description="True will return structured data.") + parsing_instructions: dict | None = Field( + None, description="Instructions for parsing the results." + ) + + +class OxylabsGoogleSearchScraperTool(BaseTool): + """ + Scrape Google Search results with OxylabsGoogleSearchScraperTool. + + Get Oxylabs account: + https://dashboard.oxylabs.io/en + + Args: + username (str): Oxylabs username. + password (str): Oxylabs password. + config: Configuration options. 
See ``OxylabsGoogleSearchScraperConfig`` + """ + + model_config = ConfigDict( + arbitrary_types_allowed=True, + validate_assignment=True, + ) + name: str = "Oxylabs Google Search Scraper tool" + description: str = "Scrape Google Search results with Oxylabs Google Search Scraper" + args_schema: Type[BaseModel] = OxylabsGoogleSearchScraperArgs + + oxylabs_api: RealtimeClient + config: OxylabsGoogleSearchScraperConfig + package_dependencies: List[str] = ["oxylabs"] + + def __init__( + self, + username: str | None = None, + password: str | None = None, + config: OxylabsGoogleSearchScraperConfig + | dict = OxylabsGoogleSearchScraperConfig(), + **kwargs, + ): + bits, _ = architecture() + sdk_type = ( + f"oxylabs-crewai-sdk-python/" + f"{version('crewai')} " + f"({python_version()}; {bits})" + ) + + if username is None or password is None: + username, password = self._get_credentials_from_env() + + if OXYLABS_AVAILABLE: + # import RealtimeClient to make it accessible for the current scope + from oxylabs import RealtimeClient + + kwargs["oxylabs_api"] = RealtimeClient( + username=username, + password=password, + sdk_type=sdk_type, + ) + else: + import click + + if click.confirm( + "You are missing the 'oxylabs' package. Would you like to install it?" 
+ ): + import subprocess + + try: + subprocess.run(["uv", "add", "oxylabs"], check=True) + from oxylabs import RealtimeClient + + kwargs["oxylabs_api"] = RealtimeClient( + username=username, + password=password, + sdk_type=sdk_type, + ) + except subprocess.CalledProcessError: + raise ImportError("Failed to install oxylabs package") + else: + raise ImportError( + "`oxylabs` package not found, please run `uv add oxylabs`" + ) + + super().__init__(config=config, **kwargs) + + def _get_credentials_from_env(self) -> tuple[str, str]: + username = os.environ.get("OXYLABS_USERNAME") + password = os.environ.get("OXYLABS_PASSWORD") + if not username or not password: + raise ValueError( + "You must pass oxylabs username and password when instantiating the tool " + "or specify OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables" + ) + return username, password + + def _run(self, query: str, **kwargs) -> str: + response = self.oxylabs_api.google.scrape_search( + query, + **self.config.model_dump(exclude_none=True), + ) + + content = response.results[0].content + + if isinstance(content, dict): + return json.dumps(content) + + return content diff --git a/src/crewai_tools/tools/oxylabs_universal_scraper_tool/README.md b/src/crewai_tools/tools/oxylabs_universal_scraper_tool/README.md new file mode 100644 index 000000000..82f345a65 --- /dev/null +++ b/src/crewai_tools/tools/oxylabs_universal_scraper_tool/README.md @@ -0,0 +1,69 @@ +# OxylabsUniversalScraperTool + +Scrape any website with `OxylabsUniversalScraperTool` + +## Installation + +``` +pip install 'crewai[tools]' oxylabs +``` + +## Example + +```python +from crewai_tools import OxylabsUniversalScraperTool + +# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set +tool = OxylabsUniversalScraperTool() + +result = tool.run(url="https://ip.oxylabs.io") + +print(result) +``` + +## Arguments + +- `username`: Oxylabs username. +- `password`: Oxylabs password. 
+ +Get the credentials by creating an Oxylabs Account [here](https://oxylabs.io). + +## Advanced example + +Check out the Oxylabs [documentation](https://developers.oxylabs.io/scraper-apis/web-scraper-api/other-websites) to get the full list of parameters. + +```python +from crewai_tools import OxylabsUniversalScraperTool + +# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set +tool = OxylabsUniversalScraperTool( + config={ + "render": "html", + "user_agent_type": "mobile", + "context": [ + {"key": "force_headers", "value": True}, + {"key": "force_cookies", "value": True}, + { + "key": "headers", + "value": { + "Custom-Header-Name": "custom header content", + }, + }, + { + "key": "cookies", + "value": [ + {"key": "NID", "value": "1234567890"}, + {"key": "1P JAR", "value": "0987654321"}, + ], + }, + {"key": "http_method", "value": "get"}, + {"key": "follow_redirects", "value": True}, + {"key": "successful_status_codes", "value": [808, 909]}, + ], + } +) + +result = tool.run(url="https://ip.oxylabs.io") + +print(result) +``` diff --git a/src/crewai_tools/tools/oxylabs_universal_scraper_tool/oxylabs_universal_scraper_tool.py b/src/crewai_tools/tools/oxylabs_universal_scraper_tool/oxylabs_universal_scraper_tool.py new file mode 100644 index 000000000..22d02f91f --- /dev/null +++ b/src/crewai_tools/tools/oxylabs_universal_scraper_tool/oxylabs_universal_scraper_tool.py @@ -0,0 +1,146 @@ +import json +import os +from importlib.metadata import version +from platform import architecture, python_version +from typing import Any, List, Type + +from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field + +try: + from oxylabs import RealtimeClient + from oxylabs.sources.response import Response as OxylabsResponse + + OXYLABS_AVAILABLE = True +except ImportError: + RealtimeClient = Any + OxylabsResponse = Any + + OXYLABS_AVAILABLE = False + +__all__ = ["OxylabsUniversalScraperTool", "OxylabsUniversalScraperConfig"] + + +class 
OxylabsUniversalScraperArgs(BaseModel): + url: str = Field(description="Website URL") + + +class OxylabsUniversalScraperConfig(BaseModel): + """ + Universal Scraper configuration options: + https://developers.oxylabs.io/scraper-apis/web-scraper-api/other-websites + """ + + geo_location: str | None = Field(None, description="The Deliver to location.") + user_agent_type: str | None = Field(None, description="Device type and browser.") + render: str | None = Field(None, description="Enables JavaScript rendering.") + callback_url: str | None = Field(None, description="URL to your callback endpoint.") + context: list | None = Field( + None, + description="Additional advanced settings and controls for specialized requirements.", + ) + parse: bool | None = Field(None, description="True will return structured data.") + parsing_instructions: dict | None = Field( + None, description="Instructions for parsing the results." + ) + + +class OxylabsUniversalScraperTool(BaseTool): + """ + Scrape any website with OxylabsUniversalScraperTool. + + Get Oxylabs account: + https://dashboard.oxylabs.io/en + + Args: + username (str): Oxylabs username. + password (str): Oxylabs password. + config: Configuration options. 
See ``OxylabsUniversalScraperConfig`` + """ + + model_config = ConfigDict( + arbitrary_types_allowed=True, + validate_assignment=True, + ) + name: str = "Oxylabs Universal Scraper tool" + description: str = "Scrape any url with Oxylabs Universal Scraper" + args_schema: Type[BaseModel] = OxylabsUniversalScraperArgs + + oxylabs_api: RealtimeClient + config: OxylabsUniversalScraperConfig + package_dependencies: List[str] = ["oxylabs"] + + def __init__( + self, + username: str | None = None, + password: str | None = None, + config: OxylabsUniversalScraperConfig | dict = OxylabsUniversalScraperConfig(), + **kwargs, + ): + bits, _ = architecture() + sdk_type = ( + f"oxylabs-crewai-sdk-python/" + f"{version('crewai')} " + f"({python_version()}; {bits})" + ) + + if username is None or password is None: + username, password = self._get_credentials_from_env() + + if OXYLABS_AVAILABLE: + # import RealtimeClient to make it accessible for the current scope + from oxylabs import RealtimeClient + + kwargs["oxylabs_api"] = RealtimeClient( + username=username, + password=password, + sdk_type=sdk_type, + ) + else: + import click + + if click.confirm( + "You are missing the 'oxylabs' package. Would you like to install it?" 
+ ): + import subprocess + + try: + subprocess.run(["uv", "add", "oxylabs"], check=True) + from oxylabs import RealtimeClient + + kwargs["oxylabs_api"] = RealtimeClient( + username=username, + password=password, + sdk_type=sdk_type, + ) + except subprocess.CalledProcessError: + raise ImportError("Failed to install oxylabs package") + else: + raise ImportError( + "`oxylabs` package not found, please run `uv add oxylabs`" + ) + + super().__init__(config=config, **kwargs) + + def _get_credentials_from_env(self) -> tuple[str, str]: + username = os.environ.get("OXYLABS_USERNAME") + password = os.environ.get("OXYLABS_PASSWORD") + if not username or not password: + raise ValueError( + "You must pass oxylabs username and password when instantiating the tool " + "or specify OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables" + ) + return username, password + + def _run(self, url: str) -> str: + response = self.oxylabs_api.universal.scrape_url( + url, + **self.config.model_dump(exclude_none=True), + ) + + content = response.results[0].content + + if isinstance(content, dict): + return json.dumps(content) + + return content diff --git a/tests/tools/test_oxylabs_tools.py b/tests/tools/test_oxylabs_tools.py new file mode 100644 index 000000000..3fd3feca3 --- /dev/null +++ b/tests/tools/test_oxylabs_tools.py @@ -0,0 +1,163 @@ +import json +import os +from typing import Type +from unittest.mock import MagicMock + +import pytest +from crewai.tools.base_tool import BaseTool +from oxylabs import RealtimeClient +from oxylabs.sources.response import Response as OxylabsResponse +from pydantic import BaseModel + +from crewai_tools import ( + OxylabsAmazonProductScraperTool, + OxylabsAmazonSearchScraperTool, + OxylabsGoogleSearchScraperTool, + OxylabsUniversalScraperTool, +) +from crewai_tools.tools.oxylabs_amazon_product_scraper_tool.oxylabs_amazon_product_scraper_tool import ( + OxylabsAmazonProductScraperConfig, +) +from 
crewai_tools.tools.oxylabs_google_search_scraper_tool.oxylabs_google_search_scraper_tool import ( + OxylabsGoogleSearchScraperConfig, +) + + +@pytest.fixture +def oxylabs_api() -> RealtimeClient: + oxylabs_api_mock = MagicMock() + + html_content = """ + + +
+ +Amazing product
+Price $14.99
+Good product
+Price $9.99
+