refactor(selenium): improve driver management and add headless mode (#268)

- Refactor Selenium scraping tool to use single driver instance
- Add headless mode configuration for Chrome
- Improve error handling with try/finally
- Simplify code structure and improve maintainability
This commit is contained in:
Lucas Gomide
2025-04-15 11:50:40 -03:00
committed by GitHub
parent a95be24865
commit 8cbdaeaff5
2 changed files with 39 additions and 29 deletions

View File

@@ -1,7 +1,8 @@
from unittest.mock import MagicMock, patch
import tempfile
import os
import tempfile
from unittest.mock import MagicMock, patch
import pytest
from bs4 import BeautifulSoup
from crewai_tools.tools.selenium_scraping_tool.selenium_scraping_tool import (
@@ -24,7 +25,7 @@ def mock_driver_with_html(html_content):
def initialize_tool_with(mock_driver):
    """Create a SeleniumScrapingTool whose ``driver`` is the supplied mock.

    Args:
        mock_driver: Object to install as the tool's ``driver`` attribute.

    Returns:
        The SeleniumScrapingTool instance with ``driver`` set to ``mock_driver``.
    """
    tool = SeleniumScrapingTool()
    # The mock is installed as the driver itself, not wrapped in a callable
    # factory, so calls made on ``tool.driver`` hit ``mock_driver`` directly.
    tool.driver = mock_driver
    return tool
@@ -33,7 +34,7 @@ def initialize_tool_with(mock_driver):
def test_tool_initialization(mocked_chrome):
temp_dir = tempfile.mkdtemp()
mocked_chrome.return_value = MagicMock()
tool = SeleniumScrapingTool()
assert tool.website_url is None
@@ -41,7 +42,7 @@ def test_tool_initialization(mocked_chrome):
assert tool.cookie is None
assert tool.wait_time == 3
assert tool.return_html is False
try:
os.rmdir(temp_dir)
except:
@@ -102,3 +103,13 @@ def test_scrape_with_return_html_false(_mocked_chrome_driver):
mock_driver.get.assert_called_once_with("https://example.com")
mock_driver.find_element.assert_called_with("tag name", "body")
mock_driver.close.assert_called_once()
@patch("selenium.webdriver.Chrome")
def test_scrape_with_driver_error(_mocked_chrome_driver):
    """Check that a driver failure yields an error string and the driver is closed.

    ``find_element`` on the mock driver is configured to raise; the test
    expects ``_run`` to return the formatted error message and ``close`` to
    have been called exactly once.
    """
    failing_driver = MagicMock()
    failing_driver.find_element.side_effect = Exception("WebDriver error occurred")
    scraper = initialize_tool_with(failing_driver)

    outcome = scraper._run(website_url="https://example.com")

    assert outcome == "Error scraping website: WebDriver error occurred"
    failing_driver.close.assert_called_once()